-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathword-subsets.py
132 lines (119 loc) · 4.6 KB
/
word-subsets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# 916. Word Subsets
# 🟠 Medium
#
# https://leetcode.com/problems/word-subsets/
#
# Tags: Array - Hash Table - String
import timeit
from collections import Counter
from functools import reduce
from operator import or_
from typing import List
# Looking at the description, it seems like the best complexity we can
# try to achieve is O(n) since we will need to visit at least once, each
# character of the input.
# One observation that helps us, is that the length of the input words
# is restricted and, while there could be a high number of words, they
# are pretty short.
#
# 1 <= words1.length, words2.length <= 104
# 1 <= words1[i].length, words2[i].length <= 10
#
# We can take all the subsequences in words2 and build a dictionary of
# frequencies needed to be a universal word. Then check all words in
# word1 against that dictionary.
#
# Time complexity: O(n) - We visit each character in the input a
# constant number of times.
# Space complexity: O(1) - If we don't consider the input and output
# lists, we are only keeping the frequencies dictionary in memory, it
# has a max length of 10.
#
# Runtime: 1712 ms, faster than 26.01% of Python3 online submissions for
# Word Subsets.
# Memory Usage: 18.5 MB, less than 87.67% of Python3 online submissions
# for Word Subsets.
class CounterAndAll:
def wordSubsets(self, words1: List[str], words2: List[str]) -> List[str]:
# Build a dictionary of frequencies needed to be universal.
# O(n) - n: combined number of characters in words2
freq = Counter(words2[0])
for word in words2[1:]:
# Union of frequencies, preserves the maximum values for
# each key in both counters.
freq = freq | Counter(word)
# Store the results in a list.
res = []
# Since the input words have a max length of 10, if we need more
# than 10 characters to be a universal words, none of the input
# words will match, return an empty set.
if sum(freq.values()) > 10:
return res
# Iterate over the words in words1 checking them against the
# frequencies dictionary.
# O(1) - len(word1) <= 10
for word in words1:
wc = Counter(word)
# For word to be a superset of all words in word2, it needs
# to have the same of higher frequency for each entry in
# the frequencies dictionary.
# O(1) - len(freq) <= 10
if all(wc[x] >= freq[x] for x in freq):
res.append(word)
return res
# Similar logic to the previous solution but using some Python magic.
# In this example, using built-in functions does not seem to improve
# performance.
#
# Runtime: 2493 ms, faster than 5.01% of Python3 online submissions for
# Word Subsets.
# Memory Usage: 18.6 MB, less than 54.67% of Python3 online submissions
# for Word Subsets.
class BuiltInFns:
def wordSubsets(self, words1: List[str], words2: List[str]) -> List[str]:
# Substitute the for loop and | operator for reduce and or_
freq = reduce(or_, (Counter(w) for w in words2))
# Substitute the if check and all() call for list comprehension
# with an internal check.
return [word for word in words1 if freq & Counter(word) == freq]
def test():
executors = [CounterAndAll, BuiltInFns]
tests = [
[
["amazon", "apple", "facebook", "google", "leetcode"],
["eoo", "eo", "a", "b", "c", "k", "g"],
[],
],
[
["amazon", "apple", "facebook", "google", "leetcode"],
["eoo", "eo", "a", "b", "c", "k"],
["facebook"],
],
[
["amazon", "apple", "facebook", "google", "leetcode"],
["e", "o"],
["facebook", "google", "leetcode"],
],
[
["amazon", "apple", "facebook", "google", "leetcode"],
["l", "e"],
["apple", "google", "leetcode"],
],
]
for executor in executors:
start = timeit.default_timer()
for _ in range(int(float("1"))):
for col, t in enumerate(tests):
sol = executor()
result = sol.wordSubsets(t[0], t[1])
exp = t[2]
assert result == exp, (
f"\033[93m» {result} <> {exp}\033[91m for"
+ f" test {col} using \033[1m{executor.__name__}"
)
stop = timeit.default_timer()
used = str(round(stop - start, 5))
cols = "{0:20}{1:10}{2:10}"
res = cols.format(executor.__name__, used, "seconds")
print(f"\033[92m» {res}\033[0m")
test()