-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcodeU3_refined.py
217 lines (165 loc) · 7.69 KB
/
codeU3_refined.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 10 09:17:26 2017
Exercise 3, version 2
@author: Luiza
"""
from collections import deque
import unittest
import numpy as np
class Dictionary(object):
'''Stores lower-case vocabulary and all its prefixes as a set
Two functions isWord and isPrefix are defined to check if given word/prefix is found
'''
def __init__(self, word_list):
'''Initialization with a non-empty word-list
prefixes are found automatically
'''
assert word_list, 'Please provide a non-empty list of words'
word_list = [w.lower() for w in word_list if w]
self.max_word_len = max(map(len, word_list))
self.words = set(word_list)
self.word_prefixes = set()
for word in self.words:
self.word_prefixes.update(self.getPrefixes(word))
def getPrefixes(self, word):
'''Returns the list of all prefixes of given word
including the word itself
'''
return set([word[0:n] for n in range(1, len(word)+1)])
def isWord(self, word):
'''Returns whether the word presents in the vocabulary
the function is case-invariant
'''
if not isinstance(word, str):
assert TypeError, 'Input should be a string'
return word.lower() in self.words
def isPrefix(self, prefix):
'''Returns whether the string is the prefix of one of the words in vocabulary
the function is case-invariant
'''
if not isinstance(prefix, str):
assert TypeError, 'Input should be a string'
return prefix.lower() in self.word_prefixes
class TestDictionary(unittest.TestCase):
'''Testing dictionary basic functions
'''
def setUp(self):
self.d = Dictionary(['cats', 'dogs', 'cars', ' ', ''])
def test_empty_input(self):
self.assertFalse(self.d.isWord(' '))
self.assertFalse(self.d.isPrefix(''))
def test_case(self):
self.assertTrue(self.d.isWord('Cats'))
self.assertTrue(self.d.isPrefix('DOGS'))
def test_normal_input_isword(self):
self.assertTrue(self.d.isWord('cats'))
self.assertFalse(self.d.isWord('cat'))
def test_normal_input_isprefix(self):
self.assertTrue(self.d.isPrefix('car'))
self.assertFalse(self.d.isPrefix('cs'))
class CharGrid(object):
'''Given (n by m) grid of letters and a dictionary
Return all possible words from dictionary that can be formed by traversing
the grid cells without visiting the same cell twice
'''
def __init__(self, char_matrix, word_list):
'''Initialization of char matrix as 2D numpy array
checks whether we have a proper matrix and character values in it,
saves them along with matrix shape as attributes and dictionary
Args:
char_matrix: char_matrix as 2D numpy
word_list: a list of strings, dictionary
'''
char_array = char_matrix.flatten() # flattening for checking matrix shape correctness
for c in char_array:
if isinstance(c, list):
assert TypeError, 'Please provide matrix type of data'
return
if not c.isalpha() or len(c) > 1:
assert TypeError, 'Please provide grid values with only char entries'
return
self.grid = char_matrix
self.dict = Dictionary(word_list)
(self.m, self.n) = np.shape(char_matrix)
def route_to_word(self, route):
'''Finds a word, corresponding to the given route
Args:
route: a list of (x,y) indices
Returns:
word, corresponding to this route
empty string if route is empty
'''
chars = [self.grid[x, y] for (x, y) in route]
return ''.join(chars)
def route_extend(self, route):
'''Extends the route with 1 step into 8 possible directions
Args:
route: a list of (x,y) indices
Returns:
all possible routes where 1 step made in one of 8 directions
each route is a list of tuple coordinates
'''
last_x = route[-1][0] #last route coordinate x
last_y = route[-1][1] #last route coordinate y
cells_set = set(route)
route_extended = []
for i in range(-1, 2):
for j in range(-1, 2):
if i == j and i == 0:
continue
if 0 <= last_x + i < self.m and 0 <= last_y + j < self.n\
and (last_x + i, last_y + j) not in cells_set:
route_extended.append(route + [(last_x + i, last_y + j)])
''' old long alternative
if last_x + 1 < self.m and (last_x + 1, last_y) not in cells_set: #ensure we don't visit the same cell twice
route_extended.append(route + [(last_x + 1, last_y)])
if last_x + 1 < self.m and last_y + 1 < self.n and (last_x + 1, last_y + 1) not in cells_set:
route_extended.append(route + [(last_x + 1, last_y + 1)])
if last_y + 1 < self.n and (last_x, last_y + 1) not in cells_set:
route_extended.append(route + [(last_x, last_y + 1)])
if last_x - 1 >= 0 and (last_x - 1, last_y) not in cells_set:
route_extended.append(route + [(last_x - 1, last_y)])
if last_x - 1 >= 0 and last_y + 1 < self.n and (last_x - 1, last_y + 1) not in cells_set:
route_extended.append(route+[(last_x - 1, last_y + 1)])
if last_x - 1 >= 0 and last_y - 1 >= 0 and (last_x - 1, last_y - 1) not in cells_set:
route_extended.append(route + [(last_x - 1, last_y - 1)])
if last_y - 1 >= 0 and (last_x, last_y - 1) not in cells_set:
route_extended.append(route + [(last_x, last_y - 1)])
if last_x + 1 < self.m and last_y - 1 >= 0 and (last_x + 1, last_y - 1) not in cells_set:
route_extended.append(route + [(last_x + 1, last_y - 1)])
'''
return route_extended
def search_words(self):
'''Return all words found in the grid from a given dictionary
Optimized by stopping searching longer route than maximum length of the dictionary word
'''
all_cells = [[(i, j)] for i in range(self.m) for j in range(self.n)]
routes = deque(all_cells)
words_found = set()
while routes:
route = routes.popleft() #queue functionality
word = self.route_to_word(route)
if self.dict.isPrefix(word):
if self.dict.isWord(word):
words_found.add(str(word))
if len(route) < self.dict.max_word_len:
routes.extendleft(self.route_extend(route))
return words_found
class TestCharGrid(unittest.TestCase):
'''Unit tests to check word search in the grid
'''
def setUp(self):
self.d = Dictionary(['cats', 'dogs', 'cars', ' ', ''])
self.charGrid = CharGrid(np.array([['a', 'a', 'r'], ['t', 'c', 'd']]),\
['CAR', 'CARD', 'CART', 'CAT'])
def test_non_char_entry(self):
self.assertRaises(CharGrid(np.array([['a', '2', 'r'], ['t', 'c', 'd']]),\
['CAR', 'CARD', 'CART', 'CAT']))
def test_non_matrix_entry(self):
self.assertRaises(CharGrid(np.array([['a', '2'], ['t', 'c', 'd']]),\
['CAR', 'CARD', 'CART', 'CAT']))
def test_normal_input(self):
self.assertEqual(self.charGrid.search_words(), set(['car', 'card', 'cat']))
if __name__ == '__main__':
unittest.main()