Skip to content

Commit fd67194

Browse files
committed
Improve variable names and eliminate unneeded encoding
1 parent 7e1498d commit fd67194

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

congress.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
vote_value = {'Nay': -1, 'Not Voting': 0, 'Yea': 1} # type: Dict[str, VoteValue]
1919
accumulated_record = defaultdict(list) # type: DefaultDict[Senator, List[VoteValue]]
2020
for filename in glob.glob('congress_data/*.csv'):
21-
with open(filename, encoding='utf-8') as f:
21+
with open(filename) as f:
2222
reader = csv.reader(f)
2323
vote_topic = next(reader)
2424
headers = next(reader)
@@ -27,7 +27,7 @@
2727
accumulated_record[senator].append(vote_value[vote])
2828

2929
# Transform record into plain dict mapping a senator to a tuple of vote values
30-
record = {senator: tuple(votes) for senator, votes in accumulated_record.items() } # type: Dict[Senator, Tuple[VoteValue, ...]]
30+
record = {senator: tuple(votes) for senator, votes in accumulated_record.items()} # type: Dict[Senator, Tuple[VoteValue, ...]]
3131

3232
# Use k-means to locate the cluster centroids and assign senators to the nearest cluster
3333
centroids = k_means(record.values(), k=3, iterations=50)

kmeans.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -19,19 +19,19 @@ def mean(data: Iterable[float]) -> float:
1919

2020
def dist(p: Point, q: Point, sqrt=sqrt, fsum=fsum, zip=zip) -> float:
2121
'Euclidean distance'
22-
return sqrt(fsum((x - y) ** 2.0 for x, y in zip(p, q)))
22+
return sqrt(fsum((x1 - x2) ** 2.0 for x1, x2 in zip(p, q)))
2323

2424
def assign_data(centroids: Sequence[Centroid], data: Iterable[Point]) -> Dict[Centroid, Sequence[Point]]:
2525
'Assign data the closest centroid'
2626
d : DefaultDict[Point, List[Point]] = defaultdict(list)
27-
for p in data:
28-
centroid: Point = min(centroids, key=partial(dist, p))
29-
d[centroid].append(p)
27+
for point in data:
28+
centroid: Point = min(centroids, key=partial(dist, point))
29+
d[centroid].append(point)
3030
return dict(d)
3131

3232
def compute_centroids(groups: Iterable[Sequence[Point]]) -> List[Centroid]:
3333
'Compute the centroid of each group'
34-
return [tuple(map(mean, zip(*pts))) for pts in groups]
34+
return [tuple(map(mean, zip(*group))) for group in groups]
3535

3636
def quality(labeled: Dict[Centroid, Sequence[Point]]) -> float:
3737
'Mean value of squared distances from data to its assigned centroid'

0 commit comments

Comments
 (0)