Improve variable names and remove the unneeded explicit encoding argument from open()

rhettinger · rhettinger · commit fd671947dcfc · 2017-06-02T07:55:25.000-07:00
diff --git a/congress.py b/congress.py
@@ -18,7 +18,7 @@
 vote_value = {'Nay': -1, 'Not Voting': 0, 'Yea': 1} # type: Dict[str, VoteValue]
 accumulated_record = defaultdict(list)              # type: DefaultDict[Senator, List[VoteValue]]
 for filename in glob.glob('congress_data/*.csv'):
-    with open(filename, encoding='utf-8') as f:
+    with open(filename) as f:
         reader = csv.reader(f)
         vote_topic = next(reader)
         headers = next(reader)
@@ -27,7 +27,7 @@
             accumulated_record[senator].append(vote_value[vote])
 
 # Transform record into plain dict mapping a senator to a tuple of vote values
-record = {senator: tuple(votes) for senator, votes in accumulated_record.items() } # type: Dict[Senator, Tuple[VoteValue, ...]]
+record = {senator: tuple(votes) for senator, votes in accumulated_record.items()} # type: Dict[Senator, Tuple[VoteValue, ...]]
 
 # Use k-means to locate the cluster centroids and assign senators to the nearest cluster
 centroids = k_means(record.values(), k=3, iterations=50)
diff --git a/kmeans.py b/kmeans.py
@@ -19,19 +19,19 @@ def mean(data: Iterable[float]) -> float:
 
 def dist(p: Point, q: Point, sqrt=sqrt, fsum=fsum, zip=zip) -> float:
     'Euclidean distance'
-    return sqrt(fsum((x - y) ** 2.0 for x, y in zip(p, q)))
+    return sqrt(fsum((x1 - x2) ** 2.0 for x1, x2 in zip(p, q)))
 
 def assign_data(centroids: Sequence[Centroid], data: Iterable[Point]) -> Dict[Centroid, Sequence[Point]]:
     'Assign data the closest centroid'
     d : DefaultDict[Point, List[Point]] = defaultdict(list)
-    for p in data:
-        centroid: Point = min(centroids, key=partial(dist, p))
-        d[centroid].append(p)
+    for point in data:
+        centroid: Point = min(centroids, key=partial(dist, point))
+        d[centroid].append(point)
     return dict(d)
 
 def compute_centroids(groups: Iterable[Sequence[Point]]) -> List[Centroid]:
     'Compute the centroid of each group'
-    return [tuple(map(mean, zip(*pts))) for pts in groups]
+    return [tuple(map(mean, zip(*group))) for group in groups]
 
 def quality(labeled: Dict[Centroid, Sequence[Point]]) -> float:
     'Mean value of squared distances from data to its assigned centroid'