-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhw2_main.py
98 lines (79 loc) · 3.01 KB
/
hw2_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import matplotlib
matplotlib.use('TKAgg')
import matplotlib.pyplot as plt
import os
import pandas as pd
import linreg_main
import matcomp_main
import numpy as np
from ast import literal_eval
DEFAULT_CFG_FILE = os.path.join('hw2', 'cfg', 'hw2-linreg.json')
plt.rcParams["figure.figsize"] = [20, 10]
axis_fontsize = 18
title_fontsize = 26
if __name__ == '__main__':
print('=== MLOPT HW2: Aviv Rosenberg & Yonatan Elul')
print('=== ========================================')
print('=== Running matrix completion models...')
names = {
'rp': "Rank Projection",
'ff': "Factorized Form:",
'cr': "Convex Relaxation",
}
params = {
'rp': None,
'ff': None,
'cr': None,
}
test_mse = {
'rp': None,
'ff': None,
'cr': None,
}
for dataset_name in matcomp_main.DATASETS.keys():
for model_name in matcomp_main.MODELS.keys():
cv_results_file = os.path.join(matcomp_main.OUT_DIR_DEFAULT,
f'cv-{model_name}.tsv')
if os.path.isfile(cv_results_file):
df = pd.read_csv(cv_results_file, sep='\t')
df = df.loc[df['rank_test_score'] == 1]['params']
best_params = literal_eval(df.values[0])
print(f'=== Using best cross-validated params: {best_params}')
else:
best_params = {}
print(f'=== No cross-validation results file, using default '
f'parameters')
res = matcomp_main.run_training(
model_name, dataset_name, matcomp_main.OUT_DIR_DEFAULT,
no_test_set=False, test_ratio=1 / 3, random_seed=42,
**best_params
)
# Save intermediate results, to be plotted at the end of the loop
if res['model'].name == 'cr':
params[res['model'].name] = res['model'].tau
else:
params[res['model'].name] = res['model'].rank
test_mse[res['model'].name] = res['model'].test_mse_
max_iter = res['model'].max_iter
if dataset_name[-1] == 'k':
ds_name = "MovieLens 100K"
else:
ds_name = "MovieLens 1M"
iterations = np.arange(start=1, stop=(max_iter + 1))
plt.figure()
plt.plot(iterations, test_mse['rp'], 'b')
plt.plot(iterations, test_mse['ff'], 'r')
plt.plot(iterations, test_mse['cr'], 'k')
plt.legend([
names['rp'] + ' - Rank = ' + f"{params['rp']}",
names['ff'] + ' - Rank = ' + f"{params['ff']}",
names['cr'] + ' - $\\tau$ = ' + f"{params['cr']}",
])
plt.title(ds_name)
plt.xlabel("Iteration #")
plt.ylabel("Test Set MSE")
plt.show()
print('=== Running linear regression models...')
print('=== ===================================')
plot_filenames = linreg_main.run_multi(DEFAULT_CFG_FILE, parallel=True)
print(f'=== Saved plot files: {plot_filenames}')