|
1 | 1 | import numpy as np
|
| 2 | +import pandas as pd |
| 3 | +from tabulate import tabulate |
| 4 | + |
| 5 | + |
def pprint_df(df, tablefmt='psql'):
    """Pretty-print a DataFrame as a text table.

    Renders *df* with ``tabulate`` (column headers taken from the frame's
    keys) and writes the result to stdout.

    :param df: the pandas DataFrame to display.
    :param tablefmt: tabulate table format name (default ``'psql'``).
    """
    rendered = tabulate(df, headers='keys', tablefmt=tablefmt)
    print(rendered)
| 8 | + |
2 | 9 |
|
if __name__ == "__main__":
    # Datasets whose preprocessed feature arrays are expected under
    # ../processed_data/<name>/ml_<name>*.npy.
    all_datasets = ['wikipedia', 'reddit', 'mooc', 'lastfm', 'myket', 'enron', 'SocialEvo', 'uci',
                    'Flights', 'CanParl', 'USLegis', 'UNtrade', 'UNvote', 'Contacts']

    rows = []
    # Case-insensitive alphabetical order gives a stable, readable table.
    for dataset_name in sorted(all_datasets, key=str.upper):
        edge_raw_features = np.load('../processed_data/{}/ml_{}.npy'.format(dataset_name, dataset_name))
        node_raw_features = np.load('../processed_data/{}/ml_{}_node.npy'.format(dataset_name, dataset_name))
        # NOTE(review): the "- 1" presumably excludes a padding row at
        # index 0 of each feature array — confirm against the
        # preprocessing code that wrote these .npy files.
        rows.append({
            'name': dataset_name,
            'num_nodes': node_raw_features.shape[0] - 1,
            'node_fea_dim': node_raw_features.shape[-1],
            'num_edges': edge_raw_features.shape[0] - 1,
            'edge_fea_dim': edge_raw_features.shape[-1],
        })

    # One row per dataset; print the whole summary as a single table.
    info_df = pd.DataFrame.from_records(rows)
    pprint_df(info_df)
0 commit comments