Skip to content

Commit 24cb1ef

Browse files
authored
Hotfixed fallback for Kaggle API
Working on a new algorithm to reduce API calls; this is a temporary solution.
1 parent 31b8489 commit 24cb1ef

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

kaggle_json.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def extract_kaggle(file_path):
1717
kaggle_identifiers = [f"{username}/{dataset}" for username, dataset in matches]
1818
return kaggle_identifiers
1919

20-
dir = "_datasets"
20+
dir = "blastnet.github.io/_datasets"
2121
total_bytes = 0
2222
total_size = 0
2323
json_dump = {}
@@ -75,25 +75,33 @@ def format_bytes(num_bytes):
7575
num_bytes /= factor
7676
unit_index += 1
7777
return f"{num_bytes:.3f} {units[unit_index]}"
78+
def unformat_bytes(string):
    """Convert a human-readable size string such as ``"1.500 GB"`` to bytes.

    The input is expected to be a number followed by a unit, separated by
    whitespace. Units are decimal: each step up the unit list multiplies
    the value by 1000.

    Args:
        string: Text of the form ``"<number> <unit>"`` where ``<unit>`` is
            one of B, KB, MB, GB, TB, PB, EB, ZB, YB (case-insensitive).
            Leading, trailing and repeated whitespace is tolerated.

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: If the string is not exactly a number and a unit, the
            unit is not recognised, or the number cannot be parsed.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
    factor = 1000

    # split() with no argument collapses any run of whitespace, so stray
    # spaces or a trailing newline do not break the two-field unpacking.
    parts = string.split()
    if len(parts) != 2:
        raise ValueError(f"Expected '<number> <unit>', got {string!r}")
    num, unit = parts

    try:
        exponent = units.index(unit.upper())
    except ValueError:
        raise ValueError(f"Unknown size unit {unit!r} in {string!r}") from None

    return float(num) * (factor ** exponent)
7883

7984
# SPECIFIC DATASET STATISTICS TO OUTPUT
8085
# Take the maximum of views/downloads from each of the sub-datasets
8186
# More representative than summing, since the same user would likely view multiple sub-datasets
82-
ds_size = format_bytes(np.sum(size_in_bytes))
87+
ds_size_raw = np.sum(size_in_bytes)
88+
ds_size = format_bytes(ds_size_raw)
8389
ds_views = np.max(views) #np.sum(views)
8490
ds_downs = np.max(downloads) #np.sum(downloads)
8591
print(f'{filename} ({ds_size}) processed. {ds_views} views, {ds_downs} downloads.')
8692

87-
# Use old data as fallback
88-
if not ds_size:
93+
if not ds_size_raw:
94+
# Use old data as fallback
8995
kaggle_stats = json.loads(gist.files['kaggle_stats.json'].content)
9096
kaggle_stats = kaggle_stats[filename]
91-
92-
# Save as dictionary and throw it to the preamble
93-
kaggle_stats = {
94-
'size': ds_size,
95-
'views': ds_views,
96-
'downloads': ds_downs,
97+
size_in_bytes = unformat_bytes(kaggle_stats['size'])
98+
downloads = kaggle_stats['downloads']
99+
else:
100+
# Save as dictionary and throw it to the preamble
101+
kaggle_stats = {
102+
'size': ds_size,
103+
'views': ds_views,
104+
'downloads': ds_downs,
97105
}
98106
json_dump[filename] = kaggle_stats
99107
total_bytes += int(np.sum(downloads*size_in_bytes))
@@ -102,6 +110,9 @@ def format_bytes(num_bytes):
102110
if not total_bytes:
103111
raise Exception("Zero data encountered, exiting")
104112
exit()
113+
#old_data = json.loads(gist.files['kaggle_stats.json'].content)
114+
#total_bytes = old_data['total_bytes']
115+
#total_size = old_data['total_size']
105116

106117
json_dump['total_bytes'] = total_bytes
107118
json_dump['total_size'] = total_size

0 commit comments

Comments
 (0)