6 changes: 3 additions & 3 deletions create_input_files.py
@@ -3,9 +3,9 @@
 if __name__ == '__main__':
     # Create input files (along with word map)
     create_input_files(dataset='coco',
-                       karpathy_json_path='../caption data/dataset_coco.json',
-                       image_folder='/media/ssd/caption data/',
+                       karpathy_json_path='./data_sets/dataset_coco.json',
+                       image_folder='./data_sets/training/',
                        captions_per_image=5,
                        min_word_freq=5,
-                       output_folder='/media/ssd/caption data/',
+                       output_folder='./data_sets/output/',
                        max_len=50)
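
The paths above are now repo-relative. A minimal pre-flight check, assuming the Karpathy split JSON and the training images have been placed under `./data_sets/` exactly as in the updated call (this check is illustrative only and not part of the repository):

```python
import os

# Directory layout assumed by the updated create_input_files() call above.
karpathy_json = './data_sets/dataset_coco.json'
image_folder = './data_sets/training/'
output_folder = './data_sets/output/'

# The split JSON and the image folder must already exist; the output folder can be created.
for path in (karpathy_json, image_folder):
    if not os.path.exists(path):
        raise FileNotFoundError('Expected dataset file or folder is missing: %s' % path)
os.makedirs(output_folder, exist_ok=True)
```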
19 changes: 15 additions & 4 deletions utils.py
@@ -3,7 +3,10 @@
 import h5py
 import json
 import torch
-from scipy.misc import imread, imresize
+import numpy as np
+# from scipy.misc import imread, imresize
+import imageio
+from PIL import Image
 from tqdm import tqdm
 from collections import Counter
 from random import seed, choice, sample
@@ -110,13 +113,21 @@ def create_input_files(dataset, karpathy_json_path, image_folder, captions_per_i
 
                 # Sanity check
                 assert len(captions) == captions_per_image
 
+
+                # READ image
+                # print("Processing: %s" % (impaths[i]))
+
                 # Read images
-                img = imread(impaths[i])
+                img = imageio.imread(impaths[i])
+                # img = np.array(Image.open(impaths[i]))
+
                 if len(img.shape) == 2:
                     img = img[:, :, np.newaxis]
                     img = np.concatenate([img, img, img], axis=2)
-                img = imresize(img, (256, 256))
+
+                # img = img.resize(img, (256, 256))
+                newsize = (256, 256)
+                img = np.array(Image.fromarray(img).resize(newsize))
                 img = img.transpose(2, 0, 1)
                 assert img.shape == (3, 256, 256)
                 assert np.max(img) <= 255
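
As a self-contained sketch of what this replacement does (the helper name `load_and_resize` is hypothetical and not part of `utils.py`), the deprecated `scipy.misc.imread`/`imresize` pair maps onto `imageio` and PIL as follows:

```python
import imageio
import numpy as np
from PIL import Image


def load_and_resize(path, size=(256, 256)):
    """Read an image, expand grayscale to 3 channels, resize, and return a CHW uint8 array."""
    # Replaces scipy.misc.imread.
    img = imageio.imread(path)

    # Grayscale images come back as (H, W); replicate the single channel three times.
    if len(img.shape) == 2:
        img = img[:, :, np.newaxis]
        img = np.concatenate([img, img, img], axis=2)

    # Replaces scipy.misc.imresize; PIL's resize takes (width, height).
    img = np.array(Image.fromarray(img).resize(size))

    # Channels-first layout, as the downstream PyTorch code expects.
    img = img.transpose(2, 0, 1)
    assert img.shape == (3, size[0], size[1])
    assert np.max(img) <= 255
    return img
```

Since both target dimensions are 256, PIL's (width, height) argument ordering makes no difference here, but it would matter if the target size ever became non-square.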