icon-lab
diff --git a/‎.DS_Store
6 KB b/‎.DS_Store
6 KB
diff --git a/‎Dataset/Loaders/__init__.py b/‎Dataset/Loaders/__init__.py
diff --git a/‎Dataset/Loaders/__pycache__/__init__.cpython-36.pyc
131 Bytes b/‎Dataset/Loaders/__pycache__/__init__.cpython-36.pyc
131 Bytes
diff --git a/‎Dataset/Loaders/__pycache__/__init__.cpython-38.pyc
154 Bytes b/‎Dataset/Loaders/__pycache__/__init__.cpython-38.pyc
154 Bytes
diff --git a/‎Dataset/Loaders/__pycache__/abide1Loader.cpython-36.pyc
2.78 KB b/‎Dataset/Loaders/__pycache__/abide1Loader.cpython-36.pyc
2.78 KB
diff --git a/‎Dataset/Loaders/__pycache__/abide1Loader.cpython-38.pyc
2.8 KB b/‎Dataset/Loaders/__pycache__/abide1Loader.cpython-38.pyc
2.8 KB
diff --git a/‎Dataset/Loaders/__pycache__/hcpRestLoader.cpython-36.pyc
2.28 KB b/‎Dataset/Loaders/__pycache__/hcpRestLoader.cpython-36.pyc
2.28 KB
diff --git a/‎Dataset/Loaders/__pycache__/hcpRestLoader.cpython-38.pyc
2.36 KB b/‎Dataset/Loaders/__pycache__/hcpRestLoader.cpython-38.pyc
2.36 KB
diff --git a/‎Dataset/Loaders/__pycache__/hcpRestNew.cpython-38.pyc
3.58 KB b/‎Dataset/Loaders/__pycache__/hcpRestNew.cpython-38.pyc
3.58 KB
diff --git a/‎Dataset/Loaders/__pycache__/hcpTaskLoader.cpython-36.pyc
1.78 KB b/‎Dataset/Loaders/__pycache__/hcpTaskLoader.cpython-36.pyc
1.78 KB
diff --git a/‎Dataset/Loaders/__pycache__/hcpTaskLoader.cpython-38.pyc
1.8 KB b/‎Dataset/Loaders/__pycache__/hcpTaskLoader.cpython-38.pyc
1.8 KB
diff --git a/‎Dataset/Loaders/__pycache__/id1000Loader.cpython-38.pyc
1.82 KB b/‎Dataset/Loaders/__pycache__/id1000Loader.cpython-38.pyc
1.82 KB
diff --git a/‎Dataset/Loaders/__pycache__/piop1Loader.cpython-38.pyc
1.78 KB b/‎Dataset/Loaders/__pycache__/piop1Loader.cpython-38.pyc
1.78 KB
diff --git a/‎Dataset/Loaders/__pycache__/piop2Loader.cpython-38.pyc
1.78 KB b/‎Dataset/Loaders/__pycache__/piop2Loader.cpython-38.pyc
1.78 KB
diff --git a/‎Dataset/Loaders/__pycache__/piopsLoader.cpython-38.pyc
482 Bytes b/‎Dataset/Loaders/__pycache__/piopsLoader.cpython-38.pyc
482 Bytes
diff --git a/‎Dataset/Loaders/hcpRestNew.py
+148 b/‎Dataset/Loaders/hcpRestNew.py
+148
diff --git a/‎Dataset/__init__.py b/‎Dataset/__init__.py
diff --git a/‎Dataset/__pycache__/__init__.cpython-38.pyc
146 Bytes b/‎Dataset/__pycache__/__init__.cpython-38.pyc
146 Bytes
diff --git a/‎Dataset/__pycache__/dataset.cpython-38.pyc
3.48 KB b/‎Dataset/__pycache__/dataset.cpython-38.pyc
3.48 KB
diff --git a/‎Dataset/dataset.py
+100 b/‎Dataset/dataset.py
+100
@@ -0,0 +1,148 @@
+
+import pickle
+import pandas
+import numpy as np
+import random
+import torch
+
+
+def loadTorchSave(atlas, baseFolderName=None):
+    """Load the saved HCP resting-state time series for one atlas.
+
+    The save file is read as a mapping from subject id to a per-subject array.
+    Subjects whose array does not have exactly 1200 entries along its first
+    dimension are skipped.
+
+    Args:
+        atlas: "AAL" or "Schaefer"; selects which save file is read.
+        baseFolderName: directory holding the ``hcpRest_*.save`` files. When
+            omitted, the placeholder default inside this function is used.
+
+    Returns:
+        (subjectDatas, subjectIds): the transposed per-subject arrays and the
+        matching subject ids, in the iteration order of the loaded mapping.
+
+    Raises:
+        ValueError: if no data directory is configured or the atlas is unknown.
+    """
+
+    defaultFolderName = None ### replace with your data directory
+
+    if baseFolderName is None:
+        baseFolderName = defaultFolderName
+    if baseFolderName is None:
+        raise ValueError(
+            "No data directory configured: pass baseFolderName or set "
+            "defaultFolderName in loadTorchSave"
+        )
+
+    atlasFileNames = {
+        "AAL": "/hcpRest_aal.save",
+        "Schaefer": "/hcpRest_schaefer.save",
+    }
+    if atlas not in atlasFileNames:
+        raise ValueError(
+            "Unknown atlas {!r}; expected one of {}".format(atlas, sorted(atlasFileNames))
+        )
+
+    fileName = baseFolderName + atlasFileNames[atlas]
+
+    # NOTE: torch.load unpickles the file, so only load save files you trust.
+    subjectDict = torch.load(fileName)
+
+    subjectDatas = []
+    subjectIds = []
+
+    for subjectId, subjectData in subjectDict.items():
+
+        # Keep only subjects with the full 1200 entries on the first axis.
+        if subjectData.shape[0] != 1200:
+            print("Passing short subject")
+            continue
+
+        subjectIds.append(subjectId)
+        subjectDatas.append(subjectData.T)
+
+    return subjectDatas, subjectIds
+
+
+
+def _parseAgeBand(ageText, useMidpoint):
+    """Turn an age band such as "26-30" or "36+" into a single float."""
+
+    if "-" not in ageText:
+        # Open-ended band ("36+"): use the number before the plus sign.
+        return float(ageText.split("+")[0])
+
+    bounds = ageText.split("-")
+    lower = float(bounds[0])
+    if not useMidpoint:
+        return lower
+    return (lower + float(bounds[1])) / 2.0
+
+
+def getLabels(subjectIds, targetTask, phenoPath=".../Datasets/HCP_1200/Preprocessed/pheno.csv"):
+    """Look up the target label and age of every subject in the pheno CSV.
+
+    Columns are read by position: 0 is the subject id, 3 the gender, 4 the age
+    band and 121 the fIQ score.
+
+    Args:
+        subjectIds: subject ids as strings, matching ``str()`` of CSV column 0.
+        targetTask: "gender", "age" or "fIQ".
+        phenoPath: location of the pheno.csv file (replace the default with
+            your own path, or pass it explicitly).
+
+    Returns:
+        (labels, badSubjIds, ages):
+        labels: one label per subject. Gender becomes 1 for 'M' and 0
+            otherwise; an age band becomes its midpoint (or the number before
+            "+"); fIQ is passed through unchanged, including missing values.
+        badSubjIds: ids whose fIQ is missing (only filled for "fIQ").
+        ages: the lower bound of each subject's age band (or the number
+            before "+"), regardless of the target task.
+
+    Raises:
+        KeyError: if a subject id is absent from the CSV or targetTask is not
+            one of the supported names.
+    """
+
+    phenoRows = pandas.read_csv(phenoPath).to_numpy()
+
+    phenoInfos = {
+        str(row[0]): {"gender": row[3], "age": row[4], "fIQ": row[121]}
+        for row in phenoRows
+    }
+
+    labels = []
+    ages = []
+    badSubjIds = []
+
+    for subjectId in subjectIds:
+
+        subjectInfo = phenoInfos[subjectId]
+        label = subjectInfo[targetTask]
+
+        # The returned ages use the lower bound of the band, while an "age"
+        # label below uses the band midpoint.
+        ages.append(_parseAgeBand(subjectInfo["age"], useMidpoint=False))
+
+        if targetTask == "gender":
+            label = 1 if label == 'M' else 0
+        elif targetTask == "age":
+            label = _parseAgeBand(label, useMidpoint=True)
+        elif targetTask == "fIQ":
+            # pandas.isna also copes with non-float cells, where np.isnan
+            # would raise a TypeError.
+            if pandas.isna(label):
+                badSubjIds.append(subjectId)
+
+        labels.append(label)
+
+    return labels, badSubjIds, ages
+    
+
+def hcpRestLoader(atlas, targetTask):
+    """Load HCP rest time series and, optionally, labels for a target task.
+
+    Subjects flagged by getLabels as having an unusable label are dropped, and
+    all returned lists are shuffled with the same fixed seed so they stay
+    aligned with one another.
+
+    Args:
+        atlas: "AAL" or "Schaefer".
+        targetTask: task name forwarded to getLabels, or None to load the
+            time series without labels.
+
+    Returns:
+        The shape depends on targetTask:
+        None     -> (subjectDatas, subjectIds)
+        "gender" -> (subjectDatas, labels, subjectIds)
+        other    -> (subjectDatas, labels, subjectIds, classWeights, ages,
+                     None, None), where classWeights is an empty list.
+
+    Raises:
+        ValueError: if atlas is not one of the supported names.
+    """
+
+    if atlas not in ("AAL", "Schaefer"):
+        # Previously this fell through to an unbound-variable error.
+        raise ValueError("Unsupported atlas {!r}; expected 'AAL' or 'Schaefer'".format(atlas))
+
+    subjectDatas_, subjectIds_ = loadTorchSave(atlas)
+
+    hasTarget = targetTask is not None
+
+    if hasTarget:
+        labels_, badSubjIds, ages_ = getLabels(subjectIds_, targetTask)
+
+        # Set membership keeps the filtering linear in the number of subjects.
+        badSubjIds = set(badSubjIds)
+
+        subjectDatas = []
+        subjectIds = []
+        labels = []
+        ages = []
+
+        for subjectData, subjectId, label, age in zip(subjectDatas_, subjectIds_, labels_, ages_):
+            if subjectId in badSubjIds:
+                continue
+            subjectDatas.append(subjectData)
+            subjectIds.append(subjectId)
+            ages.append(age)
+            labels.append(label)
+
+    else:
+
+        subjectDatas = subjectDatas_
+        subjectIds = subjectIds_
+
+    # NOTE: the weights are only computed for "gender", yet the gender return
+    # below does not include them; kept as-is so the return shapes callers
+    # rely on stay unchanged.
+    classWeights = []
+    if targetTask == "gender":
+        labelArray = np.array(labels)
+        for i in range(np.max(labels) + 1):
+            classWeights.append(float(np.sum(labelArray == i)))
+        classWeights = 1/np.array(classWeights)
+        classWeights = classWeights / np.sum(classWeights)
+
+    # Equal-length lists shuffled with identically seeded generators receive
+    # the same permutation, which keeps data, ids, labels and ages aligned.
+    random.Random(12).shuffle(subjectDatas)
+    random.Random(12).shuffle(subjectIds)
+
+    if not hasTarget:
+        return subjectDatas, subjectIds
+
+    random.Random(12).shuffle(labels)
+    random.Random(12).shuffle(ages)
+
+    print("hcp rest data : # subjects = {}, chance level = {}".format(len(labels), np.sum(labels) / len(labels)))
+
+    if targetTask == "gender":
+        return subjectDatas, labels, subjectIds
+    return subjectDatas, labels, subjectIds, classWeights, ages, None, None
+
@@ -0,0 +1,100 @@
+
+from torch.utils.data import Dataset, DataLoader
+from sklearn.model_selection import StratifiedKFold
+from random import shuffle, randrange
+import numpy as np
+import random
+
+from .Loaders.hcpRestNew import hcpRestLoader
+
+
+# Registry of dataset loaders, keyed by dataset name. SupervisedDataset looks
+# up the first entry of mainOptions.datasets here and calls the result with
+# (atlas, targetTask).
+loaderMapper = {
+    "hcpRest" : hcpRestLoader,
+    # add other datasets if you want
+}
+
+def getDataset(mainOptions):
+    """Build the dataset object for the given run options.
+
+    A SupervisedDataset is returned unconditionally; no supervision setting
+    on ``mainOptions`` is consulted here.
+    """
+
+    dataset = SupervisedDataset(mainOptions)
+    return dataset
+
+
+
+class SupervisedDataset(Dataset):
+    """Dataset of per-subject ROI time series with labels and fold selection.
+
+    Call setFold (or getFold) before indexing: it selects which subjects are
+    served and whether training-time window sampling is applied.
+    """
+
+    def __init__(self, mainOptions):
+        """Load the first dataset named in ``mainOptions.datasets``.
+
+        Reads batchSize, dynamicLength, kFold, datasets, atlas and targetTask
+        from ``mainOptions``.
+
+        Raises:
+            ValueError: if the loader returns fewer than three values, i.e.
+                no labels (as happens when targetTask is None).
+        """
+
+        self.batchSize = mainOptions.batchSize
+        self.dynamicLength = mainOptions.dynamicLength
+        self.foldCount = mainOptions.kFold
+
+        loader = loaderMapper[mainOptions.datasets[0]]
+
+        self.kFold = StratifiedKFold(mainOptions.kFold, shuffle=True, random_state=0) if mainOptions.kFold is not None else None
+        self.k = None
+
+        # hcpRestLoader returns (data, labels, subjectIds) for "gender" but
+        # appends extra trailing values for other tasks; only the first three
+        # are used here.
+        loaded = loader(mainOptions.atlas, mainOptions.targetTask)
+        if len(loaded) < 3:
+            raise ValueError("loader must return at least (data, labels, subjectIds)")
+        self.data, self.labels, self.subjectIds = loaded[:3]
+
+        # Filled in by setFold.
+        self.targetData = None
+        self.targetLabels = None
+        self.targetSubjIds = None
+        self.train = True
+
+    def __len__(self):
+        """Return the size of the active fold split, or of all data if none is set."""
+        return len(self.data) if self.targetData is None else len(self.targetData)
+
+    def get_nOfTrains_perFold(self):
+        """Return the total number of loaded subjects."""
+        # NOTE(review): the name suggests a per-fold training count, but the
+        # full subject count is returned; confirm what callers expect.
+        return len(self.data)
+
+    def setFold(self, fold, train=True):
+        """Select the subjects served by this dataset.
+
+        Args:
+            fold: index of the fold to use (ignored for the split itself when
+                no k-fold splitter is configured).
+            train: serve the training split if True, the test split otherwise.
+
+        Raises:
+            ValueError: if the test split is requested without k-fold, since
+                there is no held-out split in that case.
+        """
+
+        if self.kFold is None and not train:
+            raise ValueError("a test split requires kFold to be configured")
+
+        self.k = fold
+        self.train = train
+
+        if self.kFold is None:
+            trainIdx = list(range(len(self.data)))
+            testIdx = None
+        else:
+            trainIdx, testIdx = list(self.kFold.split(self.data, self.labels))[fold]
+
+        # Fixed seed: the training order is identical on every call.
+        random.Random(12).shuffle(trainIdx)
+
+        chosenIdx = trainIdx if train else testIdx
+
+        self.targetData = [self.data[idx] for idx in chosenIdx]
+        self.targetLabels = [self.labels[idx] for idx in chosenIdx]
+        self.targetSubjIds = [self.subjectIds[idx] for idx in chosenIdx]
+
+    def getFold(self, fold, train=True):
+        """Activate a fold split and return a DataLoader over this dataset.
+
+        Training uses the configured batch size; testing uses batches of one.
+        Neither loader shuffles.
+        """
+
+        self.setFold(fold, train)
+
+        batchSize = self.batchSize if train else 1
+        return DataLoader(self, batch_size=batchSize, shuffle=False)
+
+    def __getitem__(self, idx):
+        """Return one subject as {"timeseries", "label", "subjId"}.
+
+        The time series is z-scored along axis 1. In training mode a random
+        contiguous window of ``dynamicLength`` columns is cut out.
+
+        Raises:
+            ValueError: in training mode, if the series has fewer than
+                ``dynamicLength`` columns.
+        """
+
+        subject = self.targetData[idx]
+        label = self.targetLabels[idx]
+        subjId = self.targetSubjIds[idx]
+
+        # normalize timeseries
+        timeseries = subject # (numberOfRois, time)
+        mean = np.mean(timeseries, axis=1, keepdims=True)
+        std = np.std(timeseries, axis=1, keepdims=True)
+        # A constant row has zero std; divide by 1 there so it becomes zeros
+        # instead of NaN.
+        std = np.where(std == 0, 1.0, std)
+        timeseries = (timeseries - mean) / std
+
+        # dynamic sampling if train
+        if self.train:
+
+            length = timeseries.shape[1]
+            if length < self.dynamicLength:
+                raise ValueError(
+                    "timeseries length {} is shorter than dynamicLength {}".format(length, self.dynamicLength)
+                )
+
+            # "+ 1" makes the last valid window (ending at the final column)
+            # reachable; randrange excludes its upper bound.
+            samplingInit = randrange(length - self.dynamicLength + 1)
+            timeseries = timeseries[:, samplingInit : samplingInit + self.dynamicLength]
+
+        return {"timeseries" : timeseries.astype(np.float32), "label" : label, "subjId" : subjId}
+
+
+
+
+
+
+