-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
124 lines (111 loc) · 5.99 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
class HandDataset(Dataset):
    """Dataset yielding one hand image plus its three one-hot target vectors.

    `labels` is a 3-sequence of one-hot label DataFrames, row-aligned with
    `dataframe` (presumably id / age-category / gender — confirm with caller).
    """

    def __init__(self, dataframe, labels, image_dir, transform=None):
        self.dataframe = dataframe
        self.labels1, self.labels2, self.labels3 = labels
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        # One sample per row of the metadata frame.
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Load the image named in the row, forcing 3-channel RGB.
        row = self.dataframe.iloc[idx]
        path = os.path.join(self.image_dir, row['imageName'])
        image = Image.open(path).convert('RGB')
        # Pull the row's one-hot vectors out of each label frame as floats.
        targets = [
            frame.iloc[idx].values.astype(float)
            for frame in (self.labels1, self.labels2, self.labels3)
        ]
        if self.transform:
            image = self.transform(image)
        return image, [torch.tensor(t) for t in targets]
class Make_dataset:
    """Builds train/val splits, label encodings, and one-hot targets for the
    hand-image multi-task problem (subject id / age bucket / gender)."""

    def __init__(self, image_directory='Hands', info='HandInfo.csv', batch_size=20,
                 img_height=224, img_width=224, split_size=0.2):
        # parameters
        self.image_directory = image_directory
        self.batch_size = batch_size
        self.img_height = img_height
        self.img_width = img_width
        self.split_size = split_size
        self.info = info
        # Augmentation for training, plain resize+normalize for validation.
        # Mean/std are the standard ImageNet normalization constants.
        self.data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomApply([transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)], p=0.5),
                transforms.Resize((self.img_height, self.img_width)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]),
            'val': transforms.Compose([
                transforms.Resize((self.img_height, self.img_width)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]),
        }

    def define_data_frames(self):
        """Read the info CSV and partition rows into the four
        palmar/dorsal x right/left frames (df_p_r, df_p_l, df_d_r, df_d_l)."""
        df = pd.read_csv(self.info)
        # Bucket ages into 6 left-closed categories [0,21), [21,22), ... [31,76).
        age_bins = [0, 21, 22, 23, 24, 31, 76]
        labels = np.arange(6)
        df['age_category'] = pd.cut(df['age'], bins=age_bins, labels=labels, right=False, include_lowest=True)
        # Keep only accessory-free images. .copy() is the fix: the original
        # assigned new columns onto this filtered slice, which raises
        # SettingWithCopyWarning and under pandas copy-on-write can silently
        # fail to write the 'p'/'r'/'age_category' columns.
        df = df[df.accessories == 0].copy()
        # p: 1 = palmar, 0 = dorsal; r: 1 = right hand, 0 = left hand.
        df['p'] = np.where(df.aspectOfHand.str.startswith('p'), 1, 0)
        df['r'] = np.where(df.aspectOfHand.str.endswith('right'), 1, 0)
        self.df_p_r = df[(df.p == 1) & (df.r == 1)]
        self.df_p_l = df[(df.p == 1) & (df.r == 0)]
        self.df_d_r = df[(df.p == 0) & (df.r == 1)]
        self.df_d_l = df[(df.p == 0) & (df.r == 0)]

    # Split data into training and validation sets
    def split_data(self, df):
        """Stratified train/val split keeping each subject id proportionally
        represented in both sets. Returns (train_df, val_df)."""
        self.train_df, self.val_df = train_test_split(df, test_size=self.split_size, stratify=df['id'], random_state=42)
        return self.train_df, self.val_df

    def encode(self, col, encoder, val_df, train_df):
        """Fit `encoder` on the training column and transform both frames
        in place. NOTE(review): a val label unseen in train will make
        encoder.transform raise ValueError — acceptable only if every class
        appears in both splits (the stratified split should guarantee this
        for 'id'; confirm for 'age_category'/'gender')."""
        encoder.fit(train_df[col])
        train_df[col] = encoder.transform(train_df[col])
        val_df[col] = encoder.transform(val_df[col])

    def encoder(self):
        """Split each of the four aspect frames, concatenate into one
        train/val pair, label-encode the three targets, and build the
        one-hot matrices plus per-task class counts."""
        train_df_p_r, val_df_p_r = self.split_data(self.df_p_r)
        train_df_p_l, val_df_p_l = self.split_data(self.df_p_l)
        train_df_d_r, val_df_d_r = self.split_data(self.df_d_r)
        train_df_d_l, val_df_d_l = self.split_data(self.df_d_l)
        train_df = pd.concat([train_df_p_r, train_df_d_r, train_df_p_l, train_df_d_l])
        val_df = pd.concat([val_df_p_r, val_df_d_r, val_df_p_l, val_df_d_l])
        # NOTE(review): the per-aspect frames stored here are the
        # pre-encoding copies (concat makes new frames), so val_dic labels
        # stay in their raw form — confirm downstream consumers expect that.
        self.val_dic = {'Total': val_df, 'Palmer Right': val_df_p_r, 'Palmer Left': val_df_p_l, 'Dorsal Right': val_df_d_r, 'Dorsal Left': val_df_d_l}
        pairs = [('id', LabelEncoder()), ('age_category', LabelEncoder()), ('gender', LabelEncoder())]
        for pair in pairs:
            self.encode(pair[0], pair[1], val_df, train_df)
        self.train_id_one_hot = pd.get_dummies(train_df['id'])
        self.val_id_one_hot = pd.get_dummies(val_df['id'])
        self.train_age_one_hot = pd.get_dummies(train_df['age_category'])
        self.val_age_one_hot = pd.get_dummies(val_df['age_category'])
        self.train_gender_one_hot = pd.get_dummies(train_df['gender'])
        self.val_gender_one_hot = pd.get_dummies(val_df['gender'])
        # Class counts per task, taken from the fitted encoders.
        self.num_classes1 = len(pairs[0][1].classes_)
        self.num_classes2 = len(pairs[1][1].classes_)
        self.num_classes3 = len(pairs[2][1].classes_)
def create_datasets(infer=None):
    """Build data loaders for training or inference.

    With `infer` falsy: read the info CSV, encode labels, and return
    (train_loader, val_loader, make_dataset). With `infer` truthy: treat it
    as an ImageFolder root and return (None, val_loader, make_dataset).
    """
    make_dataset = Make_dataset()
    if infer:
        # Inference path: plain folder of images, validation transforms only.
        folder_ds = datasets.ImageFolder(root=infer, transform=make_dataset.data_transforms['val'])
        infer_loader = DataLoader(folder_ds, batch_size=make_dataset.batch_size, shuffle=False)
        return None, infer_loader, make_dataset

    # Training path: build the dataframes and label encodings first.
    make_dataset.define_data_frames()
    make_dataset.encoder()

    # Create datasets for each split with their matching transforms.
    train_labels = [make_dataset.train_id_one_hot,
                    make_dataset.train_age_one_hot,
                    make_dataset.train_gender_one_hot]
    val_labels = [make_dataset.val_id_one_hot,
                  make_dataset.val_age_one_hot,
                  make_dataset.val_gender_one_hot]
    train_ds = HandDataset(make_dataset.train_df, train_labels,
                           make_dataset.image_directory,
                           transform=make_dataset.data_transforms['train'])
    val_ds = HandDataset(make_dataset.val_df, val_labels,
                         make_dataset.image_directory,
                         transform=make_dataset.data_transforms['val'])

    # Create data loaders; only the training loader shuffles.
    train_loader = DataLoader(train_ds, batch_size=make_dataset.batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=make_dataset.batch_size, shuffle=False)
    return train_loader, val_loader, make_dataset