forked from hiram64/ocsvm-anomaly-detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakeDatasetFolder.py
164 lines (131 loc) · 5.29 KB
/
makeDatasetFolder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 5 17:24:08 2020
@author: Šimon Bilík
"""
# This script prepares the dataset according to the set parameters and divides it into the required directory structure
from PIL import Image
from math import floor
import pandas as pd
import os
# Processing switches: a step runs only when its flag equals 1
crop, res = 1, 1

# Target size in pixels passed to the resize step
imWidth, imHeight = 256, 256

# Requested number of images per split, for the ok and the nok class
nTrain_ok, nTrain_nok = 1000, 50
nValid_ok, nValid_nok = 0, 0
nTest_ok, nTest_nok = 200, 200

# Share of each split's nok quota assigned to the individual defect types
# (NC = "Vada_Neuplnost", SO = "Vada_CiziObjekt", CD = "Vada_NestandardniBarva")
rNComplete, rSObject, rCDefect = 0.4, 0.3, 0.3

# Source images, annotation file and destination folder of the dataset
imPath = "D:\\Programovani\\Datasets\\IndustryBiscuit\\Images"
anPath = "D:\\Programovani\\Datasets\\IndustryBiscuit\\AnotaceFinal.csv"
dsPath = "D:\\Programovani\\Datasets\\IndustryBiscuit_KerasApp"

# Running tallies of saved images, one per split and class / defect type
cTrain_ok = cValid_ok = cTest_ok = 0
cTrainNC_nok = cValidNC_nok = cTestNC_nok = 0
cTrainSO_nok = cValidSO_nok = cTestSO_nok = 0
cTrainCD_nok = cValidCD_nok = cTestCD_nok = 0

# Per-defect quotas: the split's nok quota scaled by the defect ratio and
# rounded down to a whole number of images
nTrNC, nVaNC, nTeNC = (floor(n * rNComplete) for n in (nTrain_nok, nValid_nok, nTest_nok))
nTrSO, nVaSO, nTeSO = (floor(n * rSObject) for n in (nTrain_nok, nValid_nok, nTest_nok))
nTrCD, nVaCD, nTeCD = (floor(n * rCDefect) for n in (nTrain_nok, nValid_nok, nTest_nok))
# Build the dataset only when the destination folder does not exist yet
if not os.path.exists(dsPath):
    # Load the filenames and the annotation from the .csv file. This happens
    # before any folder is created, so a missing or unreadable annotation file
    # does not leave an empty folder tree behind that would send the next run
    # straight into the "already exists" branch.
    # NOTE: read_csv keeps the column order of the file, not the order of the
    # `usecols` list; the positional accesses below depend on the file order.
    data = pd.read_csv(anPath, usecols= ['file','licenceCode', 'upperLeftX', 'upperLeftY', 'lowerRightX', 'lowerRightY'])

    # Create the folder structure: <dsPath>/<split>/<ok|nok>
    splitNames = ('train', 'valid', 'test')
    for splitName in splitNames:
        for className in ('ok', 'nok'):
            os.makedirs(os.path.join(dsPath, splitName, className))

    # Class label -> (class folder, images saved so far per split, quota per split).
    # The per-split sequences follow the order of `splitNames`, so a label fills
    # train first, then valid, then test.
    quotas = {
        "Bez_Vady": ('ok', [cTrain_ok, cValid_ok, cTest_ok], (nTrain_ok, nValid_ok, nTest_ok)),
        "Vada_Neuplnost": ('nok', [cTrainNC_nok, cValidNC_nok, cTestNC_nok], (nTrNC, nVaNC, nTeNC)),
        "Vada_CiziObjekt": ('nok', [cTrainSO_nok, cValidSO_nok, cTestSO_nok], (nTrSO, nVaSO, nTeSO)),
        "Vada_NestandardniBarva": ('nok', [cTrainCD_nok, cValidCD_nok, cTestCD_nok], (nTrCD, nVaCD, nTeCD)),
    }

    # Rows 1..1225 are visited in order; after each of them the next three rows
    # are taken sequentially starting at row 1226 (row numbers are 1-based).
    # Presumably rows 1226+ hold the augmented variants of the first 1225
    # images - TODO confirm against the annotation file.
    augm = 1226
    for key in range(1, 1226):
        for temp in range(0, 4):
            if temp == 0:
                index = key
            else:
                index = augm
                augm += 1
            value = data.iloc[index - 1, :]

            # Fields are read by position through .iloc: plain value[0] on a
            # label-indexed Series is deprecated positional access in pandas.
            # Position 0 is the file name, 1-4 the crop box, 5 the class label.
            fileName = value.iloc[0]
            label = value.iloc[5]

            # Decide where the image goes before touching the file, so images
            # that would be discarded anyway are never opened or processed
            if label not in quotas:
                continue
            classFolder, saved, limits = quotas[label]
            slot = next((i for i, (done, limit) in enumerate(zip(saved, limits)) if done < limit), None)
            if slot is None:
                # All quotas of this label are already full
                continue

            # Open the image file; the context manager releases the file handle
            with Image.open(os.path.join(imPath, fileName)) as source:
                im = source
                # Crop the images if set
                if crop == 1:
                    im = im.crop(tuple(value.iloc[1:5]))
                # Resize the image if set
                if res == 1:
                    im = im.resize((imWidth, imHeight))
                # Save inside the block: without crop/resize `im` is still the
                # opened file itself
                im.save(os.path.join(dsPath, splitNames[slot], classFolder, fileName), format='jpeg')
            saved[slot] += 1

    # Write the final tallies back to the module-level counters
    cTrain_ok, cValid_ok, cTest_ok = quotas["Bez_Vady"][1]
    cTrainNC_nok, cValidNC_nok, cTestNC_nok = quotas["Vada_Neuplnost"][1]
    cTrainSO_nok, cValidSO_nok, cTestSO_nok = quotas["Vada_CiziObjekt"][1]
    cTrainCD_nok, cValidCD_nok, cTestCD_nok = quotas["Vada_NestandardniBarva"][1]

    # Print dataset statistics TODO
    print ("Dataset created successfully...")
else:
    print("Folder structure with the dataset already exists...")