# simulation.py
# Simulation file for dNAM.
import sys
import numpy as np
import time
import os
import filecmp
import datetime
import random
import argparse
sys.path.append("../dnam/")
from processfile import ProcessFile
from log import get_logger
# ---------------------------------------------------------------------------
# Command line interface
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description="Simulation file for dNAM")
parser.add_argument("-s", "--start", type=int, default=20,
                    help="Starting file size in bytes")
parser.add_argument("-e", "--end", type=int, default=2000,
                    help="Ending file size in bytes")
parser.add_argument("-g", "--gap", type=int, default=40,
                    help="Gap between file size")
parser.add_argument("-ec", "--error_check", type=int, default=8,
                    help="Maxiumum number of error that will be checked for each origami")
parser.add_argument("-oc", "--copies_of_each_origami", type=int, default=10,
                    help="A single origami will have multiple copy with different level of error")
parser.add_argument("-fp", "--false_positive_per_origami", type=int, default=0,
                    help="Number of false positive error that will be added in each origami")
parser.add_argument("-a", "--average", action="store_true",
                    help="The error will be distributed on average per origami")
parser.add_argument("-v", "--verbose", type=int, default=0,
                    help="Print all the details")
args = parser.parse_args()

# ---------------------------------------------------------------------------
# Run configuration derived from the command line
# ---------------------------------------------------------------------------
# File sizes (in bytes) swept by the simulation: first size, exclusive upper bound, step.
STARTING_FILE_SIZE, ENDING_FILE_SIZE, FILE_SIZE_GAP = args.start, args.end, args.gap
# The same command line value bounds both the errors inserted and the errors checked.
MAXIMUM_NUMBER_OF_ERROR_CHECKED_PER_ORIGAMI = args.error_check
MAXIMUM_NUMBER_OF_ERROR_INSERTED_PER_ORIGAMI = args.error_check
COPIES_OF_EACH_ORIGAMIES = args.copies_of_each_origami
FALSE_POSITIVE_PER_ORIGAMI = args.false_positive_per_origami
AVERAGE = args.average
VERBOSE = args.verbose

# ---------------------------------------------------------------------------
# Output locations
# ---------------------------------------------------------------------------
RESULT_DIRECTORY = "test_result"
DATA_DIRECTORY = "test_data"
# Today's date as YYYYMMDD; used as a prefix of the per-run result directory.
CURRENT_DATE = datetime.datetime.now().strftime("%Y%m%d")
SIMULATION_DIRECTORY = "{root}/{date}_simulation_{start}_{end}".format(
    root=RESULT_DIRECTORY,
    date=CURRENT_DATE,
    start=STARTING_FILE_SIZE,
    end=ENDING_FILE_SIZE,
)

logger = get_logger(VERBOSE, __name__)
def change_orientation(origamies, dnam_object=None):
    """
    Convert every origami data stream into a matrix with a random orientation.

    Each entry of ``origamies`` is replaced in place by the matrix returned from
    ``dnam_object.data_stream_to_matrix``, either left as is, flipped vertically,
    flipped horizontally, or flipped both ways (one of the four chosen at random).

    :param origamies: list of origami data streams (strings, a trailing newline is stripped)
    :param dnam_object: object providing ``data_stream_to_matrix``. When omitted the
        module-level ``dnam_object`` is used, which is what older callers relied on.
    :return: the same list, now holding matrices instead of strings
    """
    if dnam_object is None:
        # Backward compatibility with callers that depended on the script-level global.
        try:
            dnam_object = globals()["dnam_object"]
        except KeyError:
            raise NameError("change_orientation needs a dnam_object (none passed, no module-level one defined)")
    for i, single_origami in enumerate(origamies):
        orientation_option = random.randrange(4)
        single_matrix = dnam_object.data_stream_to_matrix(single_origami.rstrip('\n'))
        if orientation_option == 1:
            single_matrix = np.flipud(single_matrix)
        elif orientation_option == 2:
            single_matrix = np.fliplr(single_matrix)
        elif orientation_option == 3:
            single_matrix = np.fliplr(np.flipud(single_matrix))
        # orientation_option == 0 keeps the matrix unchanged
        origamies[i] = single_matrix
    logger.info("Origami has been randomly oriented")
    return origamies


def _count_ones(matrix, rows, columns):
    """Return how many of the first ``rows`` x ``columns`` cells of ``matrix`` equal 1."""
    return sum(1 for row in range(rows) for column in range(columns) if matrix[row][column] == 1)


def degrade(dnam_object, file_in, file_out, number_of_error, average=False, false_positive_per_origami=2, copies=10):
    """
    Create a degraded copy of an encoded file by inserting errors into its origami.

    Every line of ``file_in`` is repeated ``copies`` times, the resulting list is shuffled,
    each origami is randomly oriented and errors are inserted at random cells. An error
    turns a 1 into a 0. A cell that is not 1 may instead be turned into 1 (a false
    positive) as long as the origami has received fewer than ``false_positive_per_origami``
    of them and the run as a whole has used less than 30% of the requested number of
    errors on false positives. False positives count as inserted errors.

    :param dnam_object: object providing ``data_stream_to_matrix`` and ``matrix_to_data_stream``
    :param file_in: path of the encoded file, one origami per line
    :param file_out: path the degraded origami are written to, one per line
    :param number_of_error: number of errors per origami (exact for every origami, or on
        average over all origami when ``average`` is true)
    :param average: spread ``number_of_error * total origami`` errors over randomly chosen
        origami instead of giving each origami the same amount
    :param false_positive_per_origami: maximum number of false positives per origami
    :param copies: how many times each origami of the input is duplicated
    :return: number of errors actually inserted; this is lower than requested only when
        no cell is left that could take another error
    :raises OSError: when ``file_in`` cannot be read or ``file_out`` cannot be written
    """
    # Matrix dimensions the error sampler works on (the positions are drawn from 6 x 8 cells).
    rows, columns = 6, 8

    try:
        with open(file_in, "r") as encoded_file:
            encode_file_list = encoded_file.readlines() * copies
    except OSError as e:
        # Log and re-raise: continuing without the input only fails later with an unrelated error.
        logger.error(e)
        raise

    random.shuffle(encode_file_list)
    encode_file_list = change_orientation(encode_file_list, dnam_object)

    total_origami = len(encode_file_list)
    total_error = int(number_of_error * total_origami)
    total_maximum_false_positive_insert = int(total_error * .3)
    false_positive_details = {}  # origami index -> false positives inserted into it
    false_positive_inserted = 0
    total_error_inserted = 0
    exhausted = False  # set when the sampler ran out of cells that can take an error

    if average:
        # Each origami may get a different number of errors, only the total is fixed.
        remaining_ones = sum(_count_ones(matrix, rows, columns) for matrix in encode_file_list)
        saturated_origami = 0  # origami that used up their false positive allowance
        while total_error_inserted < total_error:
            if remaining_ones == 0 and not (false_positive_per_origami > 0
                                            and false_positive_inserted < total_maximum_false_positive_insert
                                            and saturated_origami < total_origami):
                # No 1 left to clear and no false positive allowed: sampling could never succeed.
                exhausted = True
                break
            random_index = random.randrange(total_origami)
            matrix = encode_file_list[random_index]
            inserted_here = false_positive_details.setdefault(random_index, 0)
            error_row = random.randrange(rows)
            error_column = random.randrange(columns)
            if matrix[error_row][error_column] == 1:
                matrix[error_row][error_column] = 0
                remaining_ones -= 1
            elif inserted_here < false_positive_per_origami \
                    and false_positive_inserted < total_maximum_false_positive_insert:
                matrix[error_row][error_column] = 1
                false_positive_details[random_index] = inserted_here + 1
                false_positive_inserted += 1
                remaining_ones += 1
                if inserted_here + 1 >= false_positive_per_origami:
                    saturated_origami += 1
            else:
                continue  # cell cannot take an error, draw another position
            total_error_inserted += 1
    else:
        # Each origami gets the same number of errors.
        for index, matrix in enumerate(encode_file_list):
            false_positive_details[index] = 0
            ones_left = _count_ones(matrix, rows, columns)
            error_inserted = 0
            while error_inserted < number_of_error:
                false_positive_allowed = (false_positive_details[index] < false_positive_per_origami
                                          and false_positive_inserted < total_maximum_false_positive_insert)
                if ones_left == 0 and not false_positive_allowed:
                    # Nothing left to flip in this origami; move on instead of spinning forever.
                    exhausted = True
                    break
                error_row = random.randrange(rows)
                error_column = random.randrange(columns)
                if matrix[error_row][error_column] == 1:
                    matrix[error_row][error_column] = 0
                    ones_left -= 1
                elif false_positive_allowed:
                    matrix[error_row][error_column] = 1
                    false_positive_details[index] += 1
                    false_positive_inserted += 1
                    ones_left += 1
                else:
                    continue  # cell cannot take an error, draw another position
                total_error_inserted += 1
                error_inserted += 1

    if exhausted:
        logger.warning("Only {inserted} error(s) could be inserted, no eligible cell was left".format(
            inserted=total_error_inserted))

    with open(file_out, "w") as degrade_file:
        for single_origami in encode_file_list:
            degrade_file.write(dnam_object.matrix_to_data_stream(single_origami) + "\n")
    logger.info("Error insertion done")
    return total_error_inserted
# Create the simulation directory if it's not there already
os.makedirs(SIMULATION_DIRECTORY, exist_ok=True)

# All results of this run are appended to a single CSV file
result_file_name = SIMULATION_DIRECTORY + "/overall_result_simulation.csv"
try:
    # An existing file is only appended to; the header is written for a new file only.
    write_header = not os.path.isfile(result_file_name)
    # The file is opened and closed again right away. Keeping it open would hand the
    # descriptor to every process started later (more memory / too many open files),
    # so each result row re-opens it instead.
    with open(result_file_name, "a") as result_file:
        if write_header:
            result_file.write("File size,Bits per origami,Redundancy,Origami without redundancy,Origami with redundancy,"
                              "Number of copies of each origami,Encoding time,"
                              "Number of error per origami,Total number of error,Total number of error detected,"
                              "Incorrect origami,Correct Origami,Missing origamies,Decoding time,status,threshold data,"
                              "threshold parity,false positive\n")
except OSError as e:
    logger.error("Couldn't open the result file")
    logger.exception(e)
    sys.exit(1)  # non-zero status: the simulation could not even start

for file_size in range(STARTING_FILE_SIZE, ENDING_FILE_SIZE, FILE_SIZE_GAP):
    test_file_name = SIMULATION_DIRECTORY + "/test_" + str(file_size)
    logger.info("working with file size: {file_size}".format(file_size=file_size))
    # Generate random binary file for encoding
    with open(test_file_name, "wb", 0) as random_file:
        random_file.write(os.urandom(file_size))
    # Encode the randomly generated file
    dnam_object = ProcessFile(redundancy=50, verbose=False)
    encoded_file_name = test_file_name + "_encode"
    start_time = time.time()
    segments, droplet, data_bit_per_origami, required_red = dnam_object.encode(test_file_name, encoded_file_name)
    encoding_time = round((time.time() - start_time), 2)

    for error_in_each_origami in range(MAXIMUM_NUMBER_OF_ERROR_CHECKED_PER_ORIGAMI + 1):
        logger.info("Checking error: {error_in_each_origami}".format(error_in_each_origami=error_in_each_origami))
        degraded_file_name = encoded_file_name + "_degraded_copy_" + str(
            COPIES_OF_EACH_ORIGAMIES) + "_error_" + str(error_in_each_origami)
        total_error_insertion = degrade(dnam_object=dnam_object, file_in=encoded_file_name,
                                        file_out=degraded_file_name,
                                        number_of_error=error_in_each_origami, average=AVERAGE,
                                        false_positive_per_origami=FALSE_POSITIVE_PER_ORIGAMI,
                                        copies=COPIES_OF_EACH_ORIGAMIES)
        logger.info("Degradation done")
        dnam_decode = ProcessFile(redundancy=50, verbose=VERBOSE)
        # The two loops sweep the decoding parameters; currently a single combination is used.
        for threshold_data in range(2, 3):
            for threshold_parity in range(2, 3):
                decoded_file_name = test_file_name + "_decoded_copy_" + str(COPIES_OF_EACH_ORIGAMIES) + "_error_" + \
                                    str(error_in_each_origami) + "_scp_" + str(threshold_data) + \
                                    "_tempweight_" + str(threshold_parity)
                start_time = time.time()
                try:
                    decoding_status, incorrect_origami, correct_origami, total_error_fixed, missing_origamies \
                        = dnam_decode.decode(degraded_file_name, decoded_file_name, file_size,
                                             threshold_data=threshold_data,
                                             threshold_parity=threshold_parity,
                                             maximum_number_of_error=MAXIMUM_NUMBER_OF_ERROR_CHECKED_PER_ORIGAMI,
                                             false_positive=FALSE_POSITIVE_PER_ORIGAMI,
                                             individual_origami_info=True,
                                             correct_file=encoded_file_name)
                    # shallow=False forces a byte-by-byte comparison; the default may report
                    # equality from matching os.stat() signatures without reading the files.
                    if os.path.exists(encoded_file_name) and os.path.exists(decoded_file_name) and filecmp.cmp(
                            test_file_name, decoded_file_name, shallow=False):
                        status = 1  # decoded file is identical to the original
                    elif decoding_status == -1:
                        status = -1  # We could detect
                    else:
                        status = 0  # We couldn't detect
                        print("Couldn't detect")
                except Exception as e:
                    # Something went wrong on the decoding; record the failure and keep simulating.
                    logger.exception(e)
                    status = -2
                    incorrect_origami = -1
                    correct_origami = -1
                    total_error_fixed = -1
                    missing_origamies = []
                decoding_time = round((time.time() - start_time), 2)

                # Column order matches the header of the result file.
                result_row = (
                    file_size,
                    data_bit_per_origami,
                    required_red,
                    segments,
                    droplet,
                    COPIES_OF_EACH_ORIGAMIES,
                    encoding_time,
                    error_in_each_origami,
                    total_error_insertion,
                    total_error_fixed,
                    incorrect_origami,
                    correct_origami,
                    str(missing_origamies).replace(",", " "),  # keep the list inside one CSV field
                    decoding_time,
                    status,
                    threshold_data,
                    threshold_parity,
                    FALSE_POSITIVE_PER_ORIGAMI,
                )
                with open(result_file_name, "a") as result_file:
                    result_file.write(",".join(str(field) for field in result_row) + "\n")
                # A correctly decoded file carries no information, remove it
                if status == 1 and os.path.exists(decoded_file_name):
                    os.remove(decoded_file_name)
        del dnam_decode
        # If we can decode the file we will remove the degraded input. Otherwise keep it for
        # future debug reference. (status is the one of the last parameter combination.)
        if status == 1:
            os.remove(degraded_file_name)
    del dnam_object  # clearing up the memory