-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUtils.py
346 lines (299 loc) · 10.9 KB
/
Utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# coding: utf-8
import base64
import codecs
import collections
import csv
import cStringIO
import datetime
import glob
import itertools
import json
import math
import os
import os.path
import pickle
import shutil
import subprocess
import time
from csv import DictWriter
from cStringIO import StringIO
from io import BytesIO

import inflection
import pandas as pd
import pycurl
import requests
class DateUtils:
    '''class for date/time util functions'''

    # Timestamp layout written by get_current_timestamp and assumed by
    # compare_two_timestamps when the caller passes no explicit format.
    _DEFAULT_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    @staticmethod
    def get_current_date_month_day_year():
        '''returns the current local date as a string in the format: 01/13/2017'''
        return datetime.datetime.now().strftime("%m/%d/%Y")

    @staticmethod
    def get_current_date_year_month_day():
        '''returns the current local date in the format: 2017_01_13_
        (the trailing underscore is part of the result)'''
        return datetime.datetime.now().strftime("%Y_%m_%d_")

    @staticmethod
    def get_current_timestamp():
        '''returns a timestamp of the current time in the format: 2017-01-13T10:53:33.858411Z

        NOTE: the value comes from datetime.now(), i.e. naive local time. The
        trailing "Z" is a literal character of the format string; no
        conversion to UTC takes place.'''
        return datetime.datetime.now().strftime(DateUtils._DEFAULT_TIMESTAMP_FORMAT)

    @staticmethod
    def compare_two_timestamps(t1, t2, dt_format1=None, dt_format2=None, offset_t1=None, offset_t2=None):
        '''compares two timestamp strings at minute resolution;
        returns True only when t1 is strictly EARLIER than t2, otherwise False.

        t1, t2: timestamp strings
        dt_format1, dt_format2: strptime formats for t1 / t2
            (default: '%Y-%m-%dT%H:%M:%S.%fZ')
        offset_t1, offset_t2: optional number of hours added to t1 / t2
            before the comparison

        Seconds and microseconds are zeroed first, so two timestamps that fall
        inside the same minute count as equal and the result is False.
        Raises ValueError when a string does not match its format.'''
        if dt_format1 is None:
            dt_format1 = DateUtils._DEFAULT_TIMESTAMP_FORMAT
        if dt_format2 is None:
            dt_format2 = DateUtils._DEFAULT_TIMESTAMP_FORMAT
        t1_dtt = datetime.datetime.strptime(t1, dt_format1)
        t2_dtt = datetime.datetime.strptime(t2, dt_format2)
        if offset_t1:
            t1_dtt = t1_dtt + datetime.timedelta(hours=offset_t1)
        if offset_t2:
            t2_dtt = t2_dtt + datetime.timedelta(hours=offset_t2)
        # Truncate to the minute so sub-minute jitter never decides the result.
        t1_dtt = t1_dtt.replace(second=0, microsecond=0)
        t2_dtt = t2_dtt.replace(second=0, microsecond=0)
        return t1_dtt < t2_dtt

    @staticmethod
    def days_between(d1, dt_fmt1, d2, dt_fmt2):
        '''returns the number of whole days between two date strings,
        independent of which one is later.

        d1, d2: date strings; dt_fmt1, dt_fmt2: their strptime formats.
        Raises ValueError when a string does not match its format.'''
        d1 = datetime.datetime.strptime(d1, dt_fmt1)
        d2 = datetime.datetime.strptime(d2, dt_fmt2)
        # Take abs() of the timedelta itself: a negative timedelta stores its
        # days rounded towards minus infinity, so abs((d2 - d1).days) would
        # report 2 for a gap of 1.5 days when d2 is the earlier value.
        return abs(d2 - d1).days

    @staticmethod
    def strToDtObj(strTime, dt_format):
        '''parses strTime with the strptime format dt_format into a datetime object'''
        return datetime.datetime.strptime(strTime, dt_format)

    @staticmethod
    def convertEpochToStrTime(epoch_time, dt_format):
        '''formats an epoch time (seconds) as local time using the strftime format dt_format'''
        return time.strftime(dt_format, time.localtime(epoch_time))
class PickleUtils:
    '''class for saving/loading "cells" objects as pickle files'''

    @staticmethod
    def _pickle_path(pickle_name, pickle_dir=None):
        '''builds the pickle file path: <dir> + <pickle_name> + "_pickled_cells.p"

        The directory is joined by plain concatenation, so it must already end
        with a path separator. When pickle_dir is None, a module-level
        `picked_dir` is used if one has been set; otherwise the path is
        relative to the current working directory.'''
        if pickle_dir is None:
            pickle_dir = globals().get('picked_dir', '')
        return pickle_dir + pickle_name + "_pickled_cells.p"

    @staticmethod
    def pickle_cells(cells, pickle_name, pickle_dir=None):
        '''pickles `cells` to <pickle_dir><pickle_name>_pickled_cells.p

        cells: any picklable object
        pickle_name: file name prefix
        pickle_dir: optional target directory (with trailing separator)'''
        with open(PickleUtils._pickle_path(pickle_name, pickle_dir), "wb") as f:
            pickle.dump(cells, f)

    @staticmethod
    def unpickle_cells(pickle_name, pickle_dir=None):
        '''loads and returns the object stored by pickle_cells under pickle_name

        Raises IOError/OSError when the file does not exist.'''
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load pickles that this application wrote itself.
        with open(PickleUtils._pickle_path(pickle_name, pickle_dir), "rb") as f:
            return pickle.load(f)
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
class LargeFileUtils:
    '''class for reading large delimited text files one slice of lines at a time'''

    @staticmethod
    def getFileHeader(file_path, delimiter):
        '''returns the first line of the file as a list of column names

        The line is lower-cased, stripped of surrounding whitespace and split
        on `delimiter`. An empty file yields an empty list.'''
        with open(file_path) as fn:
            first_line = next(fn, None)
        if first_line is None:
            return []
        return first_line.lower().strip().split(delimiter)

    @staticmethod
    def readDictListChunk(file_path, start, end, delimiter, header):
        '''returns lines [start, end) of the file as a list of dicts

        start, end: 0-based line indexes as understood by itertools.islice
            (line 0 is the first line of the file, so pass start >= 1 to skip
            a header row)
        delimiter: field separator
        header: list of column names used as the dict keys'''
        with open(file_path) as fn:
            return [
                LargeFileUtils.lineToDict(line, delimiter, header)
                for line in itertools.islice(fn, start, end)
            ]

    @staticmethod
    def lineToDict(line, delimiter, header):
        '''splits one line on `delimiter` and pairs the fields with `header`

        The line terminator is removed first so it does not end up inside the
        last field. zip() stops at the shorter of the two sequences, so surplus
        fields or surplus header names are dropped silently.'''
        fields = line.rstrip("\r\n").split(delimiter)
        return dict(zip(header, fields))
class FileUtils:
    '''class for file/os util functions'''

    @staticmethod
    def fileExists(file_path):
        '''returns True when file_path exists (os.path.exists)'''
        return os.path.exists(file_path)

    @staticmethod
    def read_csv_into_dictlist(fn):
        '''reads the csv file fn into a list of dicts, one per data row

        Whitespace directly after a delimiter is skipped. Returns an empty
        list when fn does not exist.'''
        dictList = []
        if os.path.exists(fn):
            with open(fn) as f:
                dictList = list(csv.DictReader(f, skipinitialspace=True))
        return dictList

    @staticmethod
    def getFileListForDir(filepath_str_to_search):
        '''gets file list in a directory based on some path string to search- ie: /home/adam/*.txt'''
        return glob.glob(filepath_str_to_search)

    @staticmethod
    def getAttachmentFullPath(output_dir, output_fn, download_url):
        '''downloads download_url to output_dir + output_fn using pycurl

        Equivalent to: curl -L "<download_url>" > <output_dir><output_fn>
        Returns True on success. Any exception is printed and False is
        returned; a partially written file may be left behind.'''
        downloaded = False
        try:
            # As long as the file is opened in binary mode, the response body
            # can be written to it without decoding.
            with open(output_dir + output_fn, 'wb') as f:
                c = pycurl.Curl()
                try:
                    c.setopt(c.URL, download_url)
                    # Follow redirect.
                    c.setopt(c.FOLLOWLOCATION, True)
                    c.setopt(c.WRITEDATA, f)
                    c.perform()
                finally:
                    # Release the curl handle even when perform() fails.
                    c.close()
            downloaded = True
        except Exception as e:
            print(str(e))
        return downloaded

    @staticmethod
    def getFiles(output_dir, output_fn, download_url):
        '''streams download_url to output_dir + output_fn using requests

        Returns True once the body has been copied; errors from requests or
        from writing the file propagate to the caller.
        NOTE(review): the HTTP status code is not checked, so the body of an
        error response is saved like any other - confirm this is intended.'''
        r = requests.get(download_url, stream=True)
        try:
            with open(output_dir + output_fn, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        finally:
            # stream=True keeps the connection open until the response is closed.
            r.close()
        return True

    @staticmethod
    def remove_files_on_regex(dir, regex):
        '''deletes the files matching the glob pattern dir + regex

        Despite the parameter name, `regex` is a glob pattern (it is passed to
        glob.glob). Only regular files are removed, sub-directories are left
        alone. Errors are printed and the remaining files are still processed.'''
        files_to_remove = FileUtils.getFileListForDir(dir + regex)
        for the_file in files_to_remove:
            try:
                if os.path.isfile(the_file):
                    os.unlink(the_file)
            except Exception as e:
                print(e)

    @staticmethod
    def write_wkbk_csv(fn, dictList, headerCols):
        '''writes a list of dicts to the csv file fn

        fn: output path (created or truncated)
        dictList: iterable of dicts, one per row
        headerCols: column names, written as the header row

        Every value is written as str(v).encode("ascii", 'ignore'). A row that
        cannot be written is reported on stdout and skipped. Returns True when
        the header and the row loop completed, False when setting up the
        writer or writing the header failed.'''
        wrote_wkbk = False
        with open(fn, 'w') as csvfile:
            try:
                writer = csv.DictWriter(csvfile, fieldnames=headerCols)
                writer.writeheader()
                for data in dictList:
                    try:
                        writer.writerow({s: str(v).encode("ascii", 'ignore') for s, v in data.items()})
                    except Exception as e:
                        print(str(e))
                        print("could not write row")
                wrote_wkbk = True
            except Exception as e:
                print(str(e))
        return wrote_wkbk

    @staticmethod
    def write_json_object(json_object, output_dir, json_fn):
        '''writes json_object as json to output_dir + json_fn

        The object is first passed through EncodeObjects.convertToUTF8.
        Returns True on success; any exception is printed and False returned.'''
        wroteFile = False
        try:
            json_object = EncodeObjects.convertToUTF8(json_object)
            with open(output_dir + json_fn, 'w') as f:
                json.dump(json_object, f, ensure_ascii=False)
            wroteFile = True
        except Exception as e:
            print(str(e))
        return wroteFile

    @staticmethod
    def loadJsonFile(path_to_file, json_fn):
        '''loads the json file path_to_file + json_fn

        Returns the decoded object, or an empty dict when the file does not
        exist. Raises ValueError when the file is not valid json.'''
        json_obj = {}
        full_path = path_to_file + json_fn
        if os.path.isfile(full_path):
            with open(full_path) as f:
                json_obj = json.loads(f.read())
        return json_obj
class ListUtils:
    '''list helper functions'''
    # NOTE(review): a second class named ListUtils is defined further down in
    # this file and rebinds the name once the module has finished loading.

    @staticmethod
    def flatten_list(listofLists):
        '''concatenates the items of every sub-iterable into one flat list (one level deep)'''
        return list(itertools.chain.from_iterable(listofLists))
class EncodeObjects:
@staticmethod
def convertToString(data):
'''converts unicode to string'''
if isinstance(data, basestring):
return str(data)
elif isinstance(data, collections.Mapping):
return dict(map(EncodeObjects.convertToString, data.iteritems()))
elif isinstance(data, collections.Iterable):
return type(data)(map(EncodeObjects.convertToString, data))
else:
return data
@staticmethod
def convertToUTF8(data):
'''converts unicode to string'''
if isinstance(data, basestring):
return data.encode('utf-8')
elif isinstance(data, collections.Mapping):
return dict(map(EncodeObjects.convertToUTF8, data.iteritems()))
elif isinstance(data, collections.Iterable):
return type(data)(map(EncodeObjects.convertToUTF8, data))
else:
return data
class ShtUtils:
    '''helpers for opening an excel workbook and listing its worksheets'''

    @staticmethod
    def getWkbk(fn):
        '''opens fn with pandas and returns the resulting ExcelFile object'''
        return pd.ExcelFile(fn)

    @staticmethod
    def get_sht_names(wkbk):
        '''returns the workbook's sheet names, leaving out the sheet called 'Dataset Summary' '''
        wanted = []
        for sheet_name in wkbk.sheet_names:
            if sheet_name == 'Dataset Summary':
                continue
            wanted.append(sheet_name)
        return wanted
class WkbkUtils:
    '''helpers for pulling worksheets out of excel workbooks'''

    @staticmethod
    def get_shts(fn):
        '''opens the workbook fn and returns a dict with the workbook object
        under 'wkbk' and its filtered sheet names under 'shts' '''
        workbook = ShtUtils.getWkbk(fn)
        return {'wkbk': workbook, 'shts': ShtUtils.get_sht_names(workbook)}

    @staticmethod
    def getShtDf(wkbk_stuff, wkbkName, skipRows):
        '''parses one worksheet into a dataframe

        wkbk_stuff: dict holding the workbook object under 'wkbk'
        wkbkName: name of the sheet to parse
        skipRows: passed to parse() as its `header` argument

        Returns the dataframe when it has more than 3 columns, otherwise False.'''
        sheet_df = wkbk_stuff['wkbk'].parse(wkbkName, header=skipRows)
        if len(list(sheet_df.columns)) <= 3:
            return False
        return sheet_df
class ListUtils:
    '''class for list util functions'''
    # This definition rebinds the module-level name ListUtils and so replaces
    # the class of the same name defined earlier in the file. flatten_list is
    # provided here as well so that it stays reachable through ListUtils.

    @staticmethod
    def flatten_list(listofLists):
        '''flattens a list of lists by one level into a single list'''
        return [item for sublist in listofLists for item in sublist]

    @staticmethod
    def makeChunks(lst, chunkSize):
        '''splits lst into consecutive slices of at most chunkSize items

        The last chunk holds the remainder; an empty lst gives an empty list.
        Raises ValueError when chunkSize is 0 (range() rejects a zero step).'''
        return [lst[x:x + chunkSize] for x in range(0, len(lst), chunkSize)]
class SubProcessUtils:
    '''helpers that shell out to external commands'''

    @staticmethod
    def getFileLen(fname):
        '''returns the line count that `wc -l` reports for fname

        Raises IOError carrying wc's stderr output when wc exits non-zero.'''
        wc_proc = subprocess.Popen(
            ['wc', '-l', fname],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        wc_stdout, wc_stderr = wc_proc.communicate()
        if wc_proc.returncode == 0:
            # wc prints "<count> <file name>"; the count is the first token.
            return int(wc_stdout.strip().split()[0])
        raise IOError(wc_stderr)
class TimeoutException(Exception):
    '''Exception type with no behaviour of its own.

    Nothing in this file raises it; presumably it signals that a function ran
    past its time limit (see the link below) - confirm against the callers.'''
    #http://stackoverflow.com/questions/25027122/break-the-function-after-certain-time
if __name__ == "__main__":
    # This module only provides helper classes and defines no main() function,
    # so calling one here would raise NameError. Running the file directly is
    # therefore a deliberate no-op.
    pass