From 6027cbbfe5c55fad74f660af85efc1637ec72e09 Mon Sep 17 00:00:00 2001 From: Lan Dam Date: Thu, 26 Aug 2021 07:55:47 -0600 Subject: [PATCH 1/4] catch & log error for empty array --- ph5/core/ph5api.py | 8 +++++++- ph5/utilities/ph5validate.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ph5/core/ph5api.py b/ph5/core/ph5api.py index ad11bef8..420a4002 100755 --- a/ph5/core/ph5api.py +++ b/ph5/core/ph5api.py @@ -564,7 +564,13 @@ def read_array_t(self, name): if not self.Array_t_names: self.read_array_t_names() if name in self.Array_t_names: - rows, keys = self.ph5_g_sorts.read_arrays(name) + try: + rows, keys = self.ph5_g_sorts.read_arrays(name) + except TypeError as e: + if 'NoneType' in str(e): + self.Array_t[name] = {'byid': {}, 'order': [], 'keys': []} + msg = "Table %s is empty." % name + raise APIError(4, msg) byid, order = by_id( rows, secondary_key='channel_number_i', unique_key=False) self.Array_t[name] = {'byid': byid, 'order': order, 'keys': keys} diff --git a/ph5/utilities/ph5validate.py b/ph5/utilities/ph5validate.py index fb1eac55..250314b4 100755 --- a/ph5/utilities/ph5validate.py +++ b/ph5/utilities/ph5validate.py @@ -65,9 +65,15 @@ def __init__(self, ph5API_object, ph5path): def read_arrays(self, name): if name is None: for n in self.ph5.Array_t_names: - self.ph5.read_array_t(n) + try: + self.ph5.read_array_t(n) + except ph5api.APIError as e: + LOGGER.error(e.msg) else: - self.ph5.read_array_t(name) + try: + self.ph5.read_array_t(name) + except ph5api.APIError as e: + LOGGER.error(e.msg) def read_events(self, name): if name is None: From 5349ddb347b57df8c6986eca61a74a724c592e24 Mon Sep 17 00:00:00 2001 From: Lan Dam Date: Thu, 26 Aug 2021 09:56:03 -0600 Subject: [PATCH 2/4] checking for missing minifile, empty das table. 
Add array in header of ph5_validate.log for tracking --- ph5/utilities/ph5validate.py | 94 +++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 27 deletions(-) diff --git a/ph5/utilities/ph5validate.py b/ph5/utilities/ph5validate.py index 250314b4..5b49df65 100755 --- a/ph5/utilities/ph5validate.py +++ b/ph5/utilities/ph5validate.py @@ -57,6 +57,7 @@ class PH5Validate(object): def __init__(self, ph5API_object, ph5path): self.ph5 = ph5API_object self.path = ph5path + self.miniFileNotFound = set() if not self.ph5.Array_t_names: self.ph5.read_array_t_names() if not self.ph5.Experiment_t: @@ -469,7 +470,48 @@ def check_station_completeness(self, station): if sensor_serial is None: warning.append("Sensor serial number is missing.") - self.ph5.read_das_t(das_serial, reread=False) + if not station['sensor/manufacturer_s']: + warning.append("Sensor manufacturer is " + "missing. Is this correct???") + + if not station['sensor/model_s']: + warning.append("Sensor model is missing. " + "Is this correct???") + + if not station['das/manufacturer_s']: + warning.append("DAS manufacturer is missing. " + "Is this correct???") + + if not station['das/model_s']: + warning.append("DAS model is missing. " + "Is this correct???") + + errmsg = "" + try: + self.ph5.read_das_t(das_serial, reread=False) + except IOError as e: + if 'does not exist' in str(e): + minifile = str(e).split("``")[1] + errmsg = str(e) + ("The file is required for Das %s." + % das_serial) + self.miniFileNotFound.add(minifile) + else: + raise e + except AttributeError as e: + if "'NoneType' object has no attribute '_v_parent'" == str(e): + errmsg = ("Minifile for Das %s is missing. " + "Cannot identifind minifile's name" % das_serial) + else: + raise e + except TypeError as e: + if "argument of type 'NoneType' is not iterable" == str(e): + errmsg = "Das_t_%s does not exist in minifile." 
% das_serial + else: + raise e + if errmsg != "": + warning.append(errmsg) + return info, warning, error + sample_rate = station['sample_rate_i'] nodata_err = None if das_serial not in self.ph5.Das_t: @@ -523,9 +565,13 @@ def check_station_completeness(self, station): check_end = das_time_list[index+1][0] - 1 i = 1 # while loop to avoid using overlaping row - while check_end < check_start: - i += 1 - check_end = das_time_list[index+i][0] - 1 + try: + while check_end < check_start: + i += 1 + check_end = das_time_list[index+i][0] - 1 + except IndexError: + # all are overlapped + check_end = das_time_list[index][1] try: # clear das to make sure get_extent consider channel & sr self.ph5.forget_das_t(das_serial) @@ -561,22 +607,6 @@ def check_station_completeness(self, station): "Other channels seem to exist" .format(str(channel_id))) - if not station['sensor/manufacturer_s']: - warning.append("Sensor manufacturer is " - "missing. Is this correct???") - - if not station['sensor/model_s']: - warning.append("Sensor model is missing. " - "Is this correct???") - - if not station['das/manufacturer_s']: - warning.append("DAS manufacturer is missing. " - "Is this correct???") - - if not station['das/model_s']: - warning.append("DAS model is missing. " - "Is this correct???") - return info, warning, error def analyze_time(self): @@ -621,9 +651,13 @@ def analyze_time(self): dt['min_deploy_time'] = [dt['time_windows'][0][0]] dt['max_pickup_time'] = [max([t[1] for t in dt['time_windows']])] # look for data outside time border of each set - true_deploy, true_pickup = self.ph5.get_extent(das=d, - component=c, - sample_rate=spr) + try: + true_deploy, true_pickup = self.ph5.get_extent(das=d, + component=c, + sample_rate=spr) + except IOError as e: + dt['min_deploy_time'].append(str(e)) + continue if true_deploy is None: # No data found. 
But don't give warning here because it # will be given in check_station_completness @@ -716,11 +750,12 @@ def check_array_t(self): if info or warning or error: header = ("-=-=-=-=-=-=-=-=-\n" - "Station {0} Channel {1}\n" - "{2} error, {3} warning, " - "{4} info\n" + "{0} Station {1} Channel {2}\n" + "{3} error, {4} warning, " + "{5} info\n" "-=-=-=-=-=-=-=-=-\n" - .format(str(station_id), + .format(str(array_name), + str(station_id), str(channel_id), len(error), len(warning), @@ -936,7 +971,12 @@ def main(): for vb in validation_blocks: vb.write_to_log(log_file, args.level) + if len(ph5validate.miniFileNotFound) != 0: + msg = "The following files are missing: %s" % ", ".join(sorted( + ph5validate.miniFileNotFound)) + LOGGER.warning(msg) ph5API_object.close() + sys.stdout.write("\nWarnings, Errors and suggestions " "written to logfile: %s\n" % args.outfile) except ph5api.APIError as err: From add142064d7308224de5e14ff467f03a76b634cc Mon Sep 17 00:00:00 2001 From: Lan Dam Date: Thu, 26 Aug 2021 10:22:54 -0600 Subject: [PATCH 3/4] correct error msg for empty das or no mini file, check for duplicated station. --- ph5/utilities/ph5validate.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ph5/utilities/ph5validate.py b/ph5/utilities/ph5validate.py index 5b49df65..33eda7f8 100755 --- a/ph5/utilities/ph5validate.py +++ b/ph5/utilities/ph5validate.py @@ -492,20 +492,14 @@ def check_station_completeness(self, station): except IOError as e: if 'does not exist' in str(e): minifile = str(e).split("``")[1] - errmsg = str(e) + ("The file is required for Das %s." + errmsg = str(e) + (". The file is required for Das %s." % das_serial) self.miniFileNotFound.add(minifile) else: raise e - except AttributeError as e: - if "'NoneType' object has no attribute '_v_parent'" == str(e): - errmsg = ("Minifile for Das %s is missing. 
" - "Cannot identifind minifile's name" % das_serial) - else: - raise e except TypeError as e: if "argument of type 'NoneType' is not iterable" == str(e): - errmsg = "Das_t_%s does not exist in minifile." % das_serial + errmsg = "Table Das_t_%s is empty." % das_serial else: raise e if errmsg != "": @@ -687,6 +681,8 @@ def check_array_t(self): self.analyze_time() array_names = sorted(self.ph5.Array_t_names) for array_name in array_names: + check_dup_sta_list = [] + dup_sta_list = set() arraybyid = self.ph5.Array_t[array_name]['byid'] arrayorder = self.ph5.Array_t[array_name]['order'] for ph5_station in arrayorder: @@ -696,6 +692,10 @@ def check_array_t(self): for st_num in range(0, station_len): station = station_list[deployment][st_num] station_id = station['id_s'] + if station not in check_dup_sta_list: + check_dup_sta_list.append(station) + else: + dup_sta_list.add(station_id) channel_id = station['channel_number_i'] cha_code = (station['seed_band_code_s'] + station['seed_instrument_code_s'] + @@ -765,6 +765,10 @@ def check_array_t(self): warning=warning, error=error) validation_blocks.append(vb) + if len(dup_sta_list) > 0: + msg = ("The following stations are duplicated in %s: %s" + % (array_name, ', '.join(sorted(dup_sta_list)))) + LOGGER.warning(msg) return validation_blocks def check_event_t_completeness(self, event): From ca7a39dd7d432753fcf206a84930926bcc4b9dd7 Mon Sep 17 00:00:00 2001 From: Lan Dam Date: Thu, 26 Aug 2021 10:32:49 -0600 Subject: [PATCH 4/4] add Array_t info to header of station in ph5validate's unittest --- ph5/utilities/tests/test_ph5validate.py | 26 ++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/ph5/utilities/tests/test_ph5validate.py b/ph5/utilities/tests/test_ph5validate.py index 9b7c010f..737946ff 100755 --- a/ph5/utilities/tests/test_ph5validate.py +++ b/ph5/utilities/tests/test_ph5validate.py @@ -59,7 +59,7 @@ def test_check_array_t(self): if 'Station 9001' in r.heading: 
self.assertEqual(r.heading, "-=-=-=-=-=-=-=-=-\n" - "Station 9001 Channel 1\n" + "Array_t_009 Station 9001 Channel 1\n" "4 error, 1 warning, 0 info\n" "-=-=-=-=-=-=-=-=-\n" ) @@ -83,7 +83,7 @@ def test_check_array_t(self): if 'Station 0407 Channel -2' in r.heading: self.assertEqual(r.heading, "-=-=-=-=-=-=-=-=-\n" - "Station 0407 Channel -2\n" + "Array_t_004 Station 0407 Channel -2\n" "1 error, 2 warning, 0 info\n" "-=-=-=-=-=-=-=-=-\n" ) @@ -135,7 +135,8 @@ def test_main(self): 'run experiment_t_gen to create table\n') self.assertEqual( all_logs[3], - 'Station 9001 Channel 1\n2 error, 3 warning, 0 info\n') + 'Array_t_009 Station 9001 Channel 1\n' + '2 error, 3 warning, 0 info\n') self.assertEqual( all_logs[4], 'ERROR: No Response table found. Have you run resp_load yet?\n' @@ -146,7 +147,7 @@ def test_main(self): '2 time(s)\n') self.assertEqual( all_logs[5], - 'Station 9002 Channel 1\n2 error, 2 warning, 0 info\n') + 'Array_t_009 Station 9002 Channel 1\n2 error, 2 warning, 0 info\n') self.assertEqual( all_logs[6], 'ERROR: No Response table found. Have you run resp_load yet?\n' @@ -155,7 +156,7 @@ def test_main(self): 'WARNING: Data exists after pickup time: 36 seconds.\n') self.assertEqual( all_logs[7], - 'Station 9003 Channel 1\n2 error, 2 warning, 0 info\n') + 'Array_t_009 Station 9003 Channel 1\n2 error, 2 warning, 0 info\n') self.assertEqual( all_logs[8], 'ERROR: No Response table found. Have you run resp_load yet?\n' @@ -178,21 +179,21 @@ def test_main(self): 'run experiment_t_gen to create table\n') self.assertEqual( all_logs[3], - 'Station 9001 Channel 1\n2 error, 3 warning, 0 info\n') + 'Array_t_009 Station 9001 Channel 1\n2 error, 3 warning, 0 info\n') self.assertEqual( all_logs[4], 'ERROR: No Response table found. 
Have you run resp_load yet?\n' 'ERROR: Response_t has no entry for n_i=7\n') self.assertEqual( all_logs[5], - 'Station 9002 Channel 1\n2 error, 2 warning, 0 info\n') + 'Array_t_009 Station 9002 Channel 1\n2 error, 2 warning, 0 info\n') self.assertEqual( all_logs[6], 'ERROR: No Response table found. Have you run resp_load yet?\n' 'ERROR: Response_t has no entry for n_i=7\n') self.assertEqual( all_logs[7], - 'Station 9003 Channel 1\n2 error, 2 warning, 0 info\n') + 'Array_t_009 Station 9003 Channel 1\n2 error, 2 warning, 0 info\n') self.assertEqual( all_logs[8], 'ERROR: No Response table found. Have you run resp_load yet?\n' @@ -265,7 +266,8 @@ def test_check_array_t(self): ) self.assertEqual(vb[0].heading, - '-=-=-=-=-=-=-=-=-\nStation 9001 Channel 1\n' + '-=-=-=-=-=-=-=-=-\n' + 'Array_t_009 Station 9001 Channel 1\n' '2 error, 3 warning, 0 info\n-=-=-=-=-=-=-=-=-\n') self.assertEqual(vb[0].info, []) self.assertEqual( @@ -280,7 +282,8 @@ def test_check_array_t(self): ) self.assertEqual(vb[1].heading, - '-=-=-=-=-=-=-=-=-\nStation 9002 Channel 1\n' + '-=-=-=-=-=-=-=-=-\n' + 'Array_t_009 Station 9002 Channel 1\n' '2 error, 2 warning, 0 info\n-=-=-=-=-=-=-=-=-\n') self.assertEqual(vb[1].info, []) self.assertEqual( @@ -294,7 +297,8 @@ def test_check_array_t(self): ) self.assertEqual(vb[2].heading, - '-=-=-=-=-=-=-=-=-\nStation 9003 Channel 1\n' + '-=-=-=-=-=-=-=-=-\n' + 'Array_t_009 Station 9003 Channel 1\n' '2 error, 2 warning, 0 info\n-=-=-=-=-=-=-=-=-\n') self.assertEqual(vb[2].info, []) self.assertEqual(