From dd676b78dd7a5fa871ac46af7bce18ebd18b0ec7 Mon Sep 17 00:00:00 2001
From: StarmanMartin
Date: Thu, 25 Jan 2024 15:15:40 +0100
Subject: [PATCH] feat: Tiff file reader. (#77)

Used Irregular tags which are parsed from the file
---
 converter_app/readers/__init__.py |  2 +
 converter_app/readers/tif.py      | 83 +++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 converter_app/readers/tif.py

diff --git a/converter_app/readers/__init__.py b/converter_app/readers/__init__.py
index de4cda64..96ab83db 100644
--- a/converter_app/readers/__init__.py
+++ b/converter_app/readers/__init__.py
@@ -23,6 +23,7 @@
 from .dwl import DWLReader
 from .ebl import EblReader
 from .cfx import CfxReader
+from .tif import TifReader
 from .jcamp_conv_reader import JcampReader
 from .pdf import PdfReader
 from .lithoz_pdf import PdfLithozReader
@@ -85,6 +86,7 @@ def match_reader(self, file):
 registry.register(DWLReader)
 registry.register(EblReader)
 registry.register(CfxReader)
+registry.register(TifReader)
 registry.register(JcampReader)
 registry.register(PdfReader)
 registry.register(PdfLithozReader)
diff --git a/converter_app/readers/tif.py b/converter_app/readers/tif.py
new file mode 100644
index 00000000..92a90d8d
--- /dev/null
+++ b/converter_app/readers/tif.py
@@ -0,0 +1,83 @@
+"""Reader for TIFF files that embed ``key=value`` tag lines as plain text."""
+import logging
+import re
+from .base import Reader
+
+logger = logging.getLogger(__name__)
+
+UNIT_EXTENSION = "_unit"
+
+
+class TifReader(Reader):
+    """Read the text tags that follow the ``@@@@@@0`` marker in a TIFF file."""
+
+    identifier = 'tif_reader'
+    priority = 96
+    # Parsed tag lines; set by check(), or by get_tables() if check() did not run.
+    _parsed_values = None
+
+    def check(self):
+        """Return True for a ``.tif`` / ``image/tiff`` file with at least one tag line."""
+        result = False
+        if self.file.suffix.lower() == '.tif' and self.file.mime_type == 'image/tiff':
+            self._parsed_values = self._read_img()
+            result = bool(self._parsed_values)
+        logger.debug('result=%s', result)
+        return result
+
+    def _read_img(self):
+        """Return the embedded tag lines, each split on ``=`` into a list of parts."""
+        # The patterns match literal backslash escapes ("\xNN", "\r\n"), i.e. the
+        # escaped text str() gives for bytes -- assumes content is bytes; TODO confirm.
+        txt = re.sub(r'\\x[0-9a-f]{2}', '', str(self.file.content))
+
+        # Drop everything up to and including the marker line.
+        txt = re.sub(r'^.+@@@@@@0\\r\\n', '', txt)
+        lines = re.split(r'\\r\\n', txt)
+        # Discard the fragment after the final line break.
+        del lines[-1]
+        return [x.split('=') for x in lines]
+
+    def get_value(self, value):
+        """Return ``value`` normalised for float(), or None if no float pattern matches."""
+        if self.float_de_pattern.match(value):
+            # remove any digit group separators and replace the comma with a period
+            return value.replace('.', '').replace(',', '.')
+        if self.float_us_pattern.match(value):
+            # just remove the digit group separators
+            return value.replace(',', '')
+        return None
+
+    def get_tables(self):
+        """Build one table: numeric lines become rows, ``key=value`` lines metadata."""
+        if self._parsed_values is None:
+            # check() normally parses the file; do it here when it has not run,
+            # instead of failing on iteration over None.
+            self._parsed_values = self._read_img()
+
+        tables = []
+        table = self.append_table(tables)
+        for val in self._parsed_values:
+            if len(val) == 1:
+                num_val = self.get_value(val[0])
+                if num_val is not None:
+                    table['rows'].append([len(table['rows']), len(table['rows']), float(num_val)])
+            else:
+                table['metadata'][val[0]] = '='.join(val[1:])
+            # NOTE(review): nesting recovered from a whitespace-collapsed patch;
+            # confirm this append runs for every line and not only in the else branch.
+            table['header'].append('='.join(val))
+
+        table['columns'].append({
+            'key': '1',
+            'name': 'Idx'
+        })
+        table['columns'].append({
+            'key': '2',
+            'name': 'Number'
+        })
+
+        table['metadata']['rows'] = str(len(table['rows']))
+        table['metadata']['columns'] = str(len(table['columns']))
+
+        return tables