diff --git a/src/migmose/__main__.py b/src/migmose/__main__.py index af5d94e..b0498eb 100644 --- a/src/migmose/__main__.py +++ b/src/migmose/__main__.py @@ -142,7 +142,7 @@ def main( format_version, output_dir_for_format, ) - document_version = _extract_document_version(file) + document_version, *_ = _extract_document_version(file) reduced_nested_nachrichtenstruktur.output_tree(m_format, output_dir_for_format, document_version) diff --git a/src/migmose/parsing.py b/src/migmose/parsing.py index 7cd5711..58b65cd 100644 --- a/src/migmose/parsing.py +++ b/src/migmose/parsing.py @@ -160,28 +160,29 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]: _pattern = re.compile( - r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(.*?)" + r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?((\d+)\.(\d+)([a-z]?))" r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)", re.IGNORECASE, ) -def _extract_document_version(path: Path | str) -> str: +def _extract_document_version(path: Path | str) -> tuple[str, int | None, int | None, str]: + """Returns the document version, major, minor, and suffix from the given file path.""" if isinstance(path, str): document_str = path else: document_str = str(path) matches = _pattern.search(document_str) if matches: - document_version = matches.group(1) + document_version, major, minor, suffix = matches.groups() if document_version == "": logger.warning(f"❌ No document version found in {path}.", fg="red") - return document_version + return document_version or "", int(major) or 0, int(minor) or 0, suffix or "" logger.error(f"❌ Unexpected document name in {path}.", fg="red") - return "" + return "", None, None, "" -def _get_sort_key(path: Path) -> tuple[int, int, str]: +def _get_sort_key(path: Path) -> tuple[int, int, int | None, int | None, str]: """ Extracts the sort key from the given path. @@ -194,5 +195,5 @@ def _get_sort_key(path: Path) -> tuple[int, int, str]: parts = path.stem.split("_") gueltig_von_date = int(parts[-1]) gueltig_bis_date = int(parts[-2]) - version_number = _extract_document_version(parts[-3]) - return gueltig_von_date, gueltig_bis_date, version_number + _, major, minor, suffix = _extract_document_version(parts[-3]) + return gueltig_von_date, gueltig_bis_date, major, minor, suffix diff --git a/unittests/__snapshots__/test_parsing.ambr b/unittests/__snapshots__/test_parsing.ambr index a3ea520..42f5164 100644 --- a/unittests/__snapshots__/test_parsing.ambr +++ b/unittests/__snapshots__/test_parsing.ambr @@ -1,16 +1,41 @@ # serializer version: 1 # name: TestParsing.test_extract_document_version[IFTSTA] - '' + tuple( + '', + None, + None, + '', + ) # --- # name: TestParsing.test_extract_document_version[REMADV] - '2.9b' + tuple( + '2.9b', + 2, + 9, + 'b', + ) # --- # name: TestParsing.test_extract_document_version[REQOTE] - '1.3' + tuple( + '1.3', + 1, + 3, + '', + ) # --- # name: TestParsing.test_extract_document_version[UTILMDG] - 'G1.0a' + tuple( + '', + None, + None, + '', + ) # --- # name: TestParsing.test_extract_document_version[UTILMDS] - 'S1.1' + tuple( + '', + None, + None, + '', + ) # --- diff --git a/unittests/test_reduced_nested_nachrichtenstruktur.py b/unittests/test_reduced_nested_nachrichtenstruktur.py index b11dcf5..3dec7c5 100644 --- a/unittests/test_reduced_nested_nachrichtenstruktur.py +++ b/unittests/test_reduced_nested_nachrichtenstruktur.py @@ -92,7 +92,7 @@ def test_output_tree(self, message_format: EdifactFormat, tmp_path, snapshot): reduced_nested_nachrichtenstruktur = ReducedNestedNachrichtenstruktur.create_reduced_nested_nachrichtenstruktur( nested_nachrichtenstruktur ) - document_version = _extract_document_version(file_path) + document_version, *_ = _extract_document_version(file_path) reduced_nested_nachrichtenstruktur.output_tree(message_format, tmp_path, document_version) with open(tmp_path / f"{message_format}{document_version}.tree", "r", encoding="utf-8") as actual_file: assert actual_file.read() == snapshot