Hochfrequenz · hamidhajiparvaneh · Apr 16, 2024 · Apr 12, 2024 · Apr 12, 2024 · Apr 12, 2024
diff --git a/src/migmose/parsing.py b/src/migmose/parsing.py
@@ -3,6 +3,8 @@
 """
 
 import json
+import re
+from datetime import datetime
 from pathlib import Path
 from typing import Generator, Union
 
@@ -28,7 +30,7 @@ def find_file_to_format(message_formats: list[EdifactFormat], input_dir: Path) -
         if len(list_of_all_files) == 1:
             file_dict[message_format] = list_of_all_files[0]
         elif len(list_of_all_files) > 1:
-            logger.warning(f"⚠️ There are several files for {message_format}.", fg="red")
+            file_dict[message_format] = get_latest_file(list_of_all_files)
         else:
             logger.warning(f"⚠️ No file found for {message_format}.", fg="red")
     if file_dict:
@@ -37,6 +39,42 @@ def find_file_to_format(message_formats: list[EdifactFormat], input_dir: Path) -
     raise click.Abort()
 
 
+def get_latest_file(file_list: list[Path]) -> Union[Path, None]:
+    """
+    Returns the most recent MIG docx file from a list of candidate paths.
+
+    The date is read from the file name: it must be formatted as YYYYMMDD
+    and be located directly before the ".docx" extension.
+    Files whose name has no such suffix, or whose 8 digits are not a real calendar date,
+    are skipped instead of aborting the whole lookup.
+    If several files share the latest date, the first one in file_list wins.
+
+    Parameters:
+        file_list (list of Path): Candidate file paths to choose from.
+
+    Returns:
+        Path: The path of the file with the latest date, or None if no file carries a valid date.
+    """
+
+    def extract_date(file_path: Path) -> Union[datetime, None]:
+        # The date has to be the last thing before the extension, hence the "$" anchor.
+        match = re.search(r"(\d{8})\.docx$", file_path.name)
+        if match is None:
+            return None
+        try:
+            return datetime.strptime(match.group(1), "%Y%m%d")
+        except ValueError:
+            # Eight digits that do not form a calendar date (e.g. "20241340") are no timestamp.
+            return None
+
+    # Pair each path with its date once, dropping files without a usable date.
+    dated_files = [(extract_date(file_path), file_path) for file_path in file_list]
+    dated_files = [(date, file_path) for date, file_path in dated_files if date is not None]
+    # max() returns the first maximal element, so ties keep the "first file wins" behaviour.
+    latest = max(dated_files, key=lambda dated_file: dated_file[0], default=None)
+    return latest[1] if latest is not None else None
+
+
 def preliminary_output_as_json(table: list[str], message_format: EdifactFormat, output_dir: Path) -> None:
     """
     Writes the preliminary output as json.

diff --git a/...IG-informatorischeLesefassung2.0e-AußerordentlicheVeröffentlichung_99991231_20231001.docx b/...IG-informatorischeLesefassung2.0e-AußerordentlicheVeröffentlichung_99991231_20231001.docx
diff --git a/unittests/test_data/IFTSTAMIG-informatorischeLesefassung2.0e_99991231_20231001.docx b/unittests/test_data/IFTSTAMIG-informatorischeLesefassung2.0e_99991231_20231001.docx
diff --git a/...-informatorischeLesefassung2.0emitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx b/...-informatorischeLesefassung2.0emitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx
diff --git a/unittests/test_parsing.py b/unittests/test_parsing.py
@@ -37,6 +37,17 @@ def test_find_only_one_file(self, caplog):
             assert f"No file found for {EdifactFormat.ORDRSP}." in caplog.text
             assert file_dict[EdifactFormat.ORDCHG] == input_dir / Path("ORDCHG_MIG_1_1_info_20230331_v2.docx")
 
+    def test_find_only_one_file_multiple_docx(self):
+        """
+        Checks that the most recent docx file is picked if several match the same message format.
+        """
+        test_data_dir = Path("unittests/test_data/")
+        expected_file_name = (
+            "IFTSTAMIG-informatorischeLesefassung2.0emitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx"
+        )
+        found_files = find_file_to_format([EdifactFormat.IFTSTA], test_data_dir)
+        assert found_files[EdifactFormat.IFTSTA] == test_data_dir / Path(expected_file_name)
+
     def test_parse_raw_nachrichtenstrukturzeile(self):
         """
         Test to parse the raw nachrichtenstrukturzeile from a docx file.