Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: criterion for latest file and tests #144

Merged
merged 1 commit into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions src/migmose/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,26 @@ def get_latest_file(file_list: list[Path]) -> Path:
Path: The path of the latest file. Returns None if no valid date is found.
"""
# Initialize variables to keep track of the latest file and date
latest_file: Path
latest_date: datetime | None = None

for file_path in file_list:
date, path = _extract_date(file_path)
if latest_date is None or date > latest_date:
latest_file = path
latest_date = date

if len(file_list) == 1:
logger.info("Using the only file: {}", file_list[0])
return file_list[0]
try:
# Define the keywords to filter relevant files
keywords = ["konsolidiertelesefassungmitfehlerkorrekturen", "außerordentlicheveröffentlichung"]

# Find the most recent file based on keywords and date suffixes
latest_file = max(
(path for path in file_list if any(keyword in path.name.lower() for keyword in keywords)),
key=lambda path: (
int(path.stem.split("_")[-1]), # "gültig von" date
int(path.stem.split("_")[-2]), # "gültig bis" date
),
)

except ValueError as e:
logger.error("Error processing file list: {}", e)

logger.info("Using the latest file: {}", latest_file)
# Return the path of the file with the latest date
return latest_file

Expand Down
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion unittests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_find_only_one_file_multiple_docx(self):
assert (
file_dict[EdifactFormat.IFTSTA]
== path_to_test_FV2310
/ "IFTSTAMIG-informatorischeLesefassung2.0emitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx"
/ "IFTSTAMIG-informatorischeLesefassung2.0e-AußerordentlicheVeröffentlichung_20250403_20240311.docx"
)

def test_parse_raw_nachrichtenstrukturzeile(self):
Expand Down