-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Checking whether there is more than one MIG docx file for a message_format #37
Changes from 3 commits
f7d3bb6
21bde39
380e4ad
3d3c4fd
23d7d6d
e530ef7
9a646a3
fa7d688
47995c7
e90ae30
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ | |
""" | ||
|
||
import json | ||
import re | ||
from datetime import datetime | ||
from pathlib import Path | ||
from typing import Generator, Union | ||
|
||
|
@@ -28,7 +30,7 @@ def find_file_to_format(message_formats: list[EdifactFormat], input_dir: Path) - | |
if len(list_of_all_files) == 1: | ||
file_dict[message_format] = list_of_all_files[0] | ||
elif len(list_of_all_files) > 1: | ||
logger.warning(f"⚠️ There are several files for {message_format}.", fg="red") | ||
file_dict[message_format] = get_latest_file(list_of_all_files) | ||
else: | ||
logger.warning(f"⚠️ No file found for {message_format}.", fg="red") | ||
if file_dict: | ||
|
@@ -37,6 +39,42 @@ def find_file_to_format(message_formats: list[EdifactFormat], input_dir: Path) - | |
raise click.Abort() | ||
|
||
|
||
def get_latest_file(file_list: list[Path]) -> Union[Path, None]:
    """
    Return the path of the most recent MIG docx file in ``file_list``.

    The date is read from the file name. It is expected as eight digits (YYYYMMDD)
    located just before the ".docx" extension. Files whose name does not match this
    pattern, or whose eight digits are not a valid calendar date, are ignored.

    Parameters:
    file_list (list of Path): The candidate docx file paths.

    Returns:
    Path: The path carrying the latest date. If several files share the latest date,
    the first of them in ``file_list`` is returned. Returns None if no valid date is found.
    """

    def extract_date(file_path: Path) -> Union[datetime, None]:
        """Return the date encoded in the file name, or None if there is no valid one."""
        match = re.search(r"(\d{8})\.docx$", file_path.name)
        if match is None:
            return None
        try:
            # The resulting datetime is naive (no time zone). That is fine here because
            # it is only ever compared with other naive datetimes built the same way.
            return datetime.strptime(match.group(1), "%Y%m%d")
        except ValueError:
            # Eight digits that do not form a real date (e.g. month 13) are treated
            # like a missing date instead of aborting the whole lookup.
            return None

    dated_files = []
    for file_path in file_list:
        file_date = extract_date(file_path)
        if file_date is not None:
            dated_files.append((file_date, file_path))

    if not dated_files:
        return None

    # max() returns the first of several equal maxima, so ties keep the input order.
    _, latest_file = max(dated_files, key=lambda entry: entry[0])
    return latest_file
|
||
|
||
def preliminary_output_as_json(table: list[str], message_format: EdifactFormat, output_dir: Path) -> None: | ||
""" | ||
Writes the preliminary output as json. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,17 @@ def test_find_only_one_file(self, caplog): | |
assert f"No file found for {EdifactFormat.ORDRSP}." in caplog.text | ||
assert file_dict[EdifactFormat.ORDCHG] == input_dir / Path("ORDCHG_MIG_1_1_info_20230331_v2.docx") | ||
|
||
def test_find_only_one_file_multiple_docx(self):
    """
    Checks which file is selected when several docx files share the same message_format.
    """
    # NOTE(review): relative path; presumably relies on the tests being started
    # from the repository root. Confirm against the test runner configuration.
    input_dir = Path("unittests/test_data/")
    expected_file = input_dir / Path(
        "IFTSTAMIG-informatorischeLesefassung2.0emitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx"
    )

    file_dict = find_file_to_format([EdifactFormat.IFTSTA], input_dir)

    assert file_dict[EdifactFormat.IFTSTA] == expected_file
|
||
def test_parse_raw_nachrichtenstrukturzeile(self): | ||
""" | ||
Test to parse the raw nachrichtenstrukturzeile from a docx file. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kannst du hier Type hints adden?