Skip to content

Commit

Permalink
added named capturing groups and added examples to docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
DeltaDaniel committed Oct 8, 2024
1 parent 8bc3e91 commit fd4e6ae
Showing 1 changed file with 24 additions and 6 deletions.
30 changes: 24 additions & 6 deletions src/migmose/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,36 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:


# Locates the version part of a MIG document file name, for example
# "ORDCHGMIG-informatorischeLesefassung1.1a_99991231_20231001.docx".
# Named groups:
#   version -> full version string, e.g. "1.1a"
#   major   -> digits before the dot, e.g. "1"
#   minor   -> digits after the dot, e.g. "1"
#   suffix  -> optional single letter after the minor version, e.g. "a"
#              (empty string when absent)
# The version must be followed by "_", "KonsolidierteLesefassung" or
# "-AußerordentlicheVeröffentlichung"; matching ignores case.
# Only the named-group literal is kept: leaving the older unnamed-group literal
# next to it would concatenate both and require the prefix and version twice.
# NOTE(review): the "?" after "Lesefassung" only makes the final "g" optional
# — confirm this is intended.
_pattern = re.compile(
    r"MIG(?:Strom|Gas)?-?informatorischeLesefassung?(?P<version>(?P<major>\d+)\.(?P<minor>\d+)(?P<suffix>[a-z]?))"
    r"(?:_|KonsolidierteLesefassung|-AußerordentlicheVeröffentlichung)",
    re.IGNORECASE,
)


def _extract_document_version(path: Path | str) -> tuple[str, int | None, int | None, str]:
"""Returns the document version, major, minor, and suffix from the given file path."""
"""
Extracts the document version (major.minor+suffix) details from the given file path.
Args:
path (Path | str): The path to the file.
Example: path/to/file/ORDCHGMIG-informatorischeLesefassung1.1a_99991231_20231001.docx
-> version: 1.1a, major: 1, minor: 1, suffix: a
Returns:
tuple: A tuple containing the document version (str), major version (int or None),
minor version (int or None), and suffix (str).
"""

if isinstance(path, str):
document_str = path
else:
document_str = str(path)
matches = _pattern.search(document_str)
if matches:
document_version, major, minor, suffix = matches.groups()
document_version = matches.group("version")
major = matches.group("major")
minor = matches.group("minor")
suffix = matches.group("suffix")
if document_version == "":
logger.warning(f"❌ No document version found in {path}.", fg="red")
return document_version or "", int(major) or 0, int(minor) or 0, suffix or ""
Expand All @@ -186,11 +201,14 @@ def _get_sort_key(path: Path) -> tuple[int, int, int | None, int | None, str]:
"""
Extracts the sort key from the given path.
Parameters:
- path (Path): The path object to extract the sort key from.
Args:
path (Path): The path object to extract the sort key from.
Example: path/to/file/ORDCHGMIG-informatorischeLesefassung1.1a_99991231_20231001.docx
with gueltig_von_date: 20231001 and gueltig_bis_date: 99991231, major: 1, minor: 1, suffix: a
Returns:
- tuple: A tuple containing the "gültig von" date, "gültig bis" date, and version number.
tuple: A tuple containing the "gültig von" date (int),
"gültig bis" date (int), major version (int or None), minor version (int or None), and suffix (str).
"""
parts = path.stem.split("_")
gueltig_von_date = int(parts[-1])
Expand Down

0 comments on commit fd4e6ae

Please sign in to comment.