Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/pip/typing-extensions-4.12.2
Browse files Browse the repository at this point in the history
  • Loading branch information
DeltaDaniel authored Jul 16, 2024
2 parents 85b7f13 + 4ae6968 commit 2ec7bf4
Show file tree
Hide file tree
Showing 15 changed files with 1,636 additions and 29 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: unittests/__snapshots__/*
- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,12 @@ Below the available options are listed:
- Description: This option allows the user to specify the path to the directory which should contain the output files generated by the tool. If the directory does not exist, it will be created automatically.
- Example: `--output_dir /path/to/output_directory`
- **Output File Type (`-ft`, `--file-type`):**
- Description: Defines the output format. Choose between csv for flat Nachrichtenstruktur tables, nested_json for json files of the nested Nachrichtenstruktur tables and reduced_nested_json for a reduced nested Nachrichtenstruktur. Default is csv.
- Description: Defines the output format (default is `csv`). Choose between:
- `csv` for flat Nachrichtenstruktur tables
- `nested_json` for json files of the nested Nachrichtenstruktur tables
- `reduced_nested_json` for a reduced nested Nachrichtenstruktur
- `sgh_json` for segmentgrouphierarchy files (cf. [MAUS sgh](https://github.com/Hochfrequenz/edifact-templates/tree/b024e3671deae9aec7e8ea29e74fa48257f6ccfe/segment_group_hierarchies))
- `tree` for .tree files (cf. [MAUS tree](https://github.com/Hochfrequenz/mig_ahb_utility_stack/blob/5cce94069ead5aa63d4b9ac7f5e0fcec0bf608ea/src/maus/reader/tree_to_sgh.py))
- Example: `--file-type "csv"`
- **Format Version (`-fv`, `--format-version`):**
- Description: Defines the format version.
Expand Down
2 changes: 2 additions & 0 deletions dev_requirements/requirements-tests.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# specific requirements for the tox tests env
pytest
pytest_loguru # makes loguru logs accessible to pytest
maus[tree]
syrupy
30 changes: 24 additions & 6 deletions dev_requirements/requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,37 @@
# SHA1:c2ec5869b1beb7fc76c4dcc722605bc1512f62aa
#
# This file is autogenerated by pip-compile-multi
# To update, run:
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile-multi
# pip-compile '.\dev_requirements\requirements-tests.in'
#
attrs==23.2.0
# via maus
colorama==0.4.6
# via
# loguru
# pytest
iniconfig==2.0.0
# via pytest
lark==1.1.9
# via maus
loguru==0.7.2
# via pytest-loguru
marshmallow==3.21.2
# via maus
maus[tree]==0.5.2
# via -r .\dev_requirements\requirements-tests.in
more-itertools==10.3.0
# via maus
packaging==24.0
# via pytest
# via
# marshmallow
# pytest
pluggy==1.5.0
# via pytest
pytest==8.2.2
# via -r dev_requirements/requirements-tests.in
pytest-loguru==0.4.0
# via -r dev_requirements/requirements-tests.in
# via -r .\dev_requirements\requirements-tests.in
win32-setctime==1.1.0
# via loguru
syrupy==4.6.1
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ classifiers = [
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies = ["click", "loguru", "maus>=0.5.0", "pydantic", "python-docx"] # add all the dependencies here

dependencies = ["click", "loguru", "jinja2","maus>=0.5.2", "pydantic", "python-docx"] # add all the dependencies here

dynamic = ["readme", "version"]

[project.urls]
Expand Down
14 changes: 9 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile pyproject.toml
# pip-compile '.\pyproject.toml'
#
annotated-types==0.7.0
# via pydantic
Expand All @@ -14,21 +14,25 @@ colorama==0.4.6
# via
# click
# loguru
jinja2==3.1.4
# via migmose (pyproject.toml)
loguru==0.7.2
# via migmose (pyproject.toml)
lxml==5.2.2
# via python-docx
marshmallow==3.21.3
markupsafe==2.1.5
# via jinja2
marshmallow==3.21.2
# via maus
maus==0.5.0
maus==0.5.2
# via migmose (pyproject.toml)
more-itertools==10.3.0
# via maus
packaging==24.0
# via marshmallow
pydantic==2.7.1
pydantic==2.8.2
# via migmose (pyproject.toml)
pydantic-core==2.18.2
pydantic-core==2.20.1
# via pydantic
python-docx==1.1.2
# via migmose (pyproject.toml)
Expand Down
19 changes: 17 additions & 2 deletions src/migmose/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@
@click.option(
"-ft",
"--file-type",
type=click.Choice(["csv", "nested_json", "reduced_nested_json", "sgh_json"], case_sensitive=False),
default=["csv", "nested_json", "reduced_nested_json", "sgh_json"],
type=click.Choice(["csv", "nested_json", "reduced_nested_json", "tree"], case_sensitive=False),
default=["csv", "nested_json", "reduced_nested_json", "tree"],
help="Defines the output format. Choose between csv and nested_json and reduced_nested_json. Default is csv.",
multiple=True,
)
Expand Down Expand Up @@ -138,6 +138,21 @@ def main(
output_dir_for_format,
)
sgh.to_json(m_format, output_dir_for_format)
if "tree" in file_type:
nested_nachrichtenstruktur, _ = NestedNachrichtenstruktur.create_nested_nachrichtenstruktur(
nachrichtenstrukturtabelle
)
reduced_nested_nachrichtenstruktur = (
ReducedNestedNachrichtenstruktur.create_reduced_nested_nachrichtenstruktur(nested_nachrichtenstruktur)
)
# Save the reduced nested Nachrichtenstruktur as json
logger.info(
"💾 Saving tree for {} and {} as json to {}.",
m_format,
format_version,
output_dir_for_format,
)
reduced_nested_nachrichtenstruktur.output_tree(m_format, output_dir_for_format)


if __name__ == "__main__":
Expand Down
101 changes: 100 additions & 1 deletion src/migmose/mig/reducednestednachrichtenstruktur.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
"""

import json
from collections import defaultdict
from pathlib import Path
from typing import Any, Optional, TypeAlias
from typing import Any, DefaultDict, Optional, TypeAlias

from jinja2 import Template
from loguru import logger
from maus.edifact import EdifactFormat
from pydantic import BaseModel, Field
Expand Down Expand Up @@ -110,6 +112,90 @@ def _build_segment_dict(
return segment_dict


def _dict_to_tree_str(tree: DefaultDict[str, list[NachrichtenstrukturZeile]]) -> str:
    """
    Render the tree dictionary as text using a Jinja2 template.

    For every key of ``tree`` the template emits the key, a colon and the comma separated
    entries ``bezeichnung[standard_status;bdew_status]`` of the segments stored under that key,
    followed by a line break.
    """
    # The "-" whitespace control markers strip the layout whitespace of the template itself;
    # the explicit {{"\n"}} expression is what terminates the output of each key.
    tree_template = Template(
        """{%- for key, segment_list in tree.items() -%}
{{-key-}}:{%- for segment in segment_list -%}
{{segment.bezeichnung}}[{{segment.standard_status}};{{segment.bdew_status}}]
{%- if not loop.last -%},{%- endif -%}
{%- endfor -%}{{"\n"}}
{%- endfor -%}
"""
    )
    return tree_template.render(tree=tree)


def _unb_unz_zeile(bezeichnung: str) -> NachrichtenstrukturZeile:
    """Create the synthetic row (``UNB`` or ``UNZ``) that is placed around the first root segment under "/"."""
    return NachrichtenstrukturZeile(
        zaehler="0",
        nr="00000",
        bezeichnung=bezeichnung,
        standard_status="M",
        bdew_status="M",
        standard_maximale_wiederholungen=0,
        bdew_maximale_wiederholungen=0,
        ebene=0,
        inhalt="0",
    )


def _build_tree_dict(
    reduced_nestednachrichtenstruktur: "ReducedNestedNachrichtenstruktur",
    tree_dict: Optional[DefaultDict[str, list[NachrichtenstrukturZeile]]] = None,
) -> DefaultDict[str, list[NachrichtenstrukturZeile]]:
    """
    Build a dictionary to compose the .tree files in the MAUS library.

    The structure is walked recursively. A node without header line is treated as the root:
    its first segment is stored under the key "/" between a synthetic UNB and UNZ row, and the
    key "UNH" collects the remaining root segments (except UNH/UNT), the header lines of the
    direct segment groups and finally the UNT segment. A node with a header line stores its
    segments and the header lines of its direct segment groups under the ``bezeichnung`` of
    its own header line.

    :param reduced_nestednachrichtenstruktur: the (sub)structure to convert
    :param tree_dict: the dictionary filled so far; a fresh one is created if omitted
    :return: the dictionary extended by the entries of this node and all nested segment groups
    :raises ValueError: if the node has neither a header line nor a first segment
    """
    if tree_dict is None:
        tree_dict = defaultdict(list)

    header_linie = reduced_nestednachrichtenstruktur.header_linie
    segmente = reduced_nestednachrichtenstruktur.segmente
    segmentgruppen = reduced_nestednachrichtenstruktur.segmentgruppen

    # ``segmente`` is checked for emptiness before indexing, so that an empty root ends up in
    # the ValueError branch below instead of failing with an IndexError.
    if header_linie is None and segmente and segmente[0] is not None:
        tree_dict["/"] = [_unb_unz_zeile("UNB"), segmente[0], _unb_unz_zeile("UNZ")]
        unh_children = tree_dict["UNH"]
        unh_children.extend(
            [segment for segment in segmente if segment and segment.bezeichnung not in ["UNH", "UNT"]]
        )
        unh_children.extend([sg.header_linie for sg in segmentgruppen if sg and sg.header_linie])
        # UNT is appended last, i.e. after the segment groups
        unh_children.extend([segment for segment in segmente if segment and segment.bezeichnung in ["UNT"]])
    elif header_linie is not None:
        # The guards prevent the defaultdict from creating a key for groups without content.
        if segmente not in [[], [None]]:
            tree_dict[header_linie.bezeichnung].extend([segment for segment in segmente if segment])
        if segmentgruppen:
            tree_dict[header_linie.bezeichnung].extend(
                [sg.header_linie for sg in segmentgruppen if sg and sg.header_linie]
            )
    else:
        raise ValueError("No header line or segment found.")

    for segmentgruppe in segmentgruppen:
        if segmentgruppe is not None:
            tree_dict = _build_tree_dict(segmentgruppe, tree_dict)
    return tree_dict


class ReducedNestedNachrichtenstruktur(BaseModel):
"""will contain the tree structure of nachrichtenstruktur tables"""

Expand Down Expand Up @@ -149,3 +235,16 @@ def to_json(self, message_type: EdifactFormat, output_dir: Path) -> dict[str, An
json.dump(structured_json, json_file, indent=4)
logger.info("Wrote reduced nested Nachrichtenstruktur for {} to {}", message_type, file_path)
return structured_json

def output_tree(self, message_type: EdifactFormat, output_dir: Path) -> None:
    """
    Write this reduced nested Nachrichtenstruktur in the .tree grammar of MAUS.

    The file is stored as ``<message_type>.tree`` inside ``output_dir``; the directory is
    created (including parents) if it does not exist yet.
    """
    # render first, so that nothing is created on disk if the conversion fails
    rendered_tree = _dict_to_tree_str(_build_tree_dict(self))
    output_dir.mkdir(parents=True, exist_ok=True)
    tree_file_path = output_dir / f"{message_type}.tree"
    tree_file_path.write_text(rendered_tree, encoding="utf-8")
    logger.info("Wrote reduced .tree file for {} to {}", message_type, tree_file_path)
3 changes: 2 additions & 1 deletion src/migmose/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
# marks the beginning of the complete nachrichtentruktur table
if line.text == nachrichtenstruktur_header:
mig_tables.extend([row.text for row in docx_object._cells[ind + 1 :]])
break
break

# filter empty rows and headers
mig_tables = [_zfill_nr(row) for row in mig_tables if row not in ("", "\n", nachrichtenstruktur_header)]
return mig_tables
9 changes: 9 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ deps =
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest --basetemp={envtmpdir} {posargs}

[testenv:update_snapshots]
# the update_snapshots environment runs the unit tests with --snapshot-update to regenerate the syrupy snapshots
deps =
-r requirements.txt
-r dev_requirements/requirements-tests.txt
syrupy
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest --basetemp={envtmpdir} {posargs} --snapshot-update

[testenv:linting]
# the linting environment is called by the Github Action that runs the linter
deps =
Expand Down
Loading

0 comments on commit 2ec7bf4

Please sign in to comment.