Skip to content

Commit

Permalink
process tables (#12)
Browse files Browse the repository at this point in the history
* init data model

* add logger

* read line by line

* added json output

* initialize NachrichtenstrukturZeile from raw parsed table line

* message_type from maus.edifact.EdifactFormat

* changed names: SegmentGruppe -> NestedNachrichtenstruktur

* renamed: BaumdiagrammSegmentGruppe -> ReducedNestedNachrichtenstruktur

* ➕ added test for json output of nested nachrichtenstruktur

* updated docstrings

---------

Co-authored-by: kevin <68426071+hf-krechan@users.noreply.github.com>
Co-authored-by: konstantin <konstantin.klein@hochfrequenz.de>
  • Loading branch information
3 people authored Mar 12, 2024
1 parent c76255a commit dbd8507
Show file tree
Hide file tree
Showing 19 changed files with 509 additions and 339 deletions.
14 changes: 9 additions & 5 deletions src/migmose/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import click
from maus.edifact import EdifactFormat

from migmose.parsing import find_file_to_format, parse_raw_nachrichtenstrukturzeile, preliminary_output_as_json
from migmose.mig.nachrichtenstruktur import NachrichtenstrukturTabelle
from migmose.mig.nestednachrichtenstruktur import NestedNachrichtenstruktur
from migmose.parsing import find_file_to_format, parse_raw_nachrichtenstrukturzeile


# add CLI logic
Expand Down Expand Up @@ -41,10 +43,12 @@ def main(input_dir: Path, output_dir, message_format: list[EdifactFormat]) -> No
"""
dict_files = find_file_to_format(message_format, input_dir)
for m_format, file in dict_files.items():
mig_table = parse_raw_nachrichtenstrukturzeile(file)
for item in mig_table:
print(item)
preliminary_output_as_json(mig_table, m_format, output_dir)
raw_lines = parse_raw_nachrichtenstrukturzeile(file)
nachrichtenstrukturtabelle = NachrichtenstrukturTabelle.init_raw_table(raw_lines)
nested_nachrichtenstruktur, _ = NestedNachrichtenstruktur.create_nested_nachrichtenstruktur(
nachrichtenstrukturtabelle
)
nested_nachrichtenstruktur.output_as_json(m_format, output_dir)


if __name__ == "__main__":
Expand Down
11 changes: 11 additions & 0 deletions src/migmose/mig/nachrichtenstruktur.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,14 @@ class for mig tables
"""

lines: list[NachrichtenstrukturZeile]

@classmethod
def init_raw_table(cls, raw_lines: list[str]) -> "NachrichtenstrukturTabelle":
    """
    Build a NachrichtenstrukturTabelle from the raw text lines of a table.

    Every entry of ``raw_lines`` is converted with
    ``NachrichtenstrukturZeile.init_raw_lines``; the resulting rows keep the
    order of the input list.
    """
    parsed_rows: list[NachrichtenstrukturZeile] = [
        NachrichtenstrukturZeile.init_raw_lines(single_line) for single_line in raw_lines
    ]
    return cls(lines=parsed_rows)
43 changes: 36 additions & 7 deletions src/migmose/mig/nachrichtenstrukturzeile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
contains class for lines in mig tables
"""

from typing import Any

from pydantic import BaseModel


Expand All @@ -23,10 +25,37 @@ class for lines in mig tables, e.g. (ORDCHG):

zaehler: str
nr: str | None = None
bezeichnung: str | None = None
standard_status: str | None = None
bdew_status: str | None = None
standard_maximale_wiederholungen: int | None = None
bdew_maximale_wiederholungen: int | None = None
ebene: int | None = None
inhalt: str | None = None
bezeichnung: str
standard_status: str
bdew_status: str
standard_maximale_wiederholungen: int
bdew_maximale_wiederholungen: int
ebene: int
inhalt: str

@classmethod
def init_raw_lines(cls, raw_line: str) -> "NachrichtenstrukturZeile":
    """
    Create a NachrichtenstrukturZeile from one tab-separated raw line.

    The first tab-separated column is discarded. Nine remaining columns are
    mapped onto all fields; eight remaining columns are mapped onto all
    fields except ``nr`` (such a line is treated as a segment group).

    Raises:
        ValueError: if the line has neither eight nor nine remaining columns.
    """
    all_names = (
        "zaehler",
        "nr",
        "bezeichnung",
        "standard_status",
        "bdew_status",
        "standard_maximale_wiederholungen",
        "bdew_maximale_wiederholungen",
        "ebene",
        "inhalt",
    )
    # str.split always yields at least one element, so this equals split(...)[1:]
    _, *fields = raw_line.split("\t")

    missing_columns = len(all_names) - len(fields)
    if missing_columns not in (0, 1):
        raise ValueError(f"Expected 8 or 9 fields, got {len(fields)}, line: {raw_line}")

    if missing_columns == 1:
        # segment group lines carry no "nr" column
        used_names = [name for name in all_names if name != "nr"]
    else:
        used_names = list(all_names)

    keyword_args: dict[str, Any] = dict(zip(used_names, fields))
    return cls(**keyword_args)
116 changes: 116 additions & 0 deletions src/migmose/mig/nestednachrichtenstruktur.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""
contains class for structured segmentgroups in mig tables. Builds table recursively.
"""

import json
from pathlib import Path
from types import NoneType
from typing import Optional, Tuple

from loguru import logger
from maus.edifact import EdifactFormat
from pydantic import BaseModel

from migmose.mig.nachrichtenstruktur import NachrichtenstrukturTabelle
from migmose.mig.nachrichtenstrukturzeile import NachrichtenstrukturZeile


class NestedNachrichtenstruktur(BaseModel):
    """
    class for structured segmentgroups in mig tables. Builds table recursively.
    Each instance holds an optional header line (a NachrichtenstrukturZeile), the segments that belong
    directly to it and the nested segment groups.
    e.g.(ORDCHG), the ``header_linie`` entries are omitted here for brevity:
    {
        "segmente": [
            {
                "zaehler": "0160",
                "nr": "7",
                "bezeichnung": "NAD",
                "standard_status": "M",
                "bdew_status": "M",
                "standard_maximale_wiederholungen": 1,
                "bdew_maximale_wiederholungen": 1,
                "ebene": 1,
                "inhalt": "MP-ID Absender"
            }
        ],
        "segmentgruppen": [
            {
                "segmente": [
                    {
                        "zaehler": "0260",
                        "nr": "8",
                        "bezeichnung": "CTA",
                        "standard_status": "M",
                        "bdew_status": "M",
                        "standard_maximale_wiederholungen": 1,
                        "bdew_maximale_wiederholungen": 1,
                        "ebene": 2,
                        "inhalt": "Ansprechpartner"
                    },
                    {
                        "zaehler": "0270",
                        "nr": "9",
                        "bezeichnung": "COM",
                        "standard_status": "C",
                        "bdew_status": "R",
                        "standard_maximale_wiederholungen": 5,
                        "bdew_maximale_wiederholungen": 5,
                        "ebene": 3,
                        "inhalt": "Kommunikationsverbindung"
                    }
                ],
                "segmentgruppen": []
            }
        ]
    }
    """

    # line that opened this segment group; None for the outermost structure
    header_linie: Optional[NachrichtenstrukturZeile] = None
    # lines with a "nr" that belong directly to this group
    segmente: list[Optional[NachrichtenstrukturZeile]] = []
    # groups opened by lines without a "nr" inside this group
    segmentgruppen: list[Optional["NestedNachrichtenstruktur"]] = []

    @classmethod
    def create_nested_nachrichtenstruktur(
        cls, table: NachrichtenstrukturTabelle, header_line: Optional[NachrichtenstrukturZeile] = None, index: int = 0
    ) -> Tuple["NestedNachrichtenstruktur", int]:
        """
        Build the nested structure from ``table`` starting at ``index``.

        A line whose ``nr`` is None is treated as the header of a segment group; the
        group is built by a recursive call that starts at the following line. All other
        lines are collected as segments of the current group.

        Args:
            table: flat table whose ``lines`` are consumed in order.
            header_line: header of the group being built, None for the outermost call.
            index: position in ``table.lines`` at which to start reading.

        Returns:
            The built structure and the index of the first line that was not consumed.
        """
        collected_segments: list[Optional[NachrichtenstrukturZeile]] = []
        collected_segmentgroups: list[Optional["NestedNachrichtenstruktur"]] = []
        i = index
        while i < len(table.lines):
            line = table.lines[i]
            is_line_segmentgruppe = line.nr is None
            if is_line_segmentgruppe:
                # the recursive call reports where reading has to continue
                added_segmentgroup, i = cls.create_nested_nachrichtenstruktur(table, line, i + 1)
                collected_segmentgroups.append(added_segmentgroup)
            else:
                collected_segments.append(line)
                i += 1

            # only nested groups (those with a header) can end before the table does
            if i < len(table.lines) and header_line is not None:
                next_line = table.lines[i]
                is_next_line_segmentgruppe = next_line.nr is None
                is_current_ebene_greater_than_next_ebene = line.ebene > next_line.ebene
                is_current_header_ebene_greater_eq_than_next_ebene = header_line.ebene >= next_line.ebene

                if (
                    is_next_line_segmentgruppe and is_current_header_ebene_greater_eq_than_next_ebene
                ) or is_current_ebene_greater_than_next_ebene:
                    # the next line belongs to an enclosing group: hand control back to the caller
                    break
        return cls(header_linie=header_line, segmente=collected_segments, segmentgruppen=collected_segmentgroups), i

    def output_as_json(self, message_type: EdifactFormat, output_dir: Path) -> None:
        """
        Write this structure as JSON to ``<output_dir>/<message_type>_nested_nachrichtenstruktur.json``.

        ``output_dir`` (including missing parents) is created if it does not exist.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        file_path = output_dir.joinpath(f"{message_type}_nested_nachrichtenstruktur.json")
        structured_json = self.model_dump()
        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(structured_json, json_file, indent=4)
        logger.info(f"Wrote nested Nachrichtenstruktur for {message_type} to {file_path}")
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
contains class for trees consisting of segments of mig tables
"""

from typing import Optional

from pydantic import BaseModel

from migmose.mig.nachrichtenstrukturzeile import NachrichtenstrukturZeile


class BaumSegmentGruppe(NachrichtenstrukturZeile):
class ReducedNestedNachrichtenstruktur(BaseModel):
"""will contain the tree structure of nachrichtenstruktur tables"""

header_linie: Optional[NachrichtenstrukturZeile] = None
segmente: list[NachrichtenstrukturZeile]
segmentgruppe: list["BaumSegmentGruppe"]
segmentgruppe: list["ReducedNestedNachrichtenstruktur"]
59 changes: 0 additions & 59 deletions src/migmose/mig/segmentgruppe.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/migmose/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,5 @@ def parse_raw_nachrichtenstrukturzeile(input_path: Path) -> list[str]:
mig_tables.extend([row.text for row in docx_object._cells[ind + 1 :]])
break
# filter empty rows and headers
mig_tables = [row for row in mig_tables if row not in ("\n", nachrichtenstruktur_header)]
mig_tables = [row for row in mig_tables if row not in ("", "\n", nachrichtenstruktur_header)]
return mig_tables
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ deps =
setenv = PYTHONPATH = {toxinidir}/src
commands =
coverage run -m pytest --basetemp={envtmpdir} {posargs}
coverage html --omit .tox/*,unittests/*
coverage report --fail-under 80 --omit .tox/*,unittests/*
coverage html --omit .tox/*,unittests/*,src/migmose/__main__.py
coverage report --fail-under 80 --omit .tox/*,unittests/*,src/migmose/__main__.py

[testenv:compile_requirements]
deps =
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
259 changes: 0 additions & 259 deletions unittests/test_data/ORDCHG_MIG_1_1_info_20230331_v2.xml

This file was deleted.

Loading

0 comments on commit dbd8507

Please sign in to comment.