Skip to content

Commit 275f973

Browse files
committed
DM scraping tool: Add optional check against TOC
Adds tooling to compile the adoc and pull the TOC. Then runs a check that the element counts are the same. Note that this is a VERY rough check, and there are known differences. Right now it does nothing but print, but I'm hoping it will help us to ensure that the DM editor isn't regressing. I've listed some known differences at the top. I should build them into the code at some point, but haven't done this yet. Right now, since this is just a manual check anyway, the user can just confirm against the list of known ok differences.
1 parent 33a2c5f commit 275f973

File tree

1 file changed

+168
-19
lines changed

1 file changed

+168
-19
lines changed

scripts/spec_xml/generate_spec_xml.py

+168-19
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,167 @@
1515
# limitations under the License.
1616

1717
import glob
18+
import logging
1819
import os
1920
import re
2021
import subprocess
22+
import sys
23+
from dataclasses import dataclass, field
24+
from typing import Optional
2125

2226
import click
2327

2428
DEFAULT_CHIP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
2529
DEFAULT_OUTPUT_DIR = os.path.abspath(os.path.join(DEFAULT_CHIP_ROOT, 'data_model'))
2630

2731

32+
# Known TOC differences
33+
# - NOC has an additional command section
34+
# - DiagnosticsGeneral has an additional DoNotUse attribute that does not appear in the TOC
35+
# - GeneralCommissioning has an additional section in the TOC for common error handling
36+
# - ballast configuration is missing two attributes from the TOC (D attributes)
37+
# - LevelControl has an additional attribute in the TOC for scene table extensions
38+
# - ModeSelect has an additional attribute in the TOC for scene table extensions
39+
# - Scenes has an extra command in the TOC for usage notes
40+
# - onoff has an additional attribute in the TOC for scene table extensions
41+
# - color control is all over the place - ignore this for attributes.
42+
# - microwave oven has an additional TOC section in commands for operational state description
43+
# - EnergyEVSE - attributes SessionID, SessionDuration, SessionEnergyCharged and SessionEnergyDischarged are all collapsed into one section, meaning the TOC has three fewer sections than the XML
44+
45+
46+
# Known ok warnings
47+
# - ballast config has two D attributes (power on level and power on fade time) that don't have access
48+
49+
@dataclass
class ElementCounts:
    """Attribute, command and event counts for a single cluster."""

    # Number of attribute entries
    attributes: int
    # Number of command entries
    commands: int
    # Number of event entries
    events: int
54+
55+
2856
def get_xml_path(filename, output_dir):
    """Return the absolute path of the XML file that corresponds to an adoc file.

    Only the base name of ``filename`` is used: every '.adoc' in it is replaced
    with '.xml' and the resulting name is placed inside ``output_dir``.
    """
    adoc_name = os.path.basename(filename)
    xml_name = adoc_name.replace('.adoc', '.xml')
    return os.path.abspath(os.path.join(output_dir, xml_name))
3159

3260

61+
def get_toc(filename: str, dry_run: bool) -> Optional[str]:
    """Compile an adoc file with asciidoctor and return the HTML of its table of contents.

    Args:
        filename: Path of the adoc file to compile.
        dry_run: When True, only print the asciidoctor command line.

    Returns:
        The HTML between the "Table of Contents" title div and the next closing
        ``</div>``. None is returned on a dry run, when asciidoctor cannot be
        started or exits with an error, or when no TOC is found in its output.
    """
    # -o - sends output to stdout so we can scrape it without needing an intermediate file
    # The asciidoctor attribute that limits TOC depth is 'toclevels' (plural).
    cmd = ['asciidoctor', '-a', 'toc', '-a', 'toclevels=2', '-d', 'book', '-o', '-', filename]
    if dry_run:
        print(cmd)
        return None

    try:
        # check=True is required for a non-zero exit status to raise CalledProcessError
        process = subprocess.run(cmd, capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the asciidoctor executable being missing or not runnable
        logging.error("Unable to compile adoc file")
        return None

    html = process.stdout.decode('utf-8')

    table_start = '<div id="toctitle">Table of Contents</div>'
    try:
        toc_start = html.index(table_start) + len(table_start)
        toc_end = html.index('</div>', toc_start)
    except ValueError:
        logging.error("Unable to find TOC")
        return None

    return html[toc_start:toc_end]
84+
85+
86+
def get_element_counts_from_toc(filename: str, dry_run: bool) -> "ElementCounts":
    """Count the attribute, command and event entries listed in an adoc file's TOC.

    This is a rough check: for each of the headings 'Attributes', 'Commands' and
    'Events' it counts the ``<li>`` items between the first occurrence of the
    heading text and the next ``</ul>`` in the TOC HTML.

    Args:
        filename: Path of the adoc file whose TOC is inspected.
        dry_run: Passed through to get_toc.

    Returns:
        An ElementCounts with the three counts. All counts are 0 when get_toc
        returns nothing; a single count is 0 when its heading (or a following
        ``</ul>``) is not present.
    """
    toc = get_toc(filename, dry_run)
    if not toc:
        return ElementCounts(0, 0, 0)

    def _count_list_items(element_tag: str) -> int:
        start = toc.find(element_tag)
        if start < 0:
            return 0
        end = toc.find('</ul>', start)
        if end < 0:
            return 0
        # We don't care about the final </ul>, just the number of <li> items in between
        return toc.count('<li>', start, end)

    return ElementCounts(
        attributes=_count_list_items('Attributes'),
        commands=_count_list_items('Commands'),
        events=_count_list_items('Events'),
    )
105+
106+
107+
def scrape_cluster(filename: str, scraper: str, output_dir: str, dry_run: bool, verify: bool) -> None:
    """Run the scraper on one cluster adoc file and optionally verify the result.

    Args:
        filename: Path of the cluster adoc file to scrape.
        scraper: Scraper executable to invoke.
        output_dir: Root output directory; the XML goes in its 'clusters' subdirectory.
        dry_run: When True, print the scraper command instead of running it.
        verify: When True (and not a dry run), check the generated XML against
            the adoc file's TOC.
    """
    clusters_output_dir = os.path.abspath(os.path.join(output_dir, 'clusters'))
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(clusters_output_dir, exist_ok=True)

    xml_path = get_xml_path(filename, clusters_output_dir)
    cmd = [scraper, 'cluster', '-i', filename, '-o', xml_path, '-nd', '--define', 'in-progress']
    if dry_run:
        print(cmd)
        return

    # The scraper's exit status is deliberately not checked here.
    subprocess.run(cmd)

    if verify:
        verify_against_toc(filename, xml_path)
122+
123+
124+
def verify_against_toc(adoc_filename: str, xml_filename: str) -> None:
    """Compare element counts in a scraped cluster XML with the adoc file's TOC.

    The XML is parsed with the spec parsing helpers from src/python_testing.
    The number of non-global attributes, of accepted plus generated commands,
    and of events is then compared with the counts taken from the TOC. Every
    mismatch is logged as an error, and if there is any mismatch the TOC is
    printed. Nothing is returned and nothing is raised for a mismatch.

    Args:
        adoc_filename: Path of the adoc file the XML was generated from.
        xml_filename: Path of the generated XML file.
    """
    spec_parsing_path = os.path.join(DEFAULT_CHIP_ROOT, 'src', 'python_testing')
    # This function runs once per cluster file; don't grow sys.path on every call.
    if spec_parsing_path not in sys.path:
        sys.path.append(spec_parsing_path)

    from matter_testing_support import ProblemNotice
    from spec_parsing_support import XmlCluster, add_cluster_data_from_xml
    from global_attribute_ids import GlobalAttributeIds
    import xml.etree.ElementTree as ElementTree

    xml_clusters: dict[int, XmlCluster] = {}
    pure_base_clusters: dict[str, XmlCluster] = {}
    ids_by_name: dict[str, int] = {}
    problems: list[ProblemNotice] = []
    try:
        tree = ElementTree.parse(xml_filename)
        root = tree.getroot()
        add_cluster_data_from_xml(root, xml_clusters, pure_base_clusters, ids_by_name, problems)
    except ElementTree.ParseError:
        logging.error(f"Error parsing the generated XML from {adoc_filename}")
        return
    except FileNotFoundError:
        logging.error(f'failed to generate an XML file for {adoc_filename} - consider whether this needs to be in the exclude list')
        return

    # Exactly one cluster is expected, either a regular one or a pure base cluster.
    cluster_to_check = xml_clusters
    if len(xml_clusters) != 1:
        if len(pure_base_clusters) == 1:
            cluster_to_check = pure_base_clusters
        else:
            logging.error(f"Unexpected number of clusters in the file {adoc_filename} - unable to test against TOC")
            for problem in problems:
                print(str(problem))
            return

    # Only compile the adoc once we know there is something to compare against.
    counts = get_element_counts_from_toc(adoc_filename, dry_run=False)

    # Built once rather than for every attribute being tested.
    global_attribute_ids = {g.value for g in GlobalAttributeIds}

    ok = True
    for c in cluster_to_check.values():
        non_global_attrs = [a for a in c.attributes if a not in global_attribute_ids]
        if len(non_global_attrs) != counts.attributes:
            logging.error(
                f"Unexpected number of attributes from {adoc_filename} - TOC lists {counts.attributes}, xml has {len(non_global_attrs)}")
            ok = False
        if (len(c.accepted_commands) + len(c.generated_commands)) != counts.commands:
            logging.error(
                f"Unexpected number of commands from {adoc_filename} - TOC lists {counts.commands}, xml has {len(c.accepted_commands)} accepted, {len(c.generated_commands)} generated")
            ok = False
        if len(c.events) != counts.events:
            logging.error(f"Unexpected number of events from {adoc_filename} - TOC lists {counts.events}, xml has {len(c.events)}")
            ok = False

    if not ok:
        # Show the raw TOC so the mismatch can be checked by hand.
        print("TOC:")
        print(get_toc(adoc_filename, False))
177+
178+
33179
@click.command()
34180
@click.option(
35181
'--scraper',
@@ -50,53 +196,56 @@ def get_xml_path(filename, output_dir):
50196
default=False,
51197
is_flag=True,
52198
help='Flag for dry run')
53-
def main(scraper, spec_root, output_dir, dry_run):
199+
@click.option(
200+
'--verify',
201+
default=False,
202+
is_flag=True,
203+
help="Verify cluster scrapes against TOC after scraping"
204+
)
205+
@click.option(
206+
'--single-cluster',
207+
type=str,
208+
help="scrape just this cluster file"
209+
)
210+
def main(scraper, spec_root, output_dir, dry_run, verify, single_cluster):
    # Either scrape just the one requested cluster file, or do the full run.
    if single_cluster:
        scrape_cluster(single_cluster, scraper, output_dir, dry_run, verify)
    else:
        # Clusters need to be scraped first because the cluster directory is passed to the device type directory
        scrape_clusters(scraper, spec_root, output_dir, dry_run, verify)
        scrape_device_types(scraper, spec_root, output_dir, dry_run)
        # Versions are only dumped on a real run
        if not dry_run:
            dump_versions(scraper, spec_root, output_dir)
59219

60220

61-
def scrape_clusters(scraper, spec_root, output_dir, dry_run):
221+
def scrape_clusters(scraper, spec_root, output_dir, dry_run, verify):
    """Scrape every cluster adoc file under the spec's src directory.

    All adoc files in the service_device_management, app_clusters and
    app_clusters/media directories are scraped except those on the matching
    exclude list. From the data_model directory only the explicitly listed
    cluster files are scraped.

    Args:
        scraper: Scraper executable to invoke.
        spec_root: Root directory of the spec checkout.
        output_dir: Root output directory, passed on to scrape_cluster.
        dry_run: Passed on to scrape_cluster.
        verify: Passed on to scrape_cluster.
    """
    src_dir = os.path.abspath(os.path.join(spec_root, 'src'))
    sdm_clusters_dir = os.path.abspath(os.path.join(src_dir, 'service_device_management'))
    app_clusters_dir = os.path.abspath(os.path.join(src_dir, 'app_clusters'))
    dm_clusters_dir = os.path.abspath(os.path.join(src_dir, 'data_model'))
    media_clusters_dir = os.path.abspath(os.path.join(app_clusters_dir, 'media'))
    dm_clusters_list = ['ACL-Cluster.adoc', 'Binding-Cluster.adoc', 'bridge-clusters.adoc',
                        'Descriptor-Cluster.adoc', 'Group-Key-Management-Cluster.adoc', 'ICDManagement.adoc',
                        'Label-Cluster.adoc']
    sdm_exclude_list = ['AdminAssistedCommissioningFlows.adoc', 'BulkDataExchange.adoc', 'CommissioningFlows.adoc',
                        'DeviceCommissioningFlows.adoc', 'DistributedComplianceLedger.adoc', 'OTAFileFormat.adoc', 'OTASoftwareUpdate.adoc',
                        'Resource_Minima.adoc', 'WiFiPerDeviceCredentials.adoc', 'ClientSideLocalization.adoc']
    app_exclude_list = ['appliances.adoc', 'closures.adoc', 'general.adoc',
                        'hvac.adoc', 'lighting.adoc', 'meas_and_sense.adoc', 'robots.adoc', 'energy_management.adoc', 'network_infrastructure.adoc']
    media_exclude_list = ['media.adoc', 'VideoPlayerArchitecture.adoc']

    def scrape_all_clusters(directory: str, exclude_list: Optional[list[str]] = None) -> None:
        # None instead of a mutable [] default; a set makes the per-file test cheap.
        excluded = set(exclude_list or ())
        for filename in glob.glob(f'{directory}/*.adoc'):
            if os.path.basename(filename) in excluded:
                continue
            scrape_cluster(filename, scraper, output_dir, dry_run, verify)

    scrape_all_clusters(sdm_clusters_dir, sdm_exclude_list)
    scrape_all_clusters(app_clusters_dir, app_exclude_list)
    scrape_all_clusters(media_clusters_dir, media_exclude_list)
    for f in dm_clusters_list:
        filename = f'{dm_clusters_dir}/{f}'
        scrape_cluster(filename, scraper, output_dir, dry_run, verify)
100249

101250

102251
def scrape_device_types(scraper, spec_root, output_dir, dry_run):

0 commit comments

Comments
 (0)