Commit 201d5fa

Update DM XML scraper script (#33452)
* scraper updates re-write this msg
* Restyled by isort

Co-authored-by: Restyled.io <commits@restyled.io>
1 parent c419e2f commit 201d5fa

1 file changed: scripts/spec_xml/generate_spec_xml.py (+77 −26)

@@ -20,6 +20,8 @@
 import re
 import subprocess
 import sys
+import xml.etree.ElementTree as ElementTree
+from pathlib import Path
 
 import click
 
@@ -36,6 +38,20 @@ def get_xml_path(filename, output_dir):
     return os.path.abspath(os.path.join(output_dir, xml))
 
 
+def make_asciidoc(target: str, include_in_progress: bool, spec_dir: str, dry_run: bool) -> str:
+    cmd = ['make', 'PRINT_FILENAMES=1']
+    if include_in_progress:
+        cmd.append('INCLUDE_IN_PROGRESS=1')
+    cmd.append(target)
+    if dry_run:
+        print(cmd)
+        return ''
+    else:
+        ret = subprocess.check_output(cmd, cwd=spec_dir).decode('UTF-8').rstrip()
+        print(ret)
+        return ret
+
+
 @click.command()
 @click.option(
     '--scraper',
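
Note on the helper above: make_asciidoc returns the captured build output as a single string, so the membership checks added further down (for example, base not in main_out) are substring tests against that output rather than exact filename matches. A minimal standalone sketch of the pattern, with illustrative file paths:

from pathlib import Path

# Stand-in for the output of `make PRINT_FILENAMES=1 pdf`: one included adoc path per line.
main_out = '\n'.join([
    'src/app_clusters/ACL-Cluster.adoc',
    'src/app_clusters/media/AudioOutput.adoc',
])

for candidate in ['ACL-Cluster.adoc', 'VideoPlayerArchitecture.adoc']:
    base = Path(candidate).stem
    if base not in main_out:   # substring check against the whole build output
        print(f'skipping file: {base} as it is not compiled into the asciidoc')
    else:
        print(f'scraping {candidate}')
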
@@ -56,16 +72,21 @@ def get_xml_path(filename, output_dir):
     default=False,
     is_flag=True,
     help='Flag for dry run')
-def main(scraper, spec_root, output_dir, dry_run):
+@click.option(
+    '--include-in-progress',
+    default=True,
+    type=bool,
+    help='Include in-progress items from spec')
+def main(scraper, spec_root, output_dir, dry_run, include_in_progress):
     # Clusters need to be scraped first because the cluster directory is passed to the device type directory
-    scrape_clusters(scraper, spec_root, output_dir, dry_run)
-    scrape_device_types(scraper, spec_root, output_dir, dry_run)
+    scrape_clusters(scraper, spec_root, output_dir, dry_run, include_in_progress)
+    scrape_device_types(scraper, spec_root, output_dir, dry_run, include_in_progress)
     if not dry_run:
         dump_versions(scraper, spec_root, output_dir)
         dump_cluster_ids(output_dir)
 
 
-def scrape_clusters(scraper, spec_root, output_dir, dry_run):
+def scrape_clusters(scraper, spec_root, output_dir, dry_run, include_in_progress):
     src_dir = os.path.abspath(os.path.join(spec_root, 'src'))
     sdm_clusters_dir = os.path.abspath(
         os.path.join(src_dir, 'service_device_management'))
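
Because the new --include-in-progress option is declared with type=bool rather than is_flag=True, it takes an explicit value on the command line and defaults to True. A self-contained sketch of just that option's behaviour (this demo command is not part of the script):

import click

# Minimal demo of a bool-typed click option, mirroring the declaration in the hunk above.
@click.command()
@click.option(
    '--include-in-progress',
    default=True,
    type=bool,
    help='Include in-progress items from spec')
def demo(include_in_progress):
    click.echo(f'include_in_progress={include_in_progress}')

if __name__ == '__main__':
    demo()

Running it with --include-in-progress False prints include_in_progress=False; with no argument it prints include_in_progress=True.
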
@@ -74,42 +95,64 @@ def scrape_clusters(scraper, spec_root, output_dir, dry_run):
     media_clusters_dir = os.path.abspath(
         os.path.join(app_clusters_dir, 'media'))
     clusters_output_dir = os.path.abspath(os.path.join(output_dir, 'clusters'))
-    dm_clusters_list = ['ACL-Cluster.adoc', 'Binding-Cluster.adoc', 'bridge-clusters.adoc',
-                        'Descriptor-Cluster.adoc', 'Group-Key-Management-Cluster.adoc', 'ICDManagement.adoc',
-                        'Label-Cluster.adoc']
-    sdm_exclude_list = ['AdminAssistedCommissioningFlows.adoc', 'BulkDataExchange.adoc', 'CommissioningFlows.adoc',
-                        'DeviceCommissioningFlows.adoc', 'DistributedComplianceLedger.adoc', 'OTAFileFormat.adoc']
-    app_exclude_list = ['appliances.adoc', 'closures.adoc', 'general.adoc',
-                        'hvac.adoc', 'lighting.adoc', 'meas_and_sense.adoc', 'robots.adoc']
-    media_exclude_list = ['media.adoc', 'VideoPlayerArchitecture.adoc']
 
     if not os.path.exists(clusters_output_dir):
         os.makedirs(clusters_output_dir)
 
+    print('Generating main spec to get file include list - this may take a few minutes')
+    main_out = make_asciidoc('pdf', include_in_progress, spec_root, dry_run)
+    print('Generating cluster spec to get file include list - this may take a few minutes')
+    cluster_out = make_asciidoc('pdf-appclusters-book', include_in_progress, spec_root, dry_run)
+
     def scrape_cluster(filename: str) -> None:
+        base = Path(filename).stem
+        if base not in main_out and base not in cluster_out:
+            print(f'skipping file: {base} as it is not compiled into the asciidoc')
+            return
         xml_path = get_xml_path(filename, clusters_output_dir)
         cmd = [scraper, 'cluster', '-i', filename, '-o',
-               xml_path, '-nd', '--define', 'in-progress']
+               xml_path, '-nd']
+        if include_in_progress:
+            cmd.extend(['--define', 'in-progress'])
         if dry_run:
             print(cmd)
         else:
             subprocess.run(cmd)
 
     def scrape_all_clusters(dir: str, exclude_list: list[str] = []) -> None:
         for filename in glob.glob(f'{dir}/*.adoc'):
-            if os.path.basename(filename) in exclude_list:
-                continue
             scrape_cluster(filename)
 
-    scrape_all_clusters(sdm_clusters_dir, sdm_exclude_list)
-    scrape_all_clusters(app_clusters_dir, app_exclude_list)
-    scrape_all_clusters(media_clusters_dir, media_exclude_list)
-    for f in dm_clusters_list:
-        filename = f'{dm_clusters_dir}/{f}'
-        scrape_cluster(filename)
-
-
-def scrape_device_types(scraper, spec_root, output_dir, dry_run):
+    scrape_all_clusters(dm_clusters_dir)
+    scrape_all_clusters(sdm_clusters_dir)
+    scrape_all_clusters(app_clusters_dir)
+    scrape_all_clusters(media_clusters_dir)
+
+    for xml_path in glob.glob(f'{clusters_output_dir}/*.xml'):
+        tree = ElementTree.parse(f'{xml_path}')
+        root = tree.getroot()
+        cluster = next(root.iter('cluster'))
+        # If there's no cluster ID table, this isn't a cluster
+        try:
+            next(cluster.iter('clusterIds'))
+        except StopIteration:
+            # If there's no cluster ID table, this isn't a cluster, just some kind of intro adoc
+            print(f'Removing file {xml_path} as it does not include any cluster definitions')
+            os.remove(xml_path)
+            continue
+        # For now, we're going to manually remove the word "Cluster" from the cluster name field
+        # to make the diff easier. The update to 1.2.4 of the scraper added this.
+        # TODO: submit a separate PR with JUST this change reverted and remove this code.
+        with open(xml_path, 'rb') as input:
+            xml_str = input.read()
+
+        original_name = bytes(cluster.attrib['name'], 'utf-8')
+        replacement_name = bytes(cluster.attrib['name'].removesuffix(" Cluster"), 'utf-8')
+        with open(xml_path, 'wb') as output:
+            output.write(xml_str.replace(original_name, replacement_name))
+
+
+def scrape_device_types(scraper, spec_root, output_dir, dry_run, include_in_progress):
     device_type_dir = os.path.abspath(
         os.path.join(spec_root, 'src', 'device_types'))
     device_types_output_dir = os.path.abspath(
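
The post-processing loop added above deletes any generated XML that lacks a clusterIds table and strips a trailing " Cluster" from the cluster name attribute. A standalone sketch of those ElementTree checks on an inline document (the element and attribute names follow the hunk above; the sample XML content is invented):

import xml.etree.ElementTree as ElementTree

sample = '''<cluster name="On/Off Cluster">
  <clusterIds>
    <clusterId id="0x0006" name="On/Off"/>
  </clusterIds>
</cluster>'''

root = ElementTree.fromstring(sample)
cluster = next(root.iter('cluster'))      # iter() includes the root element itself
try:
    next(cluster.iter('clusterIds'))      # StopIteration would mean no cluster ID table
except StopIteration:
    print('no cluster definition here - the file would be removed')
else:
    # str.removesuffix (Python 3.9+) strips the suffix only when it is present
    print(cluster.attrib['name'].removesuffix(' Cluster'))   # prints: On/Off
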
@@ -119,9 +162,16 @@ def scrape_device_types(scraper, spec_root, output_dir, dry_run):
     if not os.path.exists(device_types_output_dir):
         os.makedirs(device_types_output_dir)
 
+    print('Generating device type library to get file include list - this may take a few minutes')
+    device_type_output = make_asciidoc('pdf-devicelibrary-book', include_in_progress, spec_root, dry_run)
+
     def scrape_device_type(filename: str) -> None:
+        base = Path(filename).stem
+        if base not in device_type_output:
+            print(f'skipping file: {filename} as it is not compiled into the asciidoc')
+            return
         xml_path = get_xml_path(filename, device_types_output_dir)
-        cmd = [scraper, 'devicetype', '-c', clusters_output_dir,
+        cmd = [scraper, 'devicetype', '-c', '-cls', clusters_output_dir,
                '-nd', '-i', filename, '-o', xml_path]
         if dry_run:
             print(cmd)
@@ -187,7 +237,8 @@ def dump_cluster_ids(output_dir):
 
     json_file = os.path.join(clusters_output_dir, 'cluster_ids.json')
     with open(json_file, "w") as outfile:
-        json.dump(json_dict, outfile, indent=2)
+        json.dump(json_dict, outfile, indent=4)
+        outfile.write('\n')
 
 
 if __name__ == '__main__':
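
json.dump writes no trailing newline of its own, so the added outfile.write('\n') keeps cluster_ids.json newline-terminated alongside the switch to indent=4. A minimal illustration with hypothetical data:

import json

json_dict = {'0x0000': 'Example Cluster'}   # hypothetical entry; the real map is built from the scraped XML
with open('cluster_ids.json', 'w') as outfile:
    json.dump(json_dict, outfile, indent=4)
    outfile.write('\n')   # json.dump itself does not emit a trailing newline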
