-
-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathdoc_entries.py
621 lines (512 loc) · 20.2 KB
/
doc_entries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
"""
Documentation entries and doctests
This module contains the objects representing the entries in the documentation
system, and the functions used to parse docstrings into these objects.
"""
import logging
import re
from os import getenv
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Sequence
from mathics.core.evaluation import Message, Print, _Out
if TYPE_CHECKING:
from mathics.doc.structure import DocSection
# Used for getting test results by test expression and chapter/section information.
test_result_map: Dict[tuple, list] = {}
# These are all the XML/HTML-like tags that documentation supports.
ALLOWED_TAGS = (
"dl",
"dd",
"dt",
"em",
"url",
"ul",
"i",
"ol",
"li",
"con",
"console",
"img",
"imgpng",
"ref",
"subsection",
)
ALLOWED_TAGS_RE = dict(
(allowed, re.compile("<(%s.*?)>" % allowed)) for allowed in ALLOWED_TAGS
)
# This string is used, so we can indicate a trailing blank at the end of a line by
# adding this string to the end of the line which gets stripped off.
# Some editors and formatters like to strip off trailing blanks at the ends of lines.
END_LINE_SENTINAL = "#<--#"
# The regular expressions below (strings ending with _RE
# pull out information from docstring or text in a file. Ghetto parsing.
CONSOLE_RE = re.compile(r"(?s)<(?P<tag>con|console)>(?P<content>.*?)</(?P=tag)>")
DL_ITEM_RE = re.compile(
r"(?s)<(?P<tag>d[td])>(?P<content>.*?)(?:</(?P=tag)>|)\s*(?:(?=<d[td]>)|$)"
)
DL_RE = re.compile(r"(?s)<dl>(.*?)</dl>")
HYPERTEXT_RE = re.compile(
r"(?s)<(?P<tag>em|url)>(\s*:(?P<text>.*?):\s*)?(?P<content>.*?)</(?P=tag)>"
)
IMG_PNG_RE = re.compile(
r'<imgpng src="(?P<src>.*?)" title="(?P<title>.*?)" label="(?P<label>.*?)">'
)
IMG_RE = re.compile(
r'<img src="(?P<src>.*?)" title="(?P<title>.*?)" label="(?P<label>.*?)">'
)
# Preserve space before and after in-line code variables.
LATEX_RE = re.compile(r"(\s?)\$(\w+?)\$(\s?)")
LIST_ITEM_RE = re.compile(r"(?s)<li>(.*?)(?:</li>|(?=<li>)|$)")
LIST_RE = re.compile(r"(?s)<(?P<tag>ul|ol)>(?P<content>.*?)</(?P=tag)>")
MATHICS_RE = re.compile(r"(?<!\\)\'(.*?)(?<!\\)\'")
PYTHON_RE = re.compile(r"(?s)<python>(.*?)</python>")
QUOTATIONS_RE = re.compile(r"\"([\w\s,]*?)\"")
REF_RE = re.compile(r'<ref label="(?P<label>.*?)">')
SPECIAL_COMMANDS = {
"LaTeX": (r"<em>LaTeX</em>", r"\LaTeX{}"),
"Mathematica": (
r"<em>Mathematica</em>®",
r"\emph{Mathematica}\textregistered{}",
),
"Mathics": (r"<em>Mathics3</em>", r"\emph{Mathics3}"),
"Mathics3": (r"<em>Mathics3</em>", r"\emph{Mathics3}"),
"Sage": (r"<em>Sage</em>", r"\emph{Sage}"),
"Wolfram": (r"<em>Wolfram</em>", r"\emph{Wolfram}"),
"skip": (r"<br /><br />", r"\bigskip"),
}
TESTCASE_RE = re.compile(
r"""(?mx)^ # re.MULTILINE (multi-line match)
# and re.VERBOSE (readable regular expressions
((?:.|\n)*?)
^\s*([>#SX])>[ ](.*) # test-code indicator
((?:\n\s*(?:[:|=.][ ]|\.).*)*) # test-code results"""
)
TESTCASE_OUT_RE = re.compile(r"^\s*([:|=])(.*)$")
def get_results_by_test(test_expr: str, full_test_key: list, doc_data: dict) -> dict:
"""
Sometimes test numbering is off, either due to bugs or changes since the
data was read.
Here, we compensate for this by looking up the test by its chapter and section name
portion stored in `full_test_key` along with the and the test expression data
stored in `test_expr`.
This new key is looked up in `test_result_map` its value is returned.
`doc_data` is only first time this is called to populate `test_result_map`.
"""
# Strip off the test index form new key with this and the test string.
# Add to any existing value for that "result". This is now what we want to
# use as a tee in test_result_map to look for.
test_section = list(full_test_key)[:-1]
search_key = tuple(test_section)
if not test_result_map:
# Populate test_result_map from doc_data
for key, result in doc_data.items():
test_section = list(key)[:-1]
new_test_key = tuple(test_section)
next_result = test_result_map.get(new_test_key, None)
if next_result is None:
next_result = [result]
else:
next_result.append(result)
test_result_map[new_test_key] = next_result
results = test_result_map.get(search_key, None)
result = {}
if results:
for result_candidate in results:
if result_candidate["query"] == test_expr:
if result:
# Already found something
logging.warning(
f"Warning, multiple results appear under {search_key}."
)
return {}
result = result_candidate
return result
def filter_comments(doc: str) -> str:
"""Remove docstring documentation comments. These are lines
that start with ##"""
return "\n".join(
line for line in doc.splitlines() if not line.lstrip().startswith("##")
)
POST_SUBSTITUTION_TAG = "_POST_SUBSTITUTION%d_"
def pre_sub(regexp, text: str, repl_func: Callable):
"""apply substitutions previous to parse the text"""
post_substitutions: List[str] = []
def repl_pre(match):
repl = repl_func(match)
index = len(post_substitutions)
post_substitutions.append(repl)
return POST_SUBSTITUTION_TAG % index
text = regexp.sub(repl_pre, text)
return text, post_substitutions
def post_sub(text: str, post_substitutions) -> str:
"""apply substitutions after parsing the doctests."""
for index, sub in enumerate(post_substitutions):
text = text.replace(POST_SUBSTITUTION_TAG % index, sub)
return text
def parse_docstring_to_DocumentationEntry_items(
doc: str,
test_collection_constructor: Callable,
test_case_constructor: Callable,
text_constructor: Callable,
key_part=None,
) -> List["DocumentationEntry"]:
"""
This parses string `doc` (using regular expressions) into Python objects.
The function returns a list of ``DocText`` and ``DocTests`` objects which
are contained in a ``DocumentationElement``.
test_collection_constructor() is the class constructor call to create an
object for the test collection.
Each test is created via test_case_constructor().
Text within the test is stored via text_constructor.
"""
# This function is used to populate a ``DocumentEntry`` element, that
# in principle is not associated to any container
# (``DocChapter``/``DocSection``/``DocSubsection``)
# of the documentation system.
#
# The ``key_part`` parameter was used to set the ``key`` of the
# ``DocTest`` elements. This attribute
# should be set just after the ``DocumentationEntry`` (
# to which the tests belongs) is associated
# to a container, by calling ``container.set_parent_path``.
# However, the parameter is still used in MathicsDjango, so let's
# keep it and discard its value.
#
if key_part:
logging.warning("``key_part`` is deprecated. Its value is discarded.")
# Remove commented lines.
doc = filter_comments(doc) # .strip("\s")
# pre-substitute Python code because it might contain tests
doc, post_substitutions = pre_sub(
PYTHON_RE, doc, lambda m: "<python>%s</python>" % m.group(1)
)
# HACK: Artificially construct a last testcase to get the "intertext"
# after the last (real) testcase. Ignore the test, of course.
doc += "\n >> test\n = test"
testcases = TESTCASE_RE.findall(doc)
tests = None
items = []
for index, test_case in enumerate(testcases):
testcase = list(test_case)
text = testcase.pop(0).strip()
if text:
if tests is not None:
items.append(tests)
tests = None
text = post_sub(text, post_substitutions)
# Remove line breaks
text = re.sub(r" \\\n[ ]*", r" ", text)
text = re.sub(r"\\\n[ ]*", r" ", text)
items.append(text_constructor(text))
tests = None
if index < len(testcases) - 1:
test = test_case_constructor(index, testcase, None)
if tests is None:
tests = test_collection_constructor()
tests.tests.append(test)
# If the last block in the loop was not a Text block, append the
# last set of tests.
if tests is not None:
items.append(tests)
tests = None
return items
class DocTest:
"""
Class to hold a single doctest.
DocTest formatting rules:
* `>>` Marks test case; it will also appear as part of
the documentation.
* `#>` Marks test private or one that does not appear as part of
the documentation.
* `X>` Shows the example in the docs, but disables testing the example.
* `S>` Shows the example in the docs, but disables testing if environment
variable SANDBOX is set.
* `=` Compares the result text.
* `:` Compares an (error) message.
`|` Prints output.
"""
def __init__(
self,
index: int,
testcase: List[str],
key_prefix: Optional[tuple] = None,
):
def strip_sentinal(line: str):
"""Remove END_LINE_SENTINAL from the end of a line if it appears.
Some editors like to strip blanks at the end of a line.
Since the line ends in END_LINE_SENTINAL which isn't blank,
any blanks that appear before will be preserved.
Some tests require some lines to be blank or entry because
Mathics3 output can be that way
"""
if line.endswith(END_LINE_SENTINAL):
line = line[: -len(END_LINE_SENTINAL)]
# Also remove any remaining trailing blanks since that
# seems *also* what we want to do.
return line.strip()
self.index = index
self.outs: List[_Out] = []
self.result = None
# Private test cases are executed, but NOT shown as part of the docs
self.private = testcase[0] == "#"
# Ignored test cases are NOT executed, but shown as part of the docs
# Sandboxed test cases are NOT executed if environment SANDBOX is set
if testcase[0] == "X" or (testcase[0] == "S" and getenv("SANDBOX", False)):
self.ignore = True
# substitute '>' again so we get the correct formatting
testcase[0] = ">"
else:
self.ignore = False
self.test = strip_sentinal(testcase[1])
self._key: Optional[tuple] = key_prefix + (index,) if key_prefix else None
outs = testcase[2].splitlines()
for line in outs:
line = strip_sentinal(line)
if line:
if line.startswith("."):
text = line[1:]
if text.startswith(" "):
text = text[1:]
text = "\n" + text
if self.result is not None:
self.result += text
elif self.outs:
self.outs[-1].text += text
continue
match = TESTCASE_OUT_RE.match(line)
if not match:
continue
symbol, text = match.group(1), match.group(2)
text = text.strip()
if symbol == "=":
self.result = text
elif symbol == ":":
self.outs.append(Message("", "", text))
elif symbol == "|":
self.outs.append(Print(text))
def __str__(self) -> str:
return self.test
def compare(self, result: Optional[str], out: tuple = tuple()) -> bool:
"""
Performs a doctest comparison between ``result`` and ``wanted`` and returns
True if the test should be considered a success.
"""
return self.compare_result(result) and self.compare_out(out)
def compare_result(self, result: Optional[str]):
"""Compare a result with the expected result"""
wanted = self.result
# Check result
if wanted in ("...", result):
return True
if result is None or wanted is None:
return False
result_list = result.splitlines()
wanted_list = wanted.splitlines()
if result_list == [] and wanted_list == ["#<--#"]:
return True
if len(result_list) != len(wanted_list):
return False
for res, want in zip(result_list, wanted_list):
wanted_re = re.escape(want.strip())
wanted_re = wanted_re.replace("\\.\\.\\.", ".*?")
wanted_re = f"^{wanted_re}$"
if not re.match(wanted_re, res.strip()):
return False
return True
def compare_out(self, outs: tuple = tuple()) -> bool:
"""Compare messages and warnings produced during the evaluation of
the test with the expected messages and warnings."""
# Check out
wanted_outs = self.outs
if len(wanted_outs) == 1 and wanted_outs[0].text == "...":
# If we have ... don't check
return True
if len(outs) != len(wanted_outs):
# Mismatched number of output lines, and we don't have "..."
return False
# Python 3.13 replaces tabs by a single space in docstrings.
# In doctests we replace tabs by sequences of four spaces.
def tabs_to_spaces(val):
return val.text.replace("\t", 4 * " ")
# Need to check all output line by line
for got, wanted in zip(outs, wanted_outs):
if wanted.text == "...":
return True
if not tabs_to_spaces(got) == tabs_to_spaces(wanted):
return False
return True
@property
def key(self):
"""key identifier of the test"""
return self._key if hasattr(self, "_key") else None
@key.setter
def key(self, value):
"""setter for the key identifier of the test"""
assert self.key is None
self._key = value
return self._key
class DocTests:
"""
A bunch of consecutive ``DocTest`` objects extracted from a Builtin docstring.
"""
def __init__(self):
self.tests = []
self.text = ""
def get_tests(self) -> list:
"""
Returns lists test objects.
"""
return self.tests
# def is_private(self) -> bool:
# """Returns True if this test is "private" not supposed to be visible as example documentation."""
# return all(test.private for test in self.tests)
def __str__(self) -> str:
return "\n".join(str(test) for test in self.tests)
def test_indices(self) -> List[int]:
"""indices of the tests"""
return [test.index for test in self.tests]
class DocText:
"""
Class to hold some (non-test) text.
Some of the kinds of tags you may find here are showing in global ALLOWED_TAGS.
Some text may be marked with surrounding "$" or "'".
The code here however does not make use of any of the tagging.
"""
def __init__(self, text):
self.text = text
def __str__(self) -> str:
return self.text
def get_tests(self) -> list:
"""
Return tests in a DocText item - there never are any.
"""
return []
# def is_private(self) -> bool:
# """the test is private, meaning that it will not be included in the
# documentation, but tested in the doctest cycle."""
# return False
def test_indices(self) -> List[int]:
"""indices of the tests"""
return []
# Former XMLDoc
class DocumentationEntry:
"""
A class to hold the content of a documentation entry,
in our custom XML-like format.
Describes the contain of an entry in the documentation system, as a
sequence (list) of items of the clase `DocText` and `DocTests`.
``DocText`` items contains an internal XML-like formatted text. ``DocTests`` entries
contain one or more `DocTest` element.
Each level of the Documentation hierarchy contains an XMLDoc, describing the
content after the title and before the elements of the next level. For example,
in ``DocChapter``, ``DocChapter.doc`` contains the text coming after the title
of the chapter, and before the sections in `DocChapter.sections`.
Specialized classes like LaTeXDoc or and DjangoDoc provide methods for
getting formatted output. For LaTeXDoc ``latex()`` is added while for
DjangoDoc ``html()`` is added
Mathics core also uses this in getting usage strings (`??`).
"""
items: Sequence["DocumentationEntry"]
def __init__(
self, doc_str: str, title: str, section: Optional["DocSection"] = None
):
self._set_classes()
self.title = title
self.path = None
if section:
chapter = section.chapter
part = chapter.part
# Note: we elide section.title
key_prefix = (part.title, chapter.title, title)
else:
key_prefix = None
self.key_prefix = key_prefix
self.rawdoc = doc_str
self.items = parse_docstring_to_DocumentationEntry_items(
self.rawdoc,
self.docTest_collection_class,
self.docTest_class,
self.docText_class,
None,
)
def _set_classes(self):
"""
Tells to the initializator the classes to be used to build the items.
This must be overloaded by the daughter classes.
"""
if not hasattr(self, "docTest_collection_class"):
self.docTest_collection_class = DocTests
self.docTest_class = DocTest
self.docText_class = DocText
def __str__(self) -> str:
return "\n\n".join(str(item) for item in self.items)
def text(self) -> str:
"""text version of the documentation entry"""
# used for introspection
# TODO parse XML and pretty print
# HACK
item = str(self.items[0])
item = "\n".join(line.strip() for line in item.split("\n"))
item = item.replace("<dl>", "")
item = item.replace("</dl>", "")
item = item.replace("<dt>", " ")
item = item.replace("</dt>", "")
item = item.replace("<dd>", " ")
item = item.replace("</dd>", "")
item = "\n".join(line for line in item.split("\n") if not line.isspace())
item = re.sub(r"\$([0-9a-zA-Z]*)\$", r"\1", item)
return item
def get_tests(self) -> List["DocTest"]:
"""retrieve a list of tests in the documentation entry"""
tests = []
for item in self.items:
tests.extend(item.get_tests())
return tests
def set_parent_path(self, parent):
"""Set the parent path"""
self.path = None
path = []
while hasattr(parent, "parent"):
path = [parent.title] + path
parent = parent.parent
if hasattr(parent, "title"):
path = [parent.title] + path
if path:
self.path = path
# Set the key on each test
for test in self.get_tests():
assert test.key is None
# For backward compatibility, we need
# to reduce this to three fields.
# TODO: remove me and ensure that this
# works here and in Mathics Django
if len(path) > 3:
path = path[:2] + [path[-1]]
test.key = tuple(path) + (test.index,)
return self
class Tests:
"""
A group of tests in the same section or subsection.
"""
def __init__(
self,
part_name: str,
chapter_name: str,
section_name: str,
doctests: List[DocTest],
subsection_name: Optional[str] = None,
):
self.part = part_name
self.chapter = chapter_name
self.section = section_name
self.subsection = subsection_name
self.tests = doctests
self._key = None
@property
def key(self):
"""key of the tests"""
return self._key
@key.setter
def key(self, value):
assert self._key is None
self._key = value
return self._key