Skip to content

Commit 664857b

Browse files
authored
Merge pull request #11 from JarrettR/refactor
Refactor
2 parents 0556635 + eb9c4b8 commit 664857b

36 files changed

+3888
-2197
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ __pycache__/
66
# C extensions
77
*.so
88

9+
*.svg
10+
911
# Distribution / packaging
1012
.Python
1113
build/
@@ -109,6 +111,7 @@ venv/
109111
ENV/
110112
env.bak/
111113
venv.bak/
114+
*.code-workspace
112115

113116
# Spyder project settings
114117
.spyderproject

__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .stretch_plugin_action import StretchPluginAction # Note the relative import!
22

33
StretchPluginAction('to_svg').register() # Instantiate and register to Pcbnew
4-
StretchPluginAction('to_pcb').register() # Instantiate and register to Pcbnew
4+
StretchPluginAction('to_pcb').register() # Instantiate and register to Pcbnew

bs/__init__.py

+72-17
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
"""
1616

1717
__author__ = "Leonard Richardson (leonardr@segfault.org)"
18-
__version__ = "4.9.0"
18+
__version__ = "4.9.3"
1919
__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
2020
# Use of this source code is governed by the MIT license.
2121
__license__ = "MIT"
2222

2323
__all__ = ['BeautifulSoup']
2424

25+
from collections import Counter
2526
import os
2627
import re
2728
import sys
@@ -39,15 +40,32 @@
3940
NavigableString,
4041
PageElement,
4142
ProcessingInstruction,
43+
PYTHON_SPECIFIC_ENCODINGS,
4244
ResultSet,
45+
Script,
46+
Stylesheet,
4347
SoupStrainer,
4448
Tag,
49+
TemplateString,
4550
)
4651

4752
# The very first thing we do is give a useful error if someone is
4853
# running this code under Python 3 without converting it.
4954
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
5055

56+
# Define some custom warnings.
57+
class GuessedAtParserWarning(UserWarning):
58+
"""The warning issued when BeautifulSoup has to guess what parser to
59+
use -- probably because no parser was specified in the constructor.
60+
"""
61+
62+
class MarkupResemblesLocatorWarning(UserWarning):
63+
"""The warning issued when BeautifulSoup is given 'markup' that
64+
actually looks like a resource locator -- a URL or a path to a file
65+
on disk.
66+
"""
67+
68+
5169
class BeautifulSoup(Tag):
5270
"""A data structure representing a parsed HTML or XML document.
5371
@@ -93,7 +111,7 @@ class BeautifulSoup(Tag):
93111
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
94112

95113
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
96-
114+
97115
def __init__(self, markup="", features=None, builder=None,
98116
parse_only=None, from_encoding=None, exclude_encodings=None,
99117
element_classes=None, **kwargs):
@@ -235,7 +253,9 @@ def deprecated_argument(old_name, new_name):
235253
if not original_builder and not (
236254
original_features == builder.NAME or
237255
original_features in builder.ALTERNATE_NAMES
238-
):
256+
) and markup:
257+
# The user did not tell us which TreeBuilder to use,
258+
# and we had to guess. Issue a warning.
239259
if builder.is_xml:
240260
markup_type = "XML"
241261
else:
@@ -269,7 +289,10 @@ def deprecated_argument(old_name, new_name):
269289
parser=builder.NAME,
270290
markup_type=markup_type
271291
)
272-
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
292+
warnings.warn(
293+
self.NO_PARSER_SPECIFIED_WARNING % values,
294+
GuessedAtParserWarning, stacklevel=2
295+
)
273296
else:
274297
if kwargs:
275298
warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")
@@ -309,7 +332,8 @@ def deprecated_argument(old_name, new_name):
309332
warnings.warn(
310333
'"%s" looks like a filename, not markup. You should'
311334
' probably open this file and pass the filehandle into'
312-
' Beautiful Soup.' % self._decode_markup(markup)
335+
' Beautiful Soup.' % self._decode_markup(markup),
336+
MarkupResemblesLocatorWarning
313337
)
314338
self._check_markup_is_url(markup)
315339

@@ -396,7 +420,8 @@ def _check_markup_is_url(cls, markup):
396420
' requests to get the document behind the URL, and feed'
397421
' that document to Beautiful Soup.' % cls._decode_markup(
398422
markup
399-
)
423+
),
424+
MarkupResemblesLocatorWarning
400425
)
401426

402427
def _feed(self):
@@ -422,13 +447,28 @@ def reset(self):
422447
self.current_data = []
423448
self.currentTag = None
424449
self.tagStack = []
450+
self.open_tag_counter = Counter()
425451
self.preserve_whitespace_tag_stack = []
426452
self.string_container_stack = []
427453
self.pushTag(self)
428454

429455
def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
430456
sourceline=None, sourcepos=None, **kwattrs):
431-
"""Create a new Tag associated with this BeautifulSoup object."""
457+
"""Create a new Tag associated with this BeautifulSoup object.
458+
459+
:param name: The name of the new Tag.
460+
:param namespace: The URI of the new Tag's XML namespace, if any.
461+
:param nsprefix: The prefix for the new Tag's XML namespace, if any.
462+
:param attrs: A dictionary of this Tag's attribute values; can
463+
be used instead of `kwattrs` for attributes like 'class'
464+
that are reserved words in Python.
465+
:param sourceline: The line number where this tag was
466+
(purportedly) found in its source document.
467+
:param sourcepos: The character position within `sourceline` where this
468+
tag was (purportedly) found.
469+
:param kwattrs: Keyword arguments for the new Tag's attribute values.
470+
471+
"""
432472
kwattrs.update(attrs)
433473
return self.element_classes.get(Tag, Tag)(
434474
None, self.builder, name, namespace, nsprefix, kwattrs,
@@ -458,13 +498,13 @@ def new_string(self, s, subclass=None):
458498
container = self.string_container(subclass)
459499
return container(s)
460500

461-
def insert_before(self, successor):
501+
def insert_before(self, *args):
462502
"""This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
463503
it because there is nothing before or after it in the parse tree.
464504
"""
465505
raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
466506

467-
def insert_after(self, successor):
507+
def insert_after(self, *args):
468508
"""This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
469509
it because there is nothing before or after it in the parse tree.
470510
"""
@@ -473,22 +513,26 @@ def insert_after(self, successor):
473513
def popTag(self):
474514
"""Internal method called by _popToTag when a tag is closed."""
475515
tag = self.tagStack.pop()
516+
if tag.name in self.open_tag_counter:
517+
self.open_tag_counter[tag.name] -= 1
476518
if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
477519
self.preserve_whitespace_tag_stack.pop()
478520
if self.string_container_stack and tag == self.string_container_stack[-1]:
479521
self.string_container_stack.pop()
480-
#print "Pop", tag.name
522+
#print("Pop", tag.name)
481523
if self.tagStack:
482524
self.currentTag = self.tagStack[-1]
483525
return self.currentTag
484526

485527
def pushTag(self, tag):
486528
"""Internal method called by handle_starttag when a tag is opened."""
487-
#print "Push", tag.name
529+
#print("Push", tag.name)
488530
if self.currentTag is not None:
489531
self.currentTag.contents.append(tag)
490532
self.tagStack.append(tag)
491533
self.currentTag = self.tagStack[-1]
534+
if tag.name != self.ROOT_TAG_NAME:
535+
self.open_tag_counter[tag.name] += 1
492536
if tag.name in self.builder.preserve_whitespace_tags:
493537
self.preserve_whitespace_tag_stack.append(tag)
494538
if tag.name in self.builder.string_containers:
@@ -599,15 +643,19 @@ def _linkage_fixer(self, el):
599643

600644
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
601645
"""Pops the tag stack up to and including the most recent
602-
instance of the given tag.
646+
instance of the given tag.
647+
648+
If there are no open tags with the given name, nothing will be
649+
popped.
603650
604651
:param name: Pop up to the most recent tag with this name.
605652
:param nsprefix: The namespace prefix that goes with `name`.
606653
:param inclusivePop: It this is false, pops the tag stack up
607654
to but *not* including the most recent instqance of the
608655
given tag.
656+
609657
"""
610-
#print "Popping to %s" % name
658+
#print("Popping to %s" % name)
611659
if name == self.ROOT_TAG_NAME:
612660
# The BeautifulSoup object itself can never be popped.
613661
return
@@ -616,6 +664,8 @@ def _popToTag(self, name, nsprefix=None, inclusivePop=True):
616664

617665
stack_size = len(self.tagStack)
618666
for i in range(stack_size - 1, 0, -1):
667+
if not self.open_tag_counter.get(name):
668+
break
619669
t = self.tagStack[i]
620670
if (name == t.name and nsprefix == t.prefix):
621671
if inclusivePop:
@@ -642,7 +692,7 @@ def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
642692
in the document. For instance, if this was a self-closing tag,
643693
don't call handle_endtag.
644694
"""
645-
# print "Start tag %s: %s" % (name, attrs)
695+
# print("Start tag %s: %s" % (name, attrs))
646696
self.endData()
647697

648698
if (self.parse_only and len(self.tagStack) <= 1
@@ -669,14 +719,14 @@ def handle_endtag(self, name, nsprefix=None):
669719
:param name: Name of the tag.
670720
:param nsprefix: Namespace prefix for the tag.
671721
"""
672-
#print "End tag: " + name
722+
#print("End tag: " + name)
673723
self.endData()
674724
self._popToTag(name, nsprefix)
675725

676726
def handle_data(self, data):
677727
"""Called by the tree builder when a chunk of textual data is encountered."""
678728
self.current_data.append(data)
679-
729+
680730
def decode(self, pretty_print=False,
681731
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
682732
formatter="minimal"):
@@ -691,6 +741,11 @@ def decode(self, pretty_print=False,
691741
if self.is_xml:
692742
# Print the XML declaration
693743
encoding_part = ''
744+
if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
745+
# This is a special Python encoding; it can't actually
746+
# go into an XML document because it means nothing
747+
# outside of Python.
748+
eventual_encoding = None
694749
if eventual_encoding != None:
695750
encoding_part = ' encoding="%s"' % eventual_encoding
696751
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
@@ -733,4 +788,4 @@ class FeatureNotFound(ValueError):
733788
if __name__ == '__main__':
734789
import sys
735790
soup = BeautifulSoup(sys.stdin)
736-
print soup.prettify()
791+
print(soup.prettify())

bs/builder/__init__.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -334,11 +334,11 @@ def close(self):
334334

335335
def startElement(self, name, attrs):
336336
attrs = dict((key[1], value) for key, value in list(attrs.items()))
337-
#print "Start %s, %r" % (name, attrs)
337+
#print("Start %s, %r" % (name, attrs))
338338
self.soup.handle_starttag(name, attrs)
339339

340340
def endElement(self, name):
341-
#print "End %s" % name
341+
#print("End %s" % name)
342342
self.soup.handle_endtag(name)
343343

344344
def startElementNS(self, nsTuple, nodeName, attrs):
@@ -476,8 +476,7 @@ def set_up_substitutions(self, tag):
476476

477477
def register_treebuilders_from(module):
478478
"""Copy TreeBuilders from the given module into this module."""
479-
# I'm fairly sure this is not the best way to do this.
480-
this_module = sys.modules['bs4.builder']
479+
this_module = sys.modules[__name__]
481480
for name in module.__all__:
482481
obj = getattr(module, name)
483482

bs/builder/_html5lib.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,9 @@ def removeChild(self, node):
375375

376376
def reparentChildren(self, new_parent):
377377
"""Move all of this tag's children into another tag."""
378-
# print "MOVE", self.element.contents
379-
# print "FROM", self.element
380-
# print "TO", new_parent.element
378+
# print("MOVE", self.element.contents)
379+
# print("FROM", self.element)
380+
# print("TO", new_parent.element)
381381

382382
element = self.element
383383
new_parent_element = new_parent.element
@@ -435,9 +435,9 @@ def reparentChildren(self, new_parent):
435435
element.contents = []
436436
element.next_element = final_next_element
437437

438-
# print "DONE WITH MOVE"
439-
# print "FROM", self.element
440-
# print "TO", new_parent_element
438+
# print("DONE WITH MOVE")
439+
# print("FROM", self.element)
440+
# print("TO", new_parent_element)
441441

442442
def cloneNode(self):
443443
tag = self.soup.new_tag(self.element.name, self.namespace)

0 commit comments

Comments
 (0)