|
def | __init__ (self, *args, **kwargs) |
|
def | start_meta (self, attrs) |
|
def | __getattr__ (self, methodName) |
|
def | __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False) |
|
def | convert_charref (self, name) |
|
def | endData (self, containerClass=NavigableString) |
|
def | handle_charref (self, ref) |
|
def | handle_comment (self, text) |
|
def | handle_data (self, data) |
|
def | handle_decl (self, data) |
|
def | handle_entityref (self, ref) |
|
def | handle_pi (self, text) |
|
def | isSelfClosingTag (self, name) |
|
def | parse_declaration (self, i) |
|
def | popTag (self) |
|
def | pushTag (self, tag) |
|
def | reset (self) |
|
def | unknown_endtag (self, name) |
|
def | unknown_starttag (self, name, attrs, selfClosing=0) |
|
def | __call__ (self, *args, **kwargs) |
|
def | __contains__ (self, x) |
|
def | __delitem__ (self, key) |
|
def | __eq__ (self, other) |
|
def | __getitem__ (self, key) |
|
def | __init__ (self, parser, name, attrs=None, parent=None, previous=None) |
|
def | __iter__ (self) |
|
def | __len__ (self) |
|
def | __ne__ (self, other) |
|
def | __nonzero__ (self) |
|
def | __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING) |
|
def | __setitem__ (self, key, value) |
|
def | __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
|
def | __unicode__ (self) |
|
def | childGenerator (self) |
|
def | clear (self) |
|
def | decompose (self) |
|
def | fetchText (self, text=None, recursive=True, limit=None) |
|
def | find (self, name=None, attrs={}, recursive=True, text=None, **kwargs) |
|
def | findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) |
|
def | firstText (self, text=None, recursive=True) |
|
def | get (self, key, default=None) |
|
def | getString (self) |
|
def | getText (self, separator=u"") |
|
def | has_key (self, key) |
|
def | index (self, element) |
|
def | prettify (self, encoding=DEFAULT_OUTPUT_ENCODING) |
|
def | recursiveChildGenerator (self) |
|
def | renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
|
def | setString (self, string) |
|
def | append (self, tag) |
|
def | extract (self) |
|
def | findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findNext (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findNextSibling (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findParent (self, name=None, attrs={}, **kwargs) |
|
def | findParents (self, name=None, attrs={}, limit=None, **kwargs) |
|
def | findPrevious (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | insert (self, position, newChild) |
|
def | nextGenerator (self) |
|
def | nextSiblingGenerator (self) |
|
def | parentGenerator (self) |
|
def | previousGenerator (self) |
|
def | previousSiblingGenerator (self) |
|
def | replaceWith (self, replaceWith) |
|
def | replaceWithChildren (self) |
|
def | setup (self, parent=None, previous=None) |
|
def | substituteEncoding (self, str, encoding=None) |
|
def | toEncoding (self, s, encoding=None) |
|
| declaredHTMLEncoding |
|
| originalEncoding |
|
| convertEntities |
|
| convertHTMLEntities |
|
| convertXMLEntities |
|
| currentData |
|
| currentTag |
|
| declaredHTMLEncoding |
|
| escapeUnrecognizedEntities |
|
| fromEncoding |
|
| hidden |
|
| instanceSelfClosingTags |
|
| literal |
|
| markup |
|
| markupMassage |
|
| originalEncoding |
|
| parseOnlyThese |
|
| previous |
|
| quoteStack |
|
| smartQuotesTo |
|
| tagStack |
|
| attrMap |
|
| attrs |
|
| containsSubstitutions |
|
| contents |
|
| convertHTMLEntities |
|
| convertXMLEntities |
|
| escapeUnrecognizedEntities |
|
| hidden |
|
| isSelfClosing |
|
| name |
|
| parserClass |
|
| next |
|
| nextSibling |
|
| parent |
|
| previous |
|
| previousSibling |
|
| string = property(getString, setString) |
|
| text = property(getText) |
|
The BeautifulSoup class is oriented towards skipping over
common HTML errors like unclosed tags. However, sometimes it makes
errors of its own. For instance, consider this fragment:
<b>Foo<b>Bar</b></b>
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
encounters the second 'b'. It will think the author wrote
"<b>Foo<b>Bar", and didn't close the first 'b' tag, because
there's no real-world reason to bold something that's already
bold. When it encounters '</b></b>' it will close two more 'b'
tags, for a grand total of three tags closed instead of two. This
can throw off the rest of your document structure. The same is
true of a number of other tags, listed below.
It's much more common for someone to forget to close a 'b' tag
than to actually use nested 'b' tags, and the BeautifulSoup class
handles the common case. This class handles the not-so-common
case: where you can't believe someone wrote what they did, but
it's valid HTML and BeautifulSoup screwed up by assuming it
wouldn't be.
Definition at line 1626 of file BeautifulSoup.py.