def | __init__ (self, *args, **kwargs) |
def | start_meta (self, attrs) |
def | __getattr__ (self, methodName) |
def | __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False) |
def | convert_charref (self, name) |
def | endData (self, containerClass=NavigableString) |
def | handle_charref (self, ref) |
def | handle_comment (self, text) |
def | handle_data (self, data) |
def | handle_decl (self, data) |
def | handle_entityref (self, ref) |
def | handle_pi (self, text) |
def | isSelfClosingTag (self, name) |
def | parse_declaration (self, i) |
def | popTag (self) |
def | pushTag (self, tag) |
def | reset (self) |
def | unknown_endtag (self, name) |
def | unknown_starttag (self, name, attrs, selfClosing=0) |
def | __call__ (self, *args, **kwargs) |
def | __contains__ (self, x) |
def | __delitem__ (self, key) |
def | __eq__ (self, other) |
def | __getitem__ (self, key) |
def | __init__ (self, parser, name, attrs=None, parent=None, previous=None) |
def | __iter__ (self) |
def | __len__ (self) |
def | __ne__ (self, other) |
def | __nonzero__ (self) |
def | __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING) |
def | __setitem__ (self, key, value) |
def | __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
def | __unicode__ (self) |
def | childGenerator (self) |
def | clear (self) |
def | decompose (self) |
def | fetchText (self, text=None, recursive=True, limit=None) |
def | find (self, name=None, attrs={}, recursive=True, text=None, **kwargs) |
def | findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) |
def | firstText (self, text=None, recursive=True) |
def | get (self, key, default=None) |
def | getString (self) |
def | getText (self, separator=u"") |
def | has_key (self, key) |
def | index (self, element) |
def | prettify (self, encoding=DEFAULT_OUTPUT_ENCODING) |
def | recursiveChildGenerator (self) |
def | renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
def | setString (self, string) |
def | append (self, tag) |
def | extract (self) |
def | findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findNext (self, name=None, attrs={}, text=None, **kwargs) |
def | findNextSibling (self, name=None, attrs={}, text=None, **kwargs) |
def | findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findParent (self, name=None, attrs={}, **kwargs) |
def | findParents (self, name=None, attrs={}, limit=None, **kwargs) |
def | findPrevious (self, name=None, attrs={}, text=None, **kwargs) |
def | findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs) |
def | findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | insert (self, position, newChild) |
def | nextGenerator (self) |
def | nextSiblingGenerator (self) |
def | parentGenerator (self) |
def | previousGenerator (self) |
def | previousSiblingGenerator (self) |
def | replaceWith (self, replaceWith) |
def | replaceWithChildren (self) |
def | setup (self, parent=None, previous=None) |
def | substituteEncoding (self, str, encoding=None) |
def | toEncoding (self, s, encoding=None) |
| declaredHTMLEncoding |
| originalEncoding |
| convertEntities |
| convertHTMLEntities |
| convertXMLEntities |
| currentData |
| currentTag |
| declaredHTMLEncoding |
| escapeUnrecognizedEntities |
| fromEncoding |
| hidden |
| instanceSelfClosingTags |
| literal |
| markup |
| markupMassage |
| originalEncoding |
| parseOnlyThese |
| previous |
| quoteStack |
| smartQuotesTo |
| tagStack |
| attrMap |
| attrs |
| containsSubstitutions |
| contents |
| convertHTMLEntities |
| convertXMLEntities |
| escapeUnrecognizedEntities |
| hidden |
| isSelfClosing |
| name |
| parserClass |
| next |
| nextSibling |
| parent |
| previous |
| previousSibling |
| string = property(getString, setString) |
| text = property(getText) |
The BeautifulSoup class is oriented towards skipping over
common HTML errors like unclosed tags. However, sometimes it makes
errors of its own. For instance, consider this fragment:
    <b>Foo<b>Bar</b></b>
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
encounters the second 'b'. It will think the author wrote
"<b>Foo<b>Bar", and didn't close the first 'b' tag, because
there's no real-world reason to bold something that's already
bold. When it encounters '</b></b>' it will close two more 'b'
tags, for a grand total of three tags closed instead of two. This
can throw off the rest of your document structure. The same is
true of a number of other tags, listed below.
It's much more common for someone to forget to close a 'b' tag
than to actually use nested 'b' tags, and the BeautifulSoup class
handles the common case. This class handles the not-so-common
case: where you can't believe someone wrote what they did, but
it's valid HTML and BeautifulSoup screwed up by assuming it
wouldn't be.
Definition at line 1626 of file BeautifulSoup.py.