|
def | __init__ (self, *args, **kwargs) |
|
def | start_meta (self, attrs) |
|
def | __getattr__ (self, methodName) |
|
def | __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False) |
|
def | convert_charref (self, name) |
|
def | endData (self, containerClass=NavigableString) |
|
def | handle_charref (self, ref) |
|
def | handle_comment (self, text) |
|
def | handle_data (self, data) |
|
def | handle_decl (self, data) |
|
def | handle_entityref (self, ref) |
|
def | handle_pi (self, text) |
|
def | isSelfClosingTag (self, name) |
|
def | parse_declaration (self, i) |
|
def | popTag (self) |
|
def | pushTag (self, tag) |
|
def | reset (self) |
|
def | unknown_endtag (self, name) |
|
def | unknown_starttag (self, name, attrs, selfClosing=0) |
|
def | __call__ (self, *args, **kwargs) |
|
def | __contains__ (self, x) |
|
def | __delitem__ (self, key) |
|
def | __eq__ (self, other) |
|
def | __getitem__ (self, key) |
|
def | __init__ (self, parser, name, attrs=None, parent=None, previous=None) |
|
def | __iter__ (self) |
|
def | __len__ (self) |
|
def | __ne__ (self, other) |
|
def | __nonzero__ (self) |
|
def | __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING) |
|
def | __setitem__ (self, key, value) |
|
def | __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
|
def | __unicode__ (self) |
|
def | childGenerator (self) |
|
def | clear (self) |
|
def | decompose (self) |
|
def | fetchText (self, text=None, recursive=True, limit=None) |
|
def | find (self, name=None, attrs={}, recursive=True, text=None, **kwargs) |
|
def | findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) |
|
def | firstText (self, text=None, recursive=True) |
|
def | get (self, key, default=None) |
|
def | getString (self) |
|
def | getText (self, separator=u"") |
|
def | has_key (self, key) |
|
def | index (self, element) |
|
def | prettify (self, encoding=DEFAULT_OUTPUT_ENCODING) |
|
def | recursiveChildGenerator (self) |
|
def | renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
|
def | setString (self, string) |
|
def | append (self, tag) |
|
def | extract (self) |
|
def | findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findNext (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findNextSibling (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | findParent (self, name=None, attrs={}, **kwargs) |
|
def | findParents (self, name=None, attrs={}, limit=None, **kwargs) |
|
def | findPrevious (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs) |
|
def | findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
|
def | insert (self, position, newChild) |
|
def | nextGenerator (self) |
|
def | nextSiblingGenerator (self) |
|
def | parentGenerator (self) |
|
def | previousGenerator (self) |
|
def | previousSiblingGenerator (self) |
|
def | replaceWith (self, replaceWith) |
|
def | replaceWithChildren (self) |
|
def | setup (self, parent=None, previous=None) |
|
def | substituteEncoding (self, str, encoding=None) |
|
def | toEncoding (self, s, encoding=None) |
|
This parser knows the following facts about HTML:
* Some tags have no closing tag and should be interpreted as being
closed as soon as they are encountered.
* The text inside some tags (ie. 'script') may contain tags which
are not really part of the document and which should be parsed
as text, not tags. If you want to parse the text as tags, you can
always fetch it and parse it explicitly.
* Tag nesting rules:
Most tags can't be nested at all. For instance, the occurance of
a <p> tag should implicitly close the previous <p> tag.
<p>Para1<p>Para2
should be transformed into:
<p>Para1</p><p>Para2
Some tags can be nested arbitrarily. For instance, the occurance
of a <blockquote> tag should _not_ implicitly close the previous
<blockquote> tag.
Alice said: <blockquote>Bob said: <blockquote>Blah
should NOT be transformed into:
Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
Some tags can be nested, but the nesting is reset by the
interposition of other tags. For instance, a <tr> tag should
implicitly close the previous <tr> tag within the same <table>,
but not close a <tr> tag in another table.
<table><tr>Blah<tr>Blah
should be transformed into:
<table><tr>Blah</tr><tr>Blah
but,
<tr>Blah<table><tr>Blah
should NOT be transformed into
<tr>Blah<table></tr><tr>Blah
Differing assumptions about tag nesting rules are a major source
of problems with the BeautifulSoup class. If BeautifulSoup is not
treating as nestable a tag your page author treats as nestable,
try ICantBelieveItsBeautifulSoup, MinimalSoup, or
BeautifulStoneSoup before writing your own subclass.
Definition at line 1470 of file BeautifulSoup.py.