X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_beautifulsoup.py;h=8b26d635b763e3eb2b822cd1a871110d1fbb4824;hb=d4e41686948360cf605081b602302750e61699fe;hp=580831070aae1abb3526921b6c4da41ef6176541;hpb=ffc8cd83bb4bc59f4b447da0e16dc9323bac6ccf;p=bookmarks_db.git diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 5808310..8b26d63 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -6,18 +6,8 @@ import re from sgmllib import SGMLParser, SGMLParseError -from HTMLParser import HTMLParser from BeautifulSoup import BeautifulSoup, CData - - -class BSoupParser(HTMLParser): - def __init__(self, charset, meta, title, refresh, icon): - object.__init__(self) - self.charset = charset - self.meta_charset = meta - self.title = title - self.refresh = refresh - self.icon = icon +from parse_html_util import HTMLParser # http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63 @@ -125,7 +115,7 @@ def parse_html(filename, charset=None, log=None): else: icon = None - return BSoupParser(_charset, meta_charset, title, refresh, icon) + return HTMLParser(_charset, meta_charset, title, refresh, icon) def _find_contenttype(Tag): return (Tag.name == "meta") and \