From 163b0e2db2d743501f80ac404007d421b868fece Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Wed, 11 Aug 2010 17:26:11 +0000 Subject: [PATCH] Moved HTMLParser from parse_html_beautifulsoup.py to parse_html_util.py. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@257 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_beautifulsoup.py | 14 ++------------ Robots/parse_html_util.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 Robots/parse_html_util.py diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 5808310..8b26d63 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -6,18 +6,8 @@ import re from sgmllib import SGMLParser, SGMLParseError -from HTMLParser import HTMLParser from BeautifulSoup import BeautifulSoup, CData - - -class BSoupParser(HTMLParser): - def __init__(self, charset, meta, title, refresh, icon): - object.__init__(self) - self.charset = charset - self.meta_charset = meta - self.title = title - self.refresh = refresh - self.icon = icon +from parse_html_util import HTMLParser # http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63 @@ -125,7 +115,7 @@ def parse_html(filename, charset=None, log=None): else: icon = None - return BSoupParser(_charset, meta_charset, title, refresh, icon) + return HTMLParser(_charset, meta_charset, title, refresh, icon) def _find_contenttype(Tag): return (Tag.name == "meta") and \ diff --git a/Robots/parse_html_util.py b/Robots/parse_html_util.py new file mode 100644 index 0000000..79890dd --- /dev/null +++ b/Robots/parse_html_util.py @@ -0,0 +1,16 @@ +""" + HTML parsers utilities + + Written by Broytman. Copyright (C) 2010 PhiloSoft Design +""" + +from HTMLParser import HTMLParser + +class HTMLParser(HTMLParser): + def __init__(self, charset, meta_charset, title, refresh, icon): + object.__init__(self) + self.charset = charset + self.meta_charset = meta_charset + self.title = title + self.refresh = refresh + self.icon = icon -- 2.39.2