From 66ef97e93d66c741926db216c29dad6047c5d7f4 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Dec 2007 19:50:53 +0000 Subject: [PATCH] Inherit HTMLParser (for unescape). git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@107 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html_beautifulsoup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 4f395a1..e0129fd 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -4,10 +4,11 @@ Written by BroytMann. Copyright (C) 2007 PhiloSoft Design """ +from HTMLParser import HTMLParser from BeautifulSoup import BeautifulSoup -class DummyParser(object): +class BSoupParser(HTMLParser): def __init__(self, charset, meta, title, refresh, icon): object.__init__(self) self.charset = charset @@ -16,6 +17,7 @@ class DummyParser(object): self.refresh = refresh self.icon = icon + def parse_html(filename, charset=None): infile = open(filename, 'r') root = BeautifulSoup(infile, fromEncoding=charset) @@ -47,7 +49,7 @@ def parse_html(filename, charset=None): else: icon = None - parser = DummyParser(charset, False, title, refresh, icon) + parser = BSoupParser(charset, False, title, refresh, icon) return parser def _find_refresh(Tag): -- 2.39.5