Written by BroytMann. Copyright (C) 2007 PhiloSoft Design
"""
+from HTMLParser import HTMLParser
from BeautifulSoup import BeautifulSoup
-class DummyParser(object):
+class BSoupParser(HTMLParser):
def __init__(self, charset, meta, title, refresh, icon):
object.__init__(self)
self.charset = charset
self.refresh = refresh
self.icon = icon
+
def parse_html(filename, charset=None):
infile = open(filename, 'r')
root = BeautifulSoup(infile, fromEncoding=charset)
infile.close()
- charset = root.originalEncoding
+ _charset = root.originalEncoding
try:
- title = root.html.head.title.string.encode(charset)
+ title = root.html.head.title.string.encode(_charset)
except AttributeError:
title = ''
else:
icon = None
- parser = DummyParser(charset, False, title, refresh, icon)
+ parser = BSoupParser(_charset, _charset == charset, title, refresh, icon)
return parser
def _find_refresh(Tag):