parser = None
for c in charsets:
try:
- parser = p(filename, c)
+ parser = p(filename, c, log)
break
except UnicodeEncodeError:
pass
return j
-def parse_html(filename, charset=None):
+def parse_html(filename, charset=None, log=None):
infile = open(filename, 'r')
try:
root = BadDeclParser(infile, fromEncoding=charset)
except TypeError:
+ if log: log("TypeError")
return None
finally:
infile.close()
try:
head = root.html.head
except AttributeError:
+ if log: log("No HTML in root or no HEAD in HTML")
return None
if head is None:
"""
HTML Parser
- Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 1997-2008 PhiloSoft Design
"""
from HTMLParser import HTMLParseError
self.icon = href
-def parse_html(filename, charset=None):
+def parse_html(filename, charset=None, log=None):
infile = open(filename, 'r')
parser = HTMLParser(charset)