X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_beautifulsoup.py;h=25719ca9d643ad86e2c2844c572b938d1bf6f792;hb=5f637b5a5fe27098985975928632b9fea5ea3c62;hp=11db56371b0df277dd957484f782e54624721e40;hpb=fa4cc9afb8d911873776c26576ce95ab08cda099;p=bookmarks_db.git diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 11db563..25719ca 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -49,11 +49,12 @@ class BadDeclParser(BeautifulSoup): return j -def parse_html(filename, charset=None): +def parse_html(filename, charset=None, log=None): infile = open(filename, 'r') try: root = BadDeclParser(infile, fromEncoding=charset) except TypeError: + if log: log("TypeError") return None finally: infile.close() @@ -61,6 +62,7 @@ def parse_html(filename, charset=None): try: head = root.html.head except AttributeError: + if log: log("No HTML in root or no HEAD in HTML") return None if head is None: