X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_beautifulsoup.py;h=c7263fe64b478769e931ab978e4e382ed2233d3c;hb=284b0935fc4237bda2e51229860c771f78887be2;hp=9b0faded06ab92cb951b844421555776690118ff;hpb=0b76120991af955d34a9376d44e1df719f7ac16c;p=bookmarks_db.git diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index 9b0fade..c7263fe 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -67,6 +67,9 @@ def parse_html(filename, charset=None): head = root.html # Some sites put TITLE in HTML without HEAD _charset = root.originalEncoding + if _charset == "windows-1252": # Replace default + _charset = DEFAULT_CHARSET + try: title = head.title.string.encode(_charset) except AttributeError: