X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=parse_html%2Fbkmk_ph_beautifulsoup4.py;h=10e06a97cd4fc4e25689cd3a92040878a5f2775a;hb=be7cec61fa405f38dea2edde623174ca47ca7dc3;hp=aad3f8fb9092bdba2daec856a93f4309f9714795;hpb=9e3b27b8e808eb8d1453f25ef733efb8cbc41d31;p=bookmarks_db.git diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py index aad3f8f..10e06a9 100644 --- a/parse_html/bkmk_ph_beautifulsoup4.py +++ b/parse_html/bkmk_ph_beautifulsoup4.py @@ -15,6 +15,7 @@ import re from bs4 import BeautifulSoup from .bkmk_ph_util import HTMLParser +universal_charset = "utf-8" DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic def _parse_html(html_text, charset): @@ -80,7 +81,11 @@ def parse_html(html_text, charset=None, log=None): meta_charset = _charset = meta_content.lower() if title and (_charset or meta_charset): - title = title.encode(_charset or meta_charset) + try: + title = title.encode(_charset or meta_charset) + except LookupError: + title = title.encode(universal_charset) + _charset = universal_charset meta = head.find(_find_refresh, recursive=False) if meta: