X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parse_html%2Fbkmk_parse_html.py;h=c14d055dfe9213556b48bf60fc4ee467d3888a39;hb=b9c8d112b8d8d0f7c726ee7dd07a89b6569c90a1;hp=07fe32e3e7e6a658ec82f0123acff746a7ac0b01;hpb=b2b302b348f0b711557961c70744c818cff664f0;p=bookmarks_db.git diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py index 07fe32e..c14d055 100644 --- a/parse_html/bkmk_parse_html.py +++ b/parse_html/bkmk_parse_html.py @@ -12,11 +12,7 @@ __all__ = ['parse_html', 'parse_filename', 'universal_charset'] import codecs - -universal_charset = "utf-8" -DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic - -parsers = [] +import os try: from . import bkmk_ph_beautifulsoup4 @@ -66,6 +62,11 @@ else: import re from htmlentitydefs import name2codepoint +universal_charset = "utf-8" +DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic + +parsers = [] + entity_re = re.compile("(&\w+;)") num_entity_re = re.compile("(&#[0-9]+;)") @@ -93,7 +94,6 @@ def recode_entities(title, charset): return ''.join(output) -import os BKMK_DEBUG_HTML_PARSERS = os.environ.get("BKMK_DEBUG_HTML_PARSERS")