X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parse_html%2Fbkmk_parse_html.py;h=5da37a98ab261f238f2e6cb78e29adeb826f0ebb;hb=c2ea4e82718b903aa123dd77490f36657383b0ca;hp=af9395b0d8d735689288a02071a0e1aee80814de;hpb=4ced9933b914c5569cba1c29c9ae21c3de123acc;p=bookmarks_db.git diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py index af9395b..5da37a9 100644 --- a/parse_html/bkmk_parse_html.py +++ b/parse_html/bkmk_parse_html.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design" +__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html', 'parse_filename', 'universal_charset'] @@ -14,7 +14,7 @@ __all__ = ['parse_html', 'parse_filename', 'universal_charset'] import codecs universal_charset = "utf-8" -DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic +DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic parsers = [] @@ -86,7 +86,7 @@ def recode_entities(title, charset): try: part = unichr(int(part[2:-1])).encode(charset) except UnicodeEncodeError: - pass # Leave the entity as is + pass # Leave the entity as is output.append(part) return ''.join(output) @@ -101,7 +101,7 @@ def parse_html(html_text, charset=None, log=None): if charset: try: - codecs.lookup(charset) # In case of unknown charset... + codecs.lookup(charset) # In case of unknown charset... except (ValueError, LookupError): charset = None # ...try charset from HTML