"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html', 'parse_filename', 'universal_charset']
import codecs
universal_charset = "utf-8"
-DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
+DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
parsers = []
try:
part = unichr(int(part[2:-1])).encode(charset)
except UnicodeEncodeError:
- pass # Leave the entity as is
+ pass # Leave the entity as is
output.append(part)
return ''.join(output)
if charset:
try:
- codecs.lookup(charset) # In case of unknown charset...
+ codecs.lookup(charset) # In case of unknown charset...
except (ValueError, LookupError):
charset = None # ...try charset from HTML