Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
"""
+import codecs
+
from m_lib.defenc import default_encoding
current_charset = default_encoding.replace("windows-", "cp")
DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
for attrname, value in attrs:
if value:
value = value.strip().lower()
- if (attrname == 'rel') and (value in ('icon', 'shortcut icon')):
+ if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
has_icon = True
elif attrname == 'href':
href = value
if has_icon:
self.icon = href
- else:
- self.icon = None
import re
if __name__ == '__main__':
+ import sys
parser = parse_html(sys.argv[1])
print parser.charset
print parser.title