from parse_html import parse_html
+# Fake headers to pretend this is a real browser.
+# The two literals are wrapped in parentheses so that they are joined into
+# a single User-Agent value. Without the parentheses the second literal is
+# a stand-alone expression statement and the Gecko/Firefox suffix is
+# silently dropped from the header.
+_user_agent = ("Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
+               " Gecko/20001221 Firefox/2.0.0")
+# Identification string built from the running interpreter's
+# (major, minor, micro) version; stored under 'X-User-Agent' below.
+_x_user_agent = "bookmarks_db (Python %d.%d.%d)" % sys.version_info[:3]
+
+# Header name -> header value mapping.
+# NOTE(review): presumably attached to outgoing HTTP requests; the code
+# that consumes this dict is not visible in this hunk -- confirm.
+request_headers = {
+    'Accept': '*/*',
+    'Accept-Language': 'ru,en',
+    'Cache-Control': 'max-age=300',
+    'Connection': 'close',
+    'Referer': '/',
+    'User-Agent': _user_agent,
+    'X-User-Agent': _x_user_agent,
+}
+
+
reloc_dict = {
301: "perm1.",
302: "temp2.",
break
content_stripped = content.strip()
if content_stripped and charset:
- content_stripped = content_stripped.decode(
- charset, 'replace')
+ try:
+ content_stripped = content_stripped.decode(
+ charset, 'replace')
+ except LookupError:
+ charset = None
+ self.log(" unknown charset "
+ "in Content-Type header")
if content_stripped and is_html:
parser = parse_html(
content_stripped, charset, self.log)