"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_base', 'get_error']
from parse_html import parse_html
+# Fake headers to pretend this is a real browser
+# (parenthesized so both literals concatenate into one User-Agent string)
+_user_agent = ("Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
+               " Gecko/20001221 Firefox/2.0.0")
+# Names this program and the running Python version
+_x_user_agent = "bookmarks_db (Python %d.%d.%d)" % sys.version_info[:3]
+
+request_headers = {
+ 'Accept': '*/*',
+ 'Accept-Language': 'ru,en',
+ 'Cache-Control': 'max-age=300',
+ 'Connection': 'close',
+ 'Referer': '/',
+ 'User-Agent': _user_agent,
+ 'X-User-Agent': _x_user_agent,
+}
+
+
reloc_dict = {
301: "perm1.",
302: "temp2.",
break
content_stripped = content.strip()
if content_stripped and charset:
- content_stripped = content_stripped.decode(
- charset, 'replace')
+ try:
+ content_stripped = content_stripped.decode(
+ charset, 'replace')
+ except LookupError:
+ charset = None
+ self.log(" unknown charset "
+ "in Content-Type header")
if content_stripped and is_html:
parser = parse_html(
content_stripped, charset, self.log)
if icons[icon_url]:
bookmark.icon_href = icon_url
content_type, bookmark.icon = icons[icon_url]
- self.log(" cached icon: %s" % content_type)
+ self.log(" cached icon : %s"
+ % content_type)
else:
- self.log(" cached icon: no icon")
+ self.log(" cached icon : no icon")
+ elif icon_url.startswith('data:'):
+ content_type, icon_data = \
+ icon_url[len('data:'):].split(',', 1)
+ bookmark.icon_href = bookmark.icon = icon_url
+ self.log(" got data icon : %s" % content_type)
+ icons[icon_url] = (content_type, icon_url)
else:
try:
_icon_url = icon_url
bookmark.icon
)
else:
- self.log(" no icon :"
+ self.log(" no icon : "
"bad content type '%s'"
% content_type
)