from base64 import b64encode
-from urllib.parse import urlsplit, urljoin
+from urllib.parse import urljoin
import sys
import socket
import time
from parse_html import parse_html
+# Fake headers to pretend this is a real browser.
+# The parentheses are required: adjacent string literals are only joined
+# inside a single expression.  Without them the second literal is a
+# separate no-op statement and the Gecko/Firefox suffix is silently lost.
+_user_agent = (
+    "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
+    " Gecko/20001221 Firefox/2.0.0"
+)
+# Program name plus the (major, minor, micro) version of the running Python.
+_x_user_agent = "bookmarks_db (Python %d.%d.%d)" % sys.version_info[:3]
+
+# Header name -> value mapping built from the constants above.
+# NOTE(review): presumably sent with every outgoing request; the code that
+# consumes this dict is not visible here -- confirm against the callers.
+request_headers = {
+    'Accept': '*/*',
+    'Accept-Language': 'ru,en',
+    'Cache-Control': 'max-age=300',
+    'Connection': 'close',
+    'Referer': '/',
+    'User-Agent': _user_agent,
+    'X-User-Agent': _x_user_agent,
+}
+
+
reloc_dict = {
301: "perm1.",
302: "temp2.",
self.start = int(time.time())
bookmark.icon = None
- split_results = urlsplit(bookmark.href)
- url_type, netloc, url_path, query, url_tag = split_results
- url_host = split_results.hostname
-
- url = "%s://%s%s" % (url_type, url_host, url_path)
error, redirect_code, redirect_to, headers, content = \
- self.get(bookmark, url, True)
+ self.get(bookmark, bookmark.href, True)
if error:
bookmark.error = error
icon = None
if not icon:
icon = "/favicon.ico"
- icon_url = urljoin(
- "%s://%s%s" % (url_type, url_host, url_path), icon)
+ icon_url = urljoin(bookmark.href, icon)
self.log(" looking for icon at: %s" % icon_url)
if icon_url in icons:
if icons[icon_url]:
bookmark.icon_href = icon_url
content_type, bookmark.icon = icons[icon_url]
- self.log(" cached icon: %s" % content_type)
+ self.log(" cached icon : %s"
+ % content_type)
else:
- self.log(" cached icon: no icon")
+ self.log(" cached icon : no icon")
+ elif icon_url.startswith('data:'):
+ content_type, icon_data = \
+ icon_url[len('data:'):].split(',', 1)
+ bookmark.icon_href = bookmark.icon = icon_url
+ self.log(" got data icon : %s" % content_type)
+ icons[icon_url] = (content_type, icon_url)
else:
try:
_icon_url = icon_url
self.log(" no header: %s" % key)
md5 = md5wrapper()
- if url_type == "ftp": # Pass welcome message through MD5
+ if bookmark.href.startswith("ftp://"):
+ # Pass welcome message through MD5
ftp_welcome = self.get_ftp_welcome()
if not isinstance(ftp_welcome, bytes):
ftp_welcome = ftp_welcome.encode(charset or 'utf-8')