-"""
- Simple, strightforward robot
+"""Simple, straightforward robot
+
+This file is a part of Bookmarks database and Internet robot.
- Written by Oleg Broytman. Copyright (C) 2000-2010 PhiloSoft Design.
"""
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_simple', 'get_error']
+
+
import sys, os
import time, urllib
from base64 import b64encode
class RedirectException(Exception):
reloc_dict = {
301: "perm.",
- 302: "temp.",
+ 302: "temp2.",
+ 303: "temp3.",
+ 307: "temp7.",
"html": "html"
}
def __init__(self, errcode, newurl):
parser = parse_html(fname, charset, self.log)
if parser:
bookmark.real_title = parser.title
- if parser.refresh:
- refresh = parser.refresh
- try:
- url = refresh.split('=', 1)[1]
- except IndexError:
- url = "self"
- try:
- timeout = float(refresh.split(';')[0])
- except (IndexError, ValueError):
- raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
- else:
- try:
- timeout = int(refresh.split(';')[0])
- except ValueError:
- pass # float timeout
- raise RedirectException("html", "%s (%s sec)" % (url, timeout))
-
- # Get favicon.ico
icon = parser.icon
else:
icon = None
if icons[icon]:
bookmark.icon_href = icon
content_type, bookmark.icon = icons[icon]
- self.log(" cached icon: %s" % content_type)
+ self.log(" cached icon: %s" % content_type)
else:
- self.log(" cached icon: no icon")
+ self.log(" cached icon: no icon")
else:
try:
_icon = icon
icon_fname, headers = urllib.urlretrieve(_icon)
except RedirectException, e:
_icon = e.url
- self.log(" redirect to : %s" % _icon)
+ self.log(" redirect to : %s" % _icon)
else:
break
else:
except:
etype, emsg, tb = sys.exc_info()
self.log(" no icon : %s %s" % (etype, emsg))
- etype = None
- emsg = None
- tb = None
+ etype = emsg = tb = None
icons[icon] = None
else:
content_type = headers["Content-Type"]
- if content_type.startswith("image/"):
+ if content_type.startswith("application/") \
+ or content_type.startswith("image/") \
+ or content_type.startswith("text/plain"):
icon_file = open(icon_fname, "rb")
icon_data = icon_file.read()
icon_file.close()
bookmark.icon_href = icon
- bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
self.log(" got icon : %s" % content_type)
+ if content_type.startswith("application/") \
+ or content_type.startswith("text/plain"):
+ self.log(" non-image content type, assume x-icon")
+ content_type = 'image/x-icon'
+ bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
icons[icon] = (content_type, bookmark.icon)
else:
self.log(" no icon : bad content type '%s'" % content_type)
icons[icon] = None
+ if parser and parser.refresh:
+ refresh = parser.refresh
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ try:
+ timeout = float(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
+ else:
+ try:
+ timeout = int(refresh.split(';')[0])
+ except ValueError:
+ pass # float timeout
+ raise RedirectException("html", "%s (%s sec)" % (url, timeout))
except KeyError, key:
self.log(" no header: %s" % key)
# Tested
return 1
-
def finish_check_url(self, bookmark, fname=None):
# Calculate these attributes even in case of an error
if fname and os.path.exists(fname):