X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=Robots%2Fbkmk_robot_base.py;h=eb69c28bb31ed783073b318dc3722a706bb8e9ff;hb=71900f3630cb51580964038b78100d60e3671981;hp=057fceff7d7370c1cff4a34957118f42ee955665;hpb=917353d7c874075d3709f9295dc231d8894e61a4;p=bookmarks_db.git diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 057fcef..eb69c28 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -11,38 +11,36 @@ __license__ = "GNU GPL" __all__ = ['robot_base', 'get_error'] -import sys -import time, urllib from base64 import b64encode +import sys +import socket +import time +import urllib from urlparse import urljoin -from m_lib.net.www.util import parse_time from m_lib.md5wrapper import md5wrapper +from m_lib.net.www.util import parse_time from bkmk_objects import Robot from parse_html import parse_html -class RedirectException(Exception): - reloc_dict = { - 301: "perm.", - 302: "temp2.", - 303: "temp3.", - 307: "temp7.", - "html": "html" - } - def __init__(self, errcode, newurl): - Exception.__init__(self, "(%s) to %s" % (self.reloc_dict[errcode], newurl)) - self.url = newurl +reloc_dict = { + 301: "perm.", + 302: "temp2.", + 303: "temp3.", + 307: "temp7.", + "html": "html" +} -def get_error(msg): - if isinstance(msg, str): - return msg +def get_error(e): + if isinstance(e, str): + return e else: s = [] - for i in msg: + for i in e: s.append("'%s'" % str(i).replace('\n', "\\n")) return "(%s)" % ' '.join(s) @@ -51,6 +49,12 @@ icons = {} # Icon cache; maps URL to a tuple (content type, data) # or None if there is no icon. class robot_base(Robot): + timeout = 60 + + def __init__(self, *args, **kw): + Robot.__init__(self, *args, **kw) + socket.setdefaulttimeout(int(self.timeout)) + def check_url(self, bookmark): try: self.start = int(time.time()) @@ -61,12 +65,14 @@ class robot_base(Robot): url_path, url_tag = urllib.splittag(url_path) url = "%s://%s%s" % (url_type, url_host, url_path) - headers, content, error = self.get(bookmark, url, True) + error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True) if error: bookmark.error = error + return 1 - if content is None: + if redirect_code: + self.set_redirect(bookmark, redirect_code, redirect_to) return 1 size = 0 @@ -97,7 +103,7 @@ class robot_base(Robot): bookmark.last_modified = last_modified md5 = md5wrapper() - if urllib._urlopener.type == "ftp": # Pass welcome message through MD5 + if url_type == "ftp": # Pass welcome message through MD5 md5.update(self.get_ftp_welcome()) md5.update(content) @@ -144,10 +150,11 @@ class robot_base(Robot): try: _icon_url = icon_url for i in range(8): - try: - icon_headers, icon_data, error = self.get(bookmark, _icon_url) - except RedirectException, e: - _icon_url = e.url + error, icon_redirect_code, icon_redirect_to, \ + icon_headers, icon_data = \ + self.get(bookmark, _icon_url) + if icon_redirect_code: + _icon_url = icon_redirect_to self.log(" redirect to : %s" % _icon_url) else: if icon_data is None: @@ -185,13 +192,13 @@ class robot_base(Robot): try: timeout = float(refresh.split(';')[0]) except (IndexError, ValueError): - raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh)) + self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh)) else: try: timeout = int(refresh.split(';')[0]) except ValueError: pass # float timeout - raise RedirectException("html", "%s (%s sec)" % (url, timeout)) + self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout)) except KeyError, key: self.log(" no header: %s" % key) @@ -200,14 +207,14 @@ class robot_base(Robot): bookmark.error = "Unexpected EOF (FTP server closed connection)" self.log(' EOF: %s' % bookmark.error) - except RedirectException, msg: - bookmark.moved = str(msg) - self.log(' Moved: %s' % bookmark.moved) - except KeyboardInterrupt: self.log("Keyboard interrupt (^C)") return 0 + except socket.error, e: + bookmark.error = get_error(e) + self.log(bookmark.error) + except: import traceback traceback.print_exc() @@ -220,11 +227,12 @@ class robot_base(Robot): # Tested return 1 + def set_redirect(self, bookmark, errcode, newurl): + bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl) + self.log(' Moved: %s' % bookmark.moved) + def finish_check_url(self, bookmark): start = self.start bookmark.last_tested = str(start) - now = int(time.time()) bookmark.test_time = str(now - start) - - self.cleanup()