From c19fbf714f025012d089aa86e4fa8e3b46f3a01e Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 5 Jul 2014 01:04:27 +0400 Subject: [PATCH] Return redirect code/destination URL Return redirect code and destination URL instead of raising RedirectException. --- Robots/bkmk_robot_base.py | 45 ++++++++++++++++++++------------------- Robots/bkmk_rurllib.py | 15 ++++++++++--- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index dce5933..80079b4 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -23,17 +23,13 @@ from bkmk_objects import Robot from parse_html import parse_html -class RedirectException(Exception): - reloc_dict = { - 301: "perm.", - 302: "temp2.", - 303: "temp3.", - 307: "temp7.", - "html": "html" - } - def __init__(self, errcode, newurl): - Exception.__init__(self, "(%s) to %s" % (self.reloc_dict[errcode], newurl)) - self.url = newurl +reloc_dict = { + 301: "perm.", + 302: "temp2.", + 303: "temp3.", + 307: "temp7.", + "html": "html" +} def get_error(msg): @@ -61,12 +57,16 @@ class robot_base(Robot): url_path, url_tag = urllib.splittag(url_path) url = "%s://%s%s" % (url_type, url_host, url_path) - error, headers, content = self.get(bookmark, url, True) + error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True) if error: bookmark.error = error return 1 + if redirect_code: + self.set_redirect(bookmark, redirect_code, redirect_to) + return 1 + size = 0 last_modified = None @@ -142,10 +142,11 @@ class robot_base(Robot): try: _icon_url = icon_url for i in range(8): - try: - error, icon_headers, icon_data = self.get(bookmark, _icon_url) - except RedirectException, e: - _icon_url = e.url + error, icon_redirect_code, icon_redirect_to, \ + icon_headers, icon_data = \ + self.get(bookmark, _icon_url) + if icon_redirect_code: + _icon_url = icon_redirect_to self.log(" redirect to : %s" % _icon_url) else: if icon_data is None: @@ -183,13 +184,13 @@ class robot_base(Robot): try: timeout = float(refresh.split(';')[0]) except (IndexError, ValueError): - raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh)) + self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh)) else: try: timeout = int(refresh.split(';')[0]) except ValueError: pass # float timeout - raise RedirectException("html", "%s (%s sec)" % (url, timeout)) + self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout)) except KeyError, key: self.log(" no header: %s" % key) @@ -198,10 +199,6 @@ class robot_base(Robot): bookmark.error = "Unexpected EOF (FTP server closed connection)" self.log(' EOF: %s' % bookmark.error) - except RedirectException, msg: - bookmark.moved = str(msg) - self.log(' Moved: %s' % bookmark.moved) - except KeyboardInterrupt: self.log("Keyboard interrupt (^C)") return 0 @@ -218,6 +215,10 @@ class robot_base(Robot): # Tested return 1 + def set_redirect(self, bookmark, errcode, newurl): + bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl) + self.log(' Moved: %s' % bookmark.moved) + def finish_check_url(self, bookmark): start = self.start bookmark.last_tested = str(start) diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py index d9908b8..6c10148 100644 --- a/Robots/bkmk_rurllib.py +++ b/Robots/bkmk_rurllib.py @@ -13,9 +13,15 @@ __all__ = ['robot_urllib'] import sys, os import time, urllib -from Robots.bkmk_robot_base import robot_base, RedirectException, get_error +from Robots.bkmk_robot_base import robot_base, get_error +class RedirectException(Exception): + def __init__(self, errcode, newurl): + Exception.__init__(self) + self.errcode = errcode + self.newurl = newurl + class MyURLopener(urllib.URLopener): # Error 301 -- relocated (permanently) def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): @@ -91,7 +97,10 @@ class robot_urllib(robot_base): content = infile.read() infile.close() - return None, headers, content + return None, None, None, headers, content + + except RedirectException, e: + return None, e.errcode, e.newurl, None, None except IOError, msg: if (msg[0] == "http error") and (msg[1] == -1): @@ -102,7 +111,7 @@ class robot_urllib(robot_base): error = get_error(msg) self.log(' Error: %s' % error) - return error, None, None + return error, None, None, None, None def get_ftp_welcome(self): global ftpcache_key -- 2.39.2