X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib.py;h=6f15ad5cf193aecd5832dd5ed7837ca2f936e232;hb=97202bf84e9c78d8fac912a82a85fb1cbb233a4b;hp=839e37316197d10a51b8812e5bca874e7f00738f;hpb=917353d7c874075d3709f9295dc231d8894e61a4;p=bookmarks_db.git diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py index 839e373..6f15ad5 100644 --- a/Robots/bkmk_rurllib.py +++ b/Robots/bkmk_rurllib.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['robot_urllib'] @@ -13,9 +13,15 @@ __all__ = ['robot_urllib'] import sys, os import time, urllib -from Robots.bkmk_robot_base import robot_base, RedirectException, get_error +from Robots.bkmk_robot_base import robot_base, get_error +class RedirectException(Exception): + def __init__(self, errcode, newurl): + Exception.__init__(self) + self.errcode = errcode + self.newurl = newurl + class MyURLopener(urllib.URLopener): # Error 301 -- relocated (permanently) def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): @@ -36,29 +42,29 @@ class MyURLopener(urllib.URLopener): # Error 401 -- authentication required def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError, ('http error', errcode, "Authentication required ", headers) + raise IOError(('http error', errcode, "Authentication required ", headers)) def http_error_default(self, url, fp, errcode, errmsg, headers): if fp: void = fp.read() fp.close() - raise IOError, ('http error', errcode, errmsg, headers) + raise IOError(('http error', errcode, errmsg, headers)) urllib._urlopener = MyURLopener() # Fake headers to pretend this is a real browser -_version = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en) Gecko/20001221 Firefox/2.0.0" -urllib._urlopener.addheaders[0] = ('User-Agent', _version) -_version = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % ( +_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en) Gecko/20001221 Firefox/2.0.0" +urllib._urlopener.addheaders[0] = ('User-Agent', _user_agent) +_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % ( sys.version_info[0], sys.version_info[1], sys.version_info[2], urllib.__version__) -urllib._urlopener.addheader('X-User-Agent', _version) +urllib._urlopener.addheader('X-User-Agent', _x_user_agent) urllib._urlopener.addheader('Referer', '') -urllib._urlopener.addheader('Connection', 'close') urllib._urlopener.addheader('Accept', '*/*') urllib._urlopener.addheader('Accept-Language', 'ru,en') urllib._urlopener.addheader('Cache-Control', 'max-age=300') +urllib._urlopener.addheader('Connection', 'close') urllib_ftpwrapper = urllib.ftpwrapper @@ -91,18 +97,21 @@ class robot_urllib(robot_base): content = infile.read() infile.close() - return headers, content, None + return None, None, None, headers, content + + except RedirectException as e: + return None, e.errcode, e.newurl, None, None - except IOError, msg: - if (msg[0] == "http error") and (msg[1] == -1): + except IOError as e: + if (e[0] == "http error") and (e[1] == -1): error = None bookmark.no_error = "The server did not return any header - it is not an error, actually" self.log(' no headers: %s' % bookmark.no_error) else: - error = get_error(msg) + error = get_error(e) self.log(' Error: %s' % error) - return None, None, error + return error, None, None, None, None def get_ftp_welcome(self): global ftpcache_key @@ -111,5 +120,6 @@ class robot_urllib(robot_base): # If there are - ftpcache_key in prev line is invalid. return _welcome - def cleanup(self): + def finish_check_url(self, bookmark): + robot_base.finish_check_url(self, bookmark) urllib.urlcleanup()