X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib_py3.py;h=2b73347568813505a270d9308e1595172aa98819;hb=48440e20170112c8a036b0c66c1cbe067ed1d87e;hp=b39288770fa2212fe0cbfbaba010505c436faa1d;hpb=68438d614389f7011d78ed6b29d3a2ab13471c8b;p=bookmarks_db.git

diff --git a/Robots/bkmk_rurllib_py3.py b/Robots/bkmk_rurllib_py3.py
index b392887..2b73347 100644
--- a/Robots/bkmk_rurllib_py3.py
+++ b/Robots/bkmk_rurllib_py3.py
@@ -11,6 +11,7 @@ __license__ = "GNU GPL"
 __all__ = ['robot_urllib_py3']
 
 
+import socket
 import sys
 import urllib.request
 
@@ -53,24 +54,27 @@ class MyURLopener(urllib.request.URLopener):
         fp.close()
         raise IOError(('http error', errcode, errmsg, headers))
 
+    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+        return urllib.request.URLopener.open(self, fullurl, data)
 
-urllib.request._opener = MyURLopener()
+
+urllib.request._opener = opener = MyURLopener()
 
 # Fake headers to pretend this is a real browser
 _user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)" \
     " Gecko/20001221 Firefox/2.0.0"
-urllib.request._opener.addheaders[0] = ('User-Agent', _user_agent)
+opener.addheaders[0] = ('User-Agent', _user_agent)
 _x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
     sys.version_info[0], sys.version_info[1],
     sys.version_info[2], urllib.request.__version__
 )
-urllib.request._opener.addheader('X-User-Agent', _x_user_agent)
-urllib.request._opener.addheader('Referer', '')
+opener.addheader('X-User-Agent', _x_user_agent)
+opener.addheader('Referer', '')
 
-urllib.request._opener.addheader('Accept', '*/*')
-urllib.request._opener.addheader('Accept-Language', 'ru,en')
-urllib.request._opener.addheader('Cache-Control', 'max-age=300')
-urllib.request._opener.addheader('Connection', 'close')
+opener.addheader('Accept', '*/*')
+opener.addheader('Accept-Language', 'ru,en')
+opener.addheader('Cache-Control', 'max-age=300')
+opener.addheader('Connection', 'close')
 
 urllib_ftpwrapper = urllib.request.ftpwrapper
 
@@ -91,16 +95,16 @@ class robot_urllib_py3(robot_base):
     def get(self, bookmark, url, accept_charset=False):
         try:
             # Set fake referer to the base URL
-            urllib.request._opener.addheaders[2] = ('Referer', url)
+            opener.addheaders[2] = ('Referer', url)
 
             if accept_charset and bookmark.charset:
-                urllib.request._opener.addheader('Accept-Charset', bookmark.charset)
+                opener.addheader('Accept-Charset', bookmark.charset)
             try:
                 fname, headers = urllib.request.urlretrieve(url)
             finally:
                 if accept_charset and bookmark.charset:
                     # Remove Accept-Charset
-                    del urllib.request._opener.addheaders[-1]
+                    del opener.addheaders[-1]
 
             infile = open(fname, 'rt')
             content = infile.read()
@@ -111,6 +115,11 @@ class robot_urllib_py3(robot_base):
         except RedirectException as e:
             return None, e.errcode, e.newurl, None, None
 
+        except OSError as e:
+            error = str(e)
+            self.log(' Error: %s' % error)
+            return error, None, None, None, None
+
         except IOError as e:
             if (e[0] == "http error") and (e[1] == -1):
                 error = None
@@ -125,7 +134,7 @@
 
     def get_ftp_welcome(self):
         global ftpcache_key
-        _welcome = urllib.request._opener.ftpcache[ftpcache_key].ftp.welcome
+        _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
         # I am assuming there are no duplicate ftp URLs in db.
         # If there are - ftpcache_key in next line is invalid.
         ftpcache_key = None
@@ -134,3 +143,4 @@
     def finish_check_url(self, bookmark):
         robot_base.finish_check_url(self, bookmark)
         urllib.request.urlcleanup()
+        urllib.request._opener = opener
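
Note (not part of the patch): the two behavioural changes in this diff are the new open() override and the re-installation of the opener after urlcleanup(). In Python 3, urllib.request.urlretrieve() goes through urlopen(), which calls the module-global _opener as opener.open(url, data, timeout) with three positional arguments, while the legacy URLopener.open() only accepts (fullurl, data); the override simply swallows the extra timeout. urlcleanup() also resets the global _opener, which is why finish_check_url() now restores it. The sketch below illustrates the same mechanism outside the robot; the class name TimeoutTolerantOpener and the example URL are illustrative assumptions, not code from bookmarks_db.

# Hedged sketch -- illustrative only, not code from bookmarks_db.
import socket
import urllib.request


class TimeoutTolerantOpener(urllib.request.URLopener):
    """Legacy URLopener whose open() tolerates urlopen()'s timeout argument.

    urllib.request.urlopen() calls the installed opener as
    opener.open(url, data, timeout); the legacy URLopener.open() only
    takes (fullurl, data), so without this override urlretrieve() fails
    with a TypeError once a URLopener subclass is installed globally.
    """
    version = 'TimeoutTolerantOpener/0.1'  # becomes the User-Agent header

    def open(self, fullurl, data=None,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Accept the timeout for signature compatibility and drop it,
        # the same trick the patch applies to MyURLopener.
        return urllib.request.URLopener.open(self, fullurl, data)


if __name__ == '__main__':
    # Install the opener globally so plain urlretrieve() picks it up.
    # (URLopener is deprecated in Python 3 and may emit a warning.)
    urllib.request._opener = opener = TimeoutTolerantOpener()

    fname, headers = urllib.request.urlretrieve('http://example.com/')
    print(fname, headers.get('Content-Type'))

    # urlcleanup() removes the temporary file and also resets the global
    # _opener, so it has to be reinstalled afterwards -- the same reason
    # finish_check_url() ends with `urllib.request._opener = opener`.
    urllib.request.urlcleanup()
    urllib.request._opener = opener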