X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib2.py;h=d1b679c11d68ebef2f14e815d3c2230cfd3c46d6;hb=2ec71bc7b14b39fc84806fd614631dc6f2b98b36;hp=b15a7b0dfea49c9d19b014ebb65052dd5027a552;hpb=27c6253f3e707d0b90e67ee52f78e1335482e17e;p=bookmarks_db.git diff --git a/Robots/bkmk_rurllib2.py b/Robots/bkmk_rurllib2.py index b15a7b0..d1b679c 100644 --- a/Robots/bkmk_rurllib2.py +++ b/Robots/bkmk_rurllib2.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['robot_urllib2'] @@ -14,12 +14,23 @@ __all__ = ['robot_urllib2'] import sys import httplib import urllib2 -from Robots.bkmk_robot_base import robot_base +from Robots.bkmk_robot_base import robot_base, request_headers, get_error + + +_fw = None + + +class FTPHandler(urllib2.FTPHandler): + def connect_ftp(self, user, passwd, host, port, dirs, timeout): + global _fw + _fw = urllib2.FTPHandler.connect_ftp(self, user, passwd, host, port, + dirs, timeout) + return _fw opener = urllib2.OpenerDirector() default_classes = [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler, - urllib2.FTPHandler, urllib2.HTTPErrorProcessor] + FTPHandler, urllib2.HTTPErrorProcessor] if hasattr(httplib, 'HTTPS'): default_classes.insert(0, urllib2.HTTPSHandler) for klass in default_classes: @@ -28,36 +39,46 @@ for klass in default_classes: urllib2.install_opener(opener) -# Fake headers to pretend this is a real browser -_user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0" -_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib2)" % ( - sys.version_info[0], sys.version_info[1], sys.version_info[2]) - - class robot_urllib2(robot_base): - def get(self, bookmark, url, accept_charset=False): + def get(self, bookmark, url, accept_charset=False): request = urllib2.Request(url) - request.add_header('Accept', '*/*') + for h, v in request_headers.items(): + request.add_header(h, v) if accept_charset and bookmark.charset: request.add_header('Accept-Charset', bookmark.charset) - request.add_header('Accept-Language', 'ru,en') - request.add_header('Cache-Control', 'max-age=300') - request.add_header('Connection', 'close') - request.add_header('Referer', url) - request.add_header('User-agent', _user_agent) - request.add_header('X-User-Agent', _x_user_agent) + + global _fw + _fw = None try: response = urllib2.urlopen(request) - except urllib2.HTTPError, e: - if e.code in (301, 302, 303, 307): + + except urllib2.HTTPError as e: + if e.code in (301, 302, 303, 307, 308): return None, e.code, e.hdrs['Location'], None, None else: - return "HTTP Error %s: %s" % (e.code, e.msg), None, None, None, None - except urllib2.URLError, e: + self.log(' HTTP Error %s: %s' % (e.code, e.msg)) + return ("HTTP Error %s: %s" % (e.code, e.msg), + None, None, None, None) + + except urllib2.URLError as e: + self.log(' URL Error: %s' % e.reason) return "URL Error: %s" % e.reason, None, None, None, None + + except httplib.HTTPException as e: + error = get_error(e) + self.log(' HTTP Exception: %s' % error) + return "HTTP Exception: %s" % error, None, None, None, None + + except IOError as e: + error = get_error(e) + self.log(' I/O Error: %s' % error) + return "I/O Error: %s" % error, None, None, None, None + else: return None, None, None, response.info(), response.read() - def get_ftp_welcome(self): - return '' + def get_ftp_welcome(self): + if _fw is None: + return '' + return _fw.ftp.welcome