X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib.py;h=067799c737271b5ceeb373eeb321bc12eaf1e7ec;hb=da4ae53194bee4fde776d2bca07db275d6102a62;hp=c7324e315bcd7ad5a5c1c1b785bcf2c139a29f91;hpb=c88cb7a75e7caf1d67466cfa107981d95115fa0c;p=bookmarks_db.git diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py index c7324e3..067799c 100644 --- a/Robots/bkmk_rurllib.py +++ b/Robots/bkmk_rurllib.py @@ -5,14 +5,15 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['robot_urllib'] -import sys, os -import time, urllib +import sys +import urllib + from Robots.bkmk_robot_base import robot_base, get_error @@ -22,12 +23,13 @@ class RedirectException(Exception): self.errcode = errcode self.newurl = newurl + class MyURLopener(urllib.URLopener): # Error 301 -- relocated (permanently) def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - if headers.has_key('location'): + if 'location' in headers: newurl = headers['location'] - elif headers.has_key('uri'): + elif 'uri' in headers: newurl = headers['uri'] else: newurl = "Nowhere" @@ -39,14 +41,17 @@ class MyURLopener(urllib.URLopener): http_error_303 = http_error_301 # Error 307 -- relocated (temporarily) http_error_307 = http_error_301 + # Error 308 -- relocated (permanently) + http_error_308 = http_error_301 # Error 401 -- authentication required def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError(('http error', errcode, "Authentication required ", headers)) + raise IOError( + ('http error', errcode, "Authentication required ", headers)) def http_error_default(self, url, fp, errcode, errmsg, headers): if fp: - void = fp.read() + fp.read() fp.close() raise IOError(('http error', errcode, errmsg, headers)) @@ -54,10 +59,13 @@ class MyURLopener(urllib.URLopener): urllib._urlopener = MyURLopener() # Fake headers to pretend this is a real browser -_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en) Gecko/20001221 Firefox/2.0.0" +_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)" +" Gecko/20001221 Firefox/2.0.0" urllib._urlopener.addheaders[0] = ('User-Agent', _user_agent) _x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % ( - sys.version_info[0], sys.version_info[1], sys.version_info[2], urllib.__version__) + sys.version_info[0], sys.version_info[1], + sys.version_info[2], urllib.__version__ +) urllib._urlopener.addheader('X-User-Agent', _x_user_agent) urllib._urlopener.addheader('Referer', '') @@ -70,12 +78,14 @@ urllib._urlopener.addheader('Connection', 'close') urllib_ftpwrapper = urllib.ftpwrapper ftpcache_key = None + class myftpwrapper(urllib_ftpwrapper): def __init__(self, user, passwd, host, port, dirs): urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) global ftpcache_key ftpcache_key = (user, host, port, '/'.join(dirs)) + urllib.ftpwrapper = myftpwrapper @@ -91,9 +101,10 @@ class robot_urllib(robot_base): fname, headers = urllib.urlretrieve(url) finally: if accept_charset and bookmark.charset: - del urllib._urlopener.addheaders[-1] # Remove Accept-Charset + # Remove Accept-Charset + del urllib._urlopener.addheaders[-1] - infile = open(fname, 'rb') + infile = open(fname, 'rt') content = infile.read() infile.close() @@ -105,7 +116,8 @@ class robot_urllib(robot_base): except IOError as e: if (e[0] == "http error") and (e[1] == -1): error = None - bookmark.no_error = "The server did not return any header - it is not an error, actually" + bookmark.no_error = "The server did not return any header - " + "it is not an error, actually" self.log(' no headers: %s' % bookmark.no_error) else: error = get_error(e) @@ -116,8 +128,9 @@ class robot_urllib(robot_base): def get_ftp_welcome(self): global ftpcache_key _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome - ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. - # If there are - ftpcache_key in prev line is invalid. + # I am assuming there are no duplicate ftp URLs in db. + # If there are - ftpcache_key in next line is invalid. + ftpcache_key = None return _welcome def finish_check_url(self, bookmark):