"""
# Module metadata.  The copyright range is the post-patch value; the
# superseded 2000-2014 line from the diff is dropped.
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"

# Only the robot class is exported by `from <module> import *`.
__all__ = ['robot_urllib']
class RedirectException(Exception):
    """Signal that the server answered with an HTTP redirect.

    Attributes:
        errcode: the HTTP status code passed by the raising handler.
        newurl: the redirect target passed by the raising handler.
    """

    def __init__(self, errcode, newurl):
        # No positional args are forwarded to Exception, so ``args``
        # stays empty; the payload lives in the two attributes below.
        Exception.__init__(self)
        self.errcode = errcode
        self.newurl = newurl
class MyURLopener(urllib.URLopener):
    """URLopener whose HTTP error handlers raise exceptions.

    Redirect statuses (301, 302, 303, 307) raise RedirectException with
    the target URL; 401 and every other HTTP error raise IOError with
    the arguments ('http error', errcode, message, headers).
    """

    # Error 301 -- relocated (permanently)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise RedirectException(errcode, newurl).

        The target is taken from the 'location' header, else from the
        'uri' header; when neither is present the literal "Nowhere" is
        used.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            newurl = "Nowhere"
        raise RedirectException(errcode, newurl)

    # Error 302 -- relocated (temporarily)
    http_error_302 = http_error_301
    # Error 303 -- relocated (see other)
    http_error_303 = http_error_301
    # Error 307 -- relocated (temporarily)
    http_error_307 = http_error_301

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise IOError instead of attempting authentication."""
        # Pass the items as separate arguments (not as one tuple) so
        # that the exception's first two args are 'http error' and the
        # status code, as with the old ``raise IOError, (...)`` form.
        raise IOError('http error', errcode, "Authentication required ",
                      headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Read and close the response, then raise IOError."""
        if fp:
            # The body is read and discarded before closing.
            fp.read()
            fp.close()
        raise IOError('http error', errcode, errmsg, headers)
# Install a MyURLopener instance as urllib's module-level `_urlopener`;
# robot_urllib.get adjusts this object's headers before each fetch.
urllib._urlopener = MyURLopener()
# Key of the most recently constructed myftpwrapper.  Assigned in
# myftpwrapper.__init__, read and reset to None in
# robot_urllib.get_ftp_welcome.
ftpcache_key = None
class myftpwrapper(urllib_ftpwrapper):
    """ftpwrapper that remembers the key of the last connection made.

    Each construction stores (user, host, port, '/'.join(dirs)) in the
    module-level ``ftpcache_key``; robot_urllib.get_ftp_welcome later
    uses that tuple to index ``urllib._urlopener.ftpcache``.
    """

    def __init__(self, user, passwd, host, port, dirs):
        global ftpcache_key
        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
        # Recorded only after the base initializer returns, so a failed
        # construction leaves the previous key untouched.
        ftpcache_key = (user, host, port, '/'.join(dirs))
# Rebind urllib's `ftpwrapper` name to the subclass above; presumably so
# that FTP connections opened through urllib record ftpcache_key --
# confirm against the urllib version in use.
urllib.ftpwrapper = myftpwrapper
class robot_urllib(robot_base):
    """Robot that fetches URLs through the module-level urllib opener."""

    def get(self, bookmark, url, accept_charset=False):
        """Retrieve *url* and return a 5-tuple describing the outcome.

        The tuple is (error, redirect_code, redirect_to, headers,
        content):

        * success:  (None, None, None, headers, content)
        * redirect: (None, errcode, newurl, None, None)
        * IOError:  (error, None, None, None, None), where error is
          None for the "no headers" case and get_error(e) otherwise.

        When *accept_charset* is true and ``bookmark.charset`` is set,
        an Accept-Charset header is added for this request only.
        """
        try:
            # Set fake referer to the base URL
            urllib._urlopener.addheaders[2] = ('Referer', url)

            send_charset = accept_charset and bookmark.charset
            if send_charset:
                urllib._urlopener.addheader('Accept-Charset',
                                            bookmark.charset)
            try:
                fname, headers = urllib.urlretrieve(url)
            finally:
                if send_charset:
                    # Remove Accept-Charset
                    del urllib._urlopener.addheaders[-1]

            # Context manager closes the file even if read() fails.
            with open(fname, 'rb') as infile:
                content = infile.read()

            return None, None, None, headers, content

        except RedirectException as e:
            return None, e.errcode, e.newurl, None, None

        except IOError as e:
            # Slicing args avoids IndexError on exceptions with fewer
            # than two arguments and does not rely on exception
            # indexing.
            if e.args[:2] == ("http error", -1):
                error = None
                bookmark.no_error = "The server did not return any header - it is not an error, actually"
                self.log(' no headers: %s' % bookmark.no_error)
            else:
                error = get_error(e)
                self.log(' Error: %s' % error)

            return error, None, None, None, None

    def get_ftp_welcome(self):
        """Return the welcome text of the last FTP connection.

        Looks up ``urllib._urlopener.ftpcache[ftpcache_key]`` and
        returns its ``ftp.welcome``; ``ftpcache_key`` is reset to None
        afterwards.
        """
        global ftpcache_key
        _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
        # I am assuming there are no duplicate ftp URLs in db.
        # If there are - ftpcache_key in prev line is invalid.
        ftpcache_key = None
        return _welcome

    def finish_check_url(self, bookmark):
        """Run the base-class finish step, then call urllib.urlcleanup()."""
        robot_base.finish_check_url(self, bookmark)
        urllib.urlcleanup()