X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=blobdiff_plain;f=Robots%2Fbkmk_rurllib.py;h=c7324e315bcd7ad5a5c1c1b785bcf2c139a29f91;hp=6f15ad5cf193aecd5832dd5ed7837ca2f936e232;hb=c88cb7a75e7caf1d67466cfa107981d95115fa0c;hpb=a04eaa0346e8aa5ad86a195f8f4d36487ebfe09c diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py index 6f15ad5..c7324e3 100644 --- a/Robots/bkmk_rurllib.py +++ b/Robots/bkmk_rurllib.py @@ -17,38 +17,38 @@ from Robots.bkmk_robot_base import robot_base, get_error class RedirectException(Exception): - def __init__(self, errcode, newurl): - Exception.__init__(self) - self.errcode = errcode - self.newurl = newurl + def __init__(self, errcode, newurl): + Exception.__init__(self) + self.errcode = errcode + self.newurl = newurl class MyURLopener(urllib.URLopener): - # Error 301 -- relocated (permanently) - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - if headers.has_key('location'): - newurl = headers['location'] - elif headers.has_key('uri'): - newurl = headers['uri'] - else: - newurl = "Nowhere" - raise RedirectException(errcode, newurl) - - # Error 302 -- relocated (temporarily) - http_error_302 = http_error_301 - # Error 303 -- relocated (see other) - http_error_303 = http_error_301 - # Error 307 -- relocated (temporarily) - http_error_307 = http_error_301 - - # Error 401 -- authentication required - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError(('http error', errcode, "Authentication required ", headers)) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - if fp: - void = fp.read() - fp.close() - raise IOError(('http error', errcode, errmsg, headers)) + # Error 301 -- relocated (permanently) + def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): + if headers.has_key('location'): + newurl = headers['location'] + elif headers.has_key('uri'): + newurl = headers['uri'] + else: + newurl = "Nowhere" + raise RedirectException(errcode, newurl) + + # Error 302 -- relocated (temporarily) + http_error_302 = http_error_301 + # Error 303 -- relocated (see other) + http_error_303 = http_error_301 + # Error 307 -- relocated (temporarily) + http_error_307 = http_error_301 + + # Error 401 -- authentication required + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): + raise IOError(('http error', errcode, "Authentication required ", headers)) + + def http_error_default(self, url, fp, errcode, errmsg, headers): + if fp: + void = fp.read() + fp.close() + raise IOError(('http error', errcode, errmsg, headers)) urllib._urlopener = MyURLopener() @@ -71,55 +71,55 @@ urllib_ftpwrapper = urllib.ftpwrapper ftpcache_key = None class myftpwrapper(urllib_ftpwrapper): - def __init__(self, user, passwd, host, port, dirs): - urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) - global ftpcache_key - ftpcache_key = (user, host, port, '/'.join(dirs)) + def __init__(self, user, passwd, host, port, dirs): + urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) + global ftpcache_key + ftpcache_key = (user, host, port, '/'.join(dirs)) urllib.ftpwrapper = myftpwrapper class robot_urllib(robot_base): - def get(self, bookmark, url, accept_charset=False): - try: - # Set fake referer to the base URL - urllib._urlopener.addheaders[2] = ('Referer', url) - - if accept_charset and bookmark.charset: - urllib._urlopener.addheader('Accept-Charset', bookmark.charset) - try: - fname, headers = urllib.urlretrieve(url) - finally: + def get(self, bookmark, url, accept_charset=False): + try: + # Set fake referer to the base URL + urllib._urlopener.addheaders[2] = ('Referer', url) + if accept_charset and bookmark.charset: - del urllib._urlopener.addheaders[-1] # Remove Accept-Charset - - infile = open(fname, 'rb') - content = infile.read() - infile.close() - - return None, None, None, headers, content - - except RedirectException as e: - return None, e.errcode, e.newurl, None, None - - except IOError as e: - if (e[0] == "http error") and (e[1] == -1): - error = None - bookmark.no_error = "The server did not return any header - it is not an error, actually" - self.log(' no headers: %s' % bookmark.no_error) - else: - error = get_error(e) - self.log(' Error: %s' % error) - - return error, None, None, None, None - - def get_ftp_welcome(self): - global ftpcache_key - _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome - ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. - # If there are - ftpcache_key in prev line is invalid. - return _welcome - - def finish_check_url(self, bookmark): - robot_base.finish_check_url(self, bookmark) - urllib.urlcleanup() + urllib._urlopener.addheader('Accept-Charset', bookmark.charset) + try: + fname, headers = urllib.urlretrieve(url) + finally: + if accept_charset and bookmark.charset: + del urllib._urlopener.addheaders[-1] # Remove Accept-Charset + + infile = open(fname, 'rb') + content = infile.read() + infile.close() + + return None, None, None, headers, content + + except RedirectException as e: + return None, e.errcode, e.newurl, None, None + + except IOError as e: + if (e[0] == "http error") and (e[1] == -1): + error = None + bookmark.no_error = "The server did not return any header - it is not an error, actually" + self.log(' no headers: %s' % bookmark.no_error) + else: + error = get_error(e) + self.log(' Error: %s' % error) + + return error, None, None, None, None + + def get_ftp_welcome(self): + global ftpcache_key + _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome + ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. + # If there are - ftpcache_key in prev line is invalid. + return _welcome + + def finish_check_url(self, bookmark): + robot_base.finish_check_url(self, bookmark) + urllib.urlcleanup()