import sys, os
import time, urllib
-from Robots.bkmk_robot_base import robot_base, RedirectException, get_error
+from Robots.bkmk_robot_base import robot_base, get_error
+class RedirectException(Exception):
+ def __init__(self, errcode, newurl):
+ Exception.__init__(self)
+ self.errcode = errcode
+ self.newurl = newurl
+
class MyURLopener(urllib.URLopener):
- # Error 302 -- relocated (temporarily)
- def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
+ # Error 301 -- relocated (permanently)
+ def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
if headers.has_key('location'):
newurl = headers['location']
elif headers.has_key('uri'):
newurl = "Nowhere"
raise RedirectException(errcode, newurl)
- # Error 301 -- also relocated (permanently)
- http_error_301 = http_error_302
- # Error 307 -- also relocated (temporary)
- http_error_307 = http_error_302
+ # Error 302 -- relocated (temporarily)
+ http_error_302 = http_error_301
+ # Error 303 -- relocated (see other)
+ http_error_303 = http_error_301
+ # Error 307 -- relocated (temporarily)
+ http_error_307 = http_error_301
# Error 401 -- authentication required
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
class robot_urllib(robot_base):
- def urlretrieve(self, bookmark, url, accept_charset=False):
+ def get(self, bookmark, url, accept_charset=False):
try:
# Set a fake Referer header to the base URL
urllib._urlopener.addheaders[2] = ('Referer', url)
if accept_charset and bookmark.charset:
urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
- fname, headers = urllib.urlretrieve(url)
- if accept_charset and bookmark.charset:
- del urllib._urlopener.addheaders[-1]
+ try:
+ fname, headers = urllib.urlretrieve(url)
+ finally:
+ if accept_charset and bookmark.charset:
+ del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
infile = open(fname, 'rb')
content = infile.read()
infile.close()
- return headers, content, None
+ return None, None, None, headers, content
+
+ except RedirectException, e:
+ return None, e.errcode, e.newurl, None, None
except IOError, msg:
if (msg[0] == "http error") and (msg[1] == -1):
error = get_error(msg)
self.log(' Error: %s' % error)
- return None, None, error
+ return error, None, None, None, None
def get_ftp_welcome(self):
global ftpcache_key
# If there are, ftpcache_key on the previous line is invalid.
return _welcome
- def cleanup(self):
+ def finish_check_url(self, bookmark):
+ robot_base.finish_check_url(self, bookmark)
urllib.urlcleanup()