1 """Simple, straightforward robot based on urllib
3 This file is a part of Bookmarks database and Internet robot.
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_urllib']
16 from Robots.bkmk_robot_base import robot_base, RedirectException, get_error
class MyURLopener(urllib.URLopener):
    """URL opener that reports HTTP redirects and errors as exceptions.

    Redirect responses (301, 302, 307) are not followed: they raise
    RedirectException with the status code and the redirect target.
    A 401 response and every other HTTP error raise IOError whose
    arguments are ('http error', errcode, message, headers).
    """

    def _close_response(self, fp):
        """Close the response file object *fp*, if any, before raising."""
        if fp is not None:
            fp.close()

    # Error 302 -- relocated (temporarily)
    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise RedirectException(errcode, target) for a redirect response.

        The target is read from the 'location' header, or from 'uri'
        when 'location' is absent.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # Redirect without a target header: use a placeholder so the
            # redirect is still reported rather than failing with NameError.
            newurl = "Nowhere"
        self._close_response(fp)
        raise RedirectException(errcode, newurl)

    # Error 301 -- also relocated (permanently)
    http_error_301 = http_error_302
    # Error 307 -- also relocated (temporarily)
    http_error_307 = http_error_302

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise IOError instead of prompting for credentials."""
        self._close_response(fp)
        raise IOError('http error', errcode, "Authentication required ", headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Close the response and raise IOError for any other HTTP error."""
        self._close_response(fp)
        raise IOError('http error', errcode, errmsg, headers)
# Install the custom opener as urllib's shared module-level opener.
urllib._urlopener = MyURLopener()

# Fake headers to pretend this is a real browser
_version = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en) Gecko/20001221 Firefox/2.0.0"
urllib._urlopener.addheaders[0] = ('User-Agent', _version)

# The robot's real identity is sent in a separate header.
_version = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
    sys.version_info[:3] + (urllib.__version__,))

# Order matters: 'Referer' has to land at index 2 of addheaders, because
# robot_urllib.urlretrieve overwrites that slot on every request.
for _header in (
    ('X-User-Agent', _version),
    ('Referer', ''),
    ('Connection', 'close'),
    ('Accept', '*/*'),
    ('Accept-Language', 'ru,en'),
    ('Cache-Control', 'max-age=300'),
):
    urllib._urlopener.addheader(*_header)
del _header
urllib_ftpwrapper = urllib.ftpwrapper

# Key, in urllib._urlopener.ftpcache, of the FTP connection created most
# recently. Set by myftpwrapper.__init__, consumed (and reset) by
# robot_urllib.get_ftp_welcome.
ftpcache_key = None


class myftpwrapper(urllib_ftpwrapper):
    """ftpwrapper that records the cache key of the connection it creates."""

    def __init__(self, user, passwd, host, port, dirs, *args, **kwargs):
        """Create the wrapper and remember its ftpcache key.

        Extra positional/keyword arguments are passed through unchanged to
        the base ftpwrapper.
        """
        # Without this declaration the assignment below would only create
        # a local that nobody reads.
        global ftpcache_key
        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs,
                                   *args, **kwargs)
        ftpcache_key = (user, host, port, '/'.join(dirs))


urllib.ftpwrapper = myftpwrapper


class robot_urllib(robot_base):
    """Robot that downloads bookmarked URLs through the shared urllib opener."""

    def urlretrieve(self, bookmark, url, accept_charset=False):
        """Download *url* and return ``(headers, content)``.

        When *accept_charset* is true and the bookmark has a charset, an
        'Accept-Charset' header is sent for this request only.

        An IOError is not propagated: it is recorded on *bookmark*
        (``no_error`` or ``error``), logged, and ``(None, None)`` is
        returned. Other exceptions propagate to the caller.
        """
        opener = urllib._urlopener
        # Set fake referer to the base URL
        opener.addheaders[2] = ('Referer', url)

        send_charset = accept_charset and bookmark.charset
        try:
            if send_charset:
                opener.addheader('Accept-Charset', bookmark.charset)
            try:
                fname, headers = urllib.urlretrieve(url)
            finally:
                # The opener is shared; remove the per-request header even
                # when the download raises (every redirect does), otherwise
                # stale 'Accept-Charset' headers pile up on later requests.
                if send_charset:
                    del opener.addheaders[-1]

            with open(fname, 'rb') as infile:
                content = infile.read()
            return headers, content

        except IOError as msg:
            # Compare through .args so an IOError with fewer than two
            # arguments cannot raise IndexError here.
            if msg.args[:2] == ("http error", -1):
                bookmark.no_error = "The server did not return any header - it is not an error, actually"
                self.log(' no headers: %s' % bookmark.no_error)
            else:
                # NOTE(review): the two outcomes are treated as alternatives;
                # confirm against the intended behavior.
                bookmark.error = get_error(msg)
                self.log(' Error: %s' % bookmark.error)
            # NOTE(review): (None, None) keeps the two-item shape of the
            # success result; confirm that callers expect this on failure.
            return None, None

    def get_ftp_welcome(self):
        """Return the welcome message of the most recent FTP connection.

        The recorded cache key is reset afterwards.
        """
        global ftpcache_key
        _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
        # I am assuming there are no duplicate ftp URLs in db.
        # If there are - ftpcache_key in the lookup above is invalid.
        ftpcache_key = None
        return _welcome