1 """Robot based on urllib2
3 This file is a part of Bookmarks database and Internet robot.
# Module metadata.
__author__ = 'Oleg Broytman <phd@phdru.name>'
__copyright__ = 'Copyright (C) 2014 PhiloSoft Design'
__license__ = 'GNU GPL'

# The robot class is the only public name of this module.
__all__ = ["robot_urllib2"]
17 from Robots.bkmk_robot_base import robot_base, get_error
class FTPHandler(urllib2.FTPHandler):
    """FTP handler that remembers the wrapper of the latest FTP connection.

    The wrapper is published in the module-level name ``_fw``, which
    ``robot_urllib2.get_ftp_welcome()`` reads to obtain the server greeting.
    """

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Open the FTP connection via the stock handler and record it.

        All arguments are passed through unchanged to
        ``urllib2.FTPHandler.connect_ftp``.  Its result is stored in the
        module global ``_fw`` and returned to the caller.
        """
        # Without the ``global`` declaration the assignment would only bind
        # a local name and the wrapper would be invisible to the robot.
        global _fw
        _fw = urllib2.FTPHandler.connect_ftp(self, user, passwd, host, port,
                                             dirs, timeout)
        # The base class's ftp_open() retrieves the file through the object
        # returned here, so the override must hand it back.
        return _fw
# Assemble the opener by hand rather than with urllib2.build_opener(), so
# that only the handlers listed here take part in a request.
default_classes = []
if hasattr(httplib, 'HTTPS'):
    # HTTPS support is optional; when available its handler goes first.
    default_classes.append(urllib2.HTTPSHandler)
default_classes.extend((
    urllib2.HTTPHandler,
    urllib2.HTTPDefaultErrorHandler,
    FTPHandler,
    urllib2.HTTPErrorProcessor,
))

opener = urllib2.OpenerDirector()
for klass in default_classes:
    opener.add_handler(klass())

# From now on urllib2.urlopen() goes through this opener.
urllib2.install_opener(opener)
# Headers that make the robot look like an ordinary browser.
_user_agent = (
    "Mozilla/5.0 (X11; Linux i686; rv:30.0) "
    "Gecko/20100101 Firefox/30.0"
)
# The robot's own identification: program name plus the version of the
# running interpreter (major.minor.micro).
_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib2)" % tuple(
    sys.version_info[:3])
class robot_urllib2(robot_base):
    """Bookmark-checking robot that fetches URLs through urllib2."""

    def get(self, bookmark, url, accept_charset=False):
        """Fetch *url* and report the outcome as a 5-tuple.

        Parameters:
            bookmark: object whose ``charset`` attribute is sent as the
                ``Accept-Charset`` header when *accept_charset* is true
                and the attribute is non-empty.
            url: address to request; it is also sent as the ``Referer``.
            accept_charset: whether to advertise ``bookmark.charset``.

        Returns a tuple of five items:
            * success: ``(None, None, None, headers, body)``;
            * HTTP status 301/302/303/307:
              ``(None, code, value of the Location header, None, None)``;
            * any other handled failure:
              ``(error message, None, None, None, None)``; the message is
              also passed to ``self.log``.
        """
        request = urllib2.Request(url)
        request.add_header('Accept', '*/*')
        if accept_charset and bookmark.charset:
            request.add_header('Accept-Charset', bookmark.charset)
        request.add_header('Accept-Language', 'ru,en')
        request.add_header('Cache-Control', 'max-age=300')
        request.add_header('Connection', 'close')
        request.add_header('Referer', url)
        request.add_header('User-agent', _user_agent)
        request.add_header('X-User-Agent', _x_user_agent)

        try:
            response = urllib2.urlopen(request)

        # HTTPError must be tried before URLError: it is a subclass of it.
        except urllib2.HTTPError as e:
            if e.code in (301, 302, 303, 307):
                # Redirects are not followed here; the target is reported
                # to the caller instead.
                return None, e.code, e.hdrs['Location'], None, None

            self.log(' HTTP Error %s: %s' % (e.code, e.msg))
            return "HTTP Error %s: %s" % (e.code, e.msg), None, None, None, None

        except urllib2.URLError as e:
            self.log(' URL Error: %s' % e.reason)
            return "URL Error: %s" % e.reason, None, None, None, None

        except httplib.HTTPException as e:
            error = get_error(e)
            self.log(' HTTP Exception: %s' % error)
            return "HTTP Exception: %s" % error, None, None, None, None

        except IOError as e:
            error = get_error(e)
            self.log(' I/O Error: %s' % error)
            return "I/O Error: %s" % error, None, None, None, None

        # Read everything first, then release the connection even if
        # reading fails; errors raised while reading still propagate.
        try:
            headers = response.info()
            content = response.read()
        finally:
            response.close()
        return None, None, None, headers, content

    def get_ftp_welcome(self):
        """Return the greeting of the FTP server contacted most recently.

        Reads the connection wrapper kept in the module-level name ``_fw``,
        which ``FTPHandler.connect_ftp`` assigns.
        """
        return _fw.ftp.welcome