--- /dev/null
+"""Simple, straightforward robot based on urllib
+This file is a part of Bookmarks database and Internet robot.
+"""
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__license__ = "GNU GPL"
+__all__ = ['robot_urllib_py3']
+import sys
+import urllib.request
+from Robots.bkmk_robot_base import robot_base, get_error
class RedirectException(Exception):
    """Raised by the opener when the server answers with a redirect.

    Attributes:
        errcode: the HTTP status code of the redirect response.
        newurl: the redirect target reported by the server.
    """

    def __init__(self, errcode, newurl):
        # No positional arguments are forwarded, so ``args`` stays empty;
        # the details are carried only by the two attributes below.
        super().__init__()
        self.newurl = newurl
        self.errcode = errcode
class MyURLopener(urllib.request.URLopener):
    """URLopener that reports redirects and HTTP errors through exceptions.

    Redirect responses (301, 302, 303, 307) raise ``RedirectException``
    instead of being followed; 401 and every other HTTP error raise
    ``IOError`` whose single argument is the tuple
    ``('http error', errcode, errmsg, headers)``.
    """

    def open(self, fullurl, data=None, timeout=None):
        """Open ``fullurl``, tolerating the ``timeout`` argument.

        ``urllib.request.urlopen()`` invokes ``opener.open(url, data,
        timeout)`` on the installed opener, while ``URLopener.open()`` only
        accepts ``(fullurl, data)``.  ``timeout`` is accepted for
        compatibility and ignored.
        """
        return super().open(fullurl, data)

    @staticmethod
    def _close_response(fp, drain=False):
        """Close the response object ``fp`` (if any), optionally reading it first."""
        if fp:
            if drain:
                fp.read()
            fp.close()

    # Error 301 -- relocated (permanently)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise ``RedirectException`` carrying the status code and target.

        The target is taken from the ``location`` header, then from ``uri``;
        when neither is present the placeholder ``"Nowhere"`` is used.
        """
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            newurl = "Nowhere"
        # The response is not used any further; do not leak it.
        self._close_response(fp)
        raise RedirectException(errcode, newurl)

    # Error 302 -- relocated (temporarily)
    http_error_302 = http_error_301
    # Error 303 -- relocated (see other)
    http_error_303 = http_error_301
    # Error 307 -- relocated (temporarily)
    http_error_307 = http_error_301

    # Error 401 -- authentication required
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Raise ``IOError`` instead of asking for credentials."""
        self._close_response(fp)
        raise IOError(
            ('http error', errcode, "Authentication required ", headers))

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Read and close the response, then raise ``IOError``."""
        self._close_response(fp, drain=True)
        raise IOError(('http error', errcode, errmsg, headers))
# Install the customised opener as the module-wide default opener.
urllib.request._opener = MyURLopener()

# Fake headers to pretend this is a real browser.
# The parentheses are required: adjacent string literals on separate
# lines are only joined inside brackets; without them the second literal
# is a separate no-op statement and the header value is truncated.
_user_agent = (
    "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
    " Gecko/20001221 Firefox/2.0.0"
)
_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
    sys.version_info[0], sys.version_info[1],
    sys.version_info[2], urllib.request.__version__,
)

# Assign the complete list instead of appending to the one created by
# URLopener.__init__: CPython seeds that list with User-Agent *and* Accept,
# so appending would push Referer away from index 2 (the position at which
# it is later addressed) and send a duplicate Accept header.
urllib.request._opener.addheaders = [
    ('User-Agent', _user_agent),
    ('X-User-Agent', _x_user_agent),
    ('Referer', ''),
    ('Accept', '*/*'),
    ('Accept-Language', 'ru,en'),
    ('Cache-Control', 'max-age=300'),
    ('Connection', 'close'),
]
# Keep a reference to the stock implementation before it is replaced below.
urllib_ftpwrapper = urllib.request.ftpwrapper

# Cache key of the most recently created FTP connection, or None.
ftpcache_key = None


class myftpwrapper(urllib_ftpwrapper):
    """``ftpwrapper`` that records the cache key of each new connection.

    The key ``(user, host, port, '/'.join(dirs))`` is stored in the
    module-level ``ftpcache_key``.
    """

    def __init__(self, user, passwd, host, port, dirs, *args, **kwargs):
        # Extra positional/keyword arguments (the stock class also accepts
        # ``timeout`` and ``persistent``) are passed through unchanged, so
        # this drop-in replacement works for every caller of the original.
        global ftpcache_key
        urllib_ftpwrapper.__init__(
            self, user, passwd, host, port, dirs, *args, **kwargs)
        # Only reached when the base initialiser succeeded.
        ftpcache_key = (user, host, port, '/'.join(dirs))


# Make urllib create myftpwrapper objects instead of the stock ftpwrapper.
urllib.request.ftpwrapper = myftpwrapper
class robot_urllib_py3(robot_base):
    """Robot that downloads bookmarked URLs with ``urllib.request``.

    All requests go through the module-global ``urllib.request._opener``,
    whose header list is adjusted around every download.
    """

    @staticmethod
    def _set_header(name, value):
        """Replace the first ``name`` header of the global opener, or add it.

        Looking the header up by name (case-insensitively) avoids depending
        on its position in the ``addheaders`` list.
        """
        opener_headers = urllib.request._opener.addheaders
        wanted = name.lower()
        for index, (key, _old_value) in enumerate(opener_headers):
            if key.lower() == wanted:
                opener_headers[index] = (name, value)
                return
        opener_headers.append((name, value))

    def get(self, bookmark, url, accept_charset=False):
        """Download ``url`` for ``bookmark``.

        Returns a 5-tuple
        ``(error, redirect_code, redirect_to, headers, content)``:

        * success:  ``(None, None, None, headers, content)``
        * redirect: ``(None, errcode, newurl, None, None)``
        * failure:  ``(error, None, None, None, None)``; ``error`` is
          ``None`` when the failure was "http error" with code -1, in which
          case ``bookmark.no_error`` is set instead.

        When ``accept_charset`` is true and ``bookmark.charset`` is set, an
        ``Accept-Charset`` header is sent for this request only.
        """
        try:
            # Set fake referer to the base URL
            self._set_header('Referer', url)

            opener_headers = urllib.request._opener.addheaders
            charset_header = None
            if accept_charset and bookmark.charset:
                charset_header = ('Accept-Charset', bookmark.charset)
                opener_headers.append(charset_header)
            try:
                fname, headers = urllib.request.urlretrieve(url)
            finally:
                if charset_header is not None:
                    # Remove Accept-Charset
                    opener_headers.remove(charset_header)

            # NOTE(review): text mode with the default encoding; a decoding
            # failure is not an IOError and is not handled here -- confirm
            # this is intended.
            with open(fname, 'rt') as infile:
                content = infile.read()

            return None, None, None, headers, content

        except RedirectException as e:
            return None, e.errcode, e.newurl, None, None

        except IOError as e:
            # Exceptions cannot be indexed on Python 3; inspect ``args``.
            # The error may carry the details either as separate arguments
            # or as one tuple argument.
            details = e.args
            if len(details) == 1 and isinstance(details[0], tuple):
                details = details[0]

            if details[:2] == ("http error", -1):
                error = None
                # Parenthesised so that both literals form one message.
                bookmark.no_error = (
                    "The server did not return any header - "
                    "it is not an error, actually"
                )
                self.log(' no headers: %s' % bookmark.no_error)
            else:
                error = get_error(e)
                self.log(' Error: %s' % error)

            return error, None, None, None, None

    def get_ftp_welcome(self):
        """Return the FTP welcome message of the last FTP connection.

        Reads it from the opener's FTP cache entry named by the module-level
        ``ftpcache_key`` and then resets that key to ``None``.
        """
        global ftpcache_key
        _welcome = urllib.request._opener.ftpcache[ftpcache_key].ftp.welcome
        # I am assuming there are no duplicate ftp URLs in db.
        # If there are - ftpcache_key in next line is invalid.
        ftpcache_key = None
        return _welcome

    def finish_check_url(self, bookmark):
        """Run the base-class finalisation, then clean up after urlretrieve."""
        robot_base.finish_check_url(self, bookmark)
        urllib.request.urlcleanup()