__all__ = ['robot_urllib_py3']
+import http.client
+import socket
import sys
import urllib.request
http_error_303 = http_error_301
# Error 307 -- relocated (temporarily)
http_error_307 = http_error_301
+ # Error 308 -- relocated (permanently)
+ http_error_308 = http_error_301
# Error 401 -- authentication required
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
fp.close()
raise IOError(('http error', errcode, errmsg, headers))
+ def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ return urllib.request.URLopener.open(self, fullurl, data)
-urllib.request._opener = MyURLopener()
+
+urllib.request._opener = opener = MyURLopener()
# Fake headers to pretend this is a real browser
_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
" Gecko/20001221 Firefox/2.0.0"
-urllib.request._opener.addheaders[0] = ('User-Agent', _user_agent)
+opener.addheaders[0] = ('User-Agent', _user_agent)
_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
sys.version_info[0], sys.version_info[1],
sys.version_info[2], urllib.request.__version__
)
-urllib.request._opener.addheader('X-User-Agent', _x_user_agent)
-urllib.request._opener.addheader('Referer', '')
+opener.addheader('X-User-Agent', _x_user_agent)
+opener.addheader('Referer', '')
-urllib.request._opener.addheader('Accept', '*/*')
-urllib.request._opener.addheader('Accept-Language', 'ru,en')
-urllib.request._opener.addheader('Cache-Control', 'max-age=300')
-urllib.request._opener.addheader('Connection', 'close')
+opener.addheader('Accept', '*/*')
+opener.addheader('Accept-Language', 'ru,en')
+opener.addheader('Cache-Control', 'max-age=300')
+opener.addheader('Connection', 'close')
urllib_ftpwrapper = urllib.request.ftpwrapper
def get(self, bookmark, url, accept_charset=False):
try:
# Set fake referer to the base URL
- urllib.request._opener.addheaders[2] = ('Referer', url)
+ opener.addheaders[2] = ('Referer', url)
if accept_charset and bookmark.charset:
- urllib.request._opener.addheader('Accept-Charset', bookmark.charset)
+ opener.addheader('Accept-Charset', bookmark.charset)
try:
fname, headers = urllib.request.urlretrieve(url)
finally:
if accept_charset and bookmark.charset:
# Remove Accept-Charset
- del urllib.request._opener.addheaders[-1]
-
- infile = open(fname, 'rt')
- content = infile.read()
- infile.close()
-
+ del opener.addheaders[-1]
+
+ possible_encodings = []
+ for encoding in (
+ bookmark.charset,
+ sys.getfilesystemencoding(),
+ 'utf-8',
+ ):
+ if encoding and encoding not in possible_encodings:
+ possible_encodings.append(encoding)
+            content = err = None
+            infile = open(fname, 'rb')
+            try:
+                content = infile.read()
+            except Exception as exc:
+                # Capture the message now: `exc` is unbound once the
+                # except clause exits (PEP 3110), so str(exc) later fails.
+                err = str(exc)
+            finally:
+                infile.close()
+
+            if content is None:
+                return (
+                    'ERROR: ' + err,
+                    None, None, None, None
+                )
return None, None, None, headers, content
except RedirectException as e:
return None, e.errcode, e.newurl, None, None
+ except (OSError, http.client.IncompleteRead) as e:
+ error = str(e)
+ self.log(' Error: %s' % error)
+ return error, None, None, None, None
+
except IOError as e:
if (e[0] == "http error") and (e[1] == -1):
error = None
def get_ftp_welcome(self):
global ftpcache_key
- _welcome = urllib.request._opener.ftpcache[ftpcache_key].ftp.welcome
+ _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
# I am assuming there are no duplicate ftp URLs in db.
# If there are - ftpcache_key in next line is invalid.
ftpcache_key = None
def finish_check_url(self, bookmark):
robot_base.finish_check_url(self, bookmark)
urllib.request.urlcleanup()
+ urllib.request._opener = opener