"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_urllib_py3']
+import http.client
import socket
import sys
+import urllib
import urllib.request
from Robots.bkmk_robot_base import robot_base, get_error
+# Fake to import 'add_headers'
+urllib.URLopener = urllib.request.URLopener
+urllib.ftpwrapper = urllib.request.ftpwrapper
+from Robots.bkmk_rurllib import add_headers # noqa: E402 import not at top
+
class RedirectException(Exception):
def __init__(self, errcode, newurl):
http_error_303 = http_error_301
# Error 307 -- relocated (temporarily)
http_error_307 = http_error_301
+ # Error 308 -- relocated (permanently)
+ http_error_308 = http_error_301
# Error 401 -- authentication required
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
urllib.request._opener = opener = MyURLopener()
-
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
-" Gecko/20001221 Firefox/2.0.0"
-opener.addheaders[0] = ('User-Agent', _user_agent)
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
- sys.version_info[0], sys.version_info[1],
- sys.version_info[2], urllib.request.__version__
-)
-opener.addheader('X-User-Agent', _x_user_agent)
-opener.addheader('Referer', '')
-
-opener.addheader('Accept', '*/*')
-opener.addheader('Accept-Language', 'ru,en')
-opener.addheader('Cache-Control', 'max-age=300')
-opener.addheader('Connection', 'close')
-
+add_headers(opener)
urllib_ftpwrapper = urllib.request.ftpwrapper
ftpcache_key = None
# Remove Accept-Charset
del opener.addheaders[-1]
- infile = open(fname, 'rt')
- content = infile.read()
- infile.close()
-
+ possible_encodings = []
+ for encoding in (
+ bookmark.charset,
+ sys.getfilesystemencoding(),
+ 'utf-8',
+ ):
+ if encoding and encoding not in possible_encodings:
+ possible_encodings.append(encoding)
+ # Read the local file as raw bytes; 'e' keeps the failure (if any)
+ # so that the error report below can say what actually went wrong.
+ content = e = None
+ infile = open(fname, 'rb')
+ try:
+     content = infile.read()
+ except Exception as exc:
+     # Bind under a different name: 'except ... as e' would unbind 'e'
+     # at the end of this clause and the report below would fail.
+     content = None
+     e = exc
+ finally:
+     infile.close()
+
+ if content is None:
+     # Return the real cause instead of the literal string 'None'.
+     e = str(e)
+     return (
+         'ERROR: ' + e,
+         None, None, None, None
+     )
return None, None, None, headers, content
except RedirectException as e:
return None, e.errcode, e.newurl, None, None
+ except (OSError, http.client.IncompleteRead) as e:
+ error = str(e)
+ self.log(' Error: %s' % error)
+ return error, None, None, None, None
+
except IOError as e:
if (e[0] == "http error") and (e[1] == -1):
error = None