X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib_py3.py;h=268dad43cbb3444b910c8428d436603282503b49;hb=fbb34ad74643bbc69d141e26e105bcef3cdbc62c;hp=1da8ec879e96925e40a8ab9cb9fe2e54407ffb7c;hpb=dd8d2bd3a0f9bf4e09f58d7289437cf43350f373;p=bookmarks_db.git diff --git a/Robots/bkmk_rurllib_py3.py b/Robots/bkmk_rurllib_py3.py index 1da8ec8..268dad4 100644 --- a/Robots/bkmk_rurllib_py3.py +++ b/Robots/bkmk_rurllib_py3.py @@ -5,18 +5,25 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['robot_urllib_py3'] +import http.client import socket import sys +import urllib import urllib.request from Robots.bkmk_robot_base import robot_base, get_error +# Fake to import 'add_headers' +urllib.URLopener = urllib.request.URLopener +urllib.ftpwrapper = urllib.request.ftpwrapper +from Robots.bkmk_rurllib import add_headers # noqa: E402 import not at top + class RedirectException(Exception): def __init__(self, errcode, newurl): @@ -42,6 +49,8 @@ class MyURLopener(urllib.request.URLopener): http_error_303 = http_error_301 # Error 307 -- relocated (temporarily) http_error_307 = http_error_301 + # Error 308 -- relocated (permanently) + http_error_308 = http_error_301 # Error 401 -- authentication required def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): @@ -59,23 +68,7 @@ class MyURLopener(urllib.request.URLopener): urllib.request._opener = opener = MyURLopener() - -# Fake headers to pretend this is a real browser -_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)" -" Gecko/20001221 Firefox/2.0.0" -opener.addheaders[0] = ('User-Agent', _user_agent) -_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % ( - sys.version_info[0], sys.version_info[1], - sys.version_info[2], urllib.request.__version__ -) -opener.addheader('X-User-Agent', _x_user_agent) -opener.addheader('Referer', '') - -opener.addheader('Accept', '*/*') -opener.addheader('Accept-Language', 'ru,en') -opener.addheader('Cache-Control', 'max-age=300') -opener.addheader('Connection', 'close') - +add_headers(opener) urllib_ftpwrapper = urllib.request.ftpwrapper ftpcache_key = None @@ -114,21 +107,19 @@ class robot_urllib_py3(robot_base): ): if encoding and encoding not in possible_encodings: possible_encodings.append(encoding) - content = None - for encoding in possible_encodings: - infile = open(fname, 'rt', encoding=encoding) - try: - content = infile.read() - except UnicodeDecodeError: - infile.close() - continue - else: - break + content = e = None + infile = open(fname, 'rb') + try: + content = infile.read() + except Exception: + content = None + finally: infile.close() if content is None: + e = str(e) return ( - 'ERROR: File encoding was not recognized', + 'ERROR: ' + e, None, None, None, None ) return None, None, None, headers, content @@ -136,7 +127,7 @@ class robot_urllib_py3(robot_base): except RedirectException as e: return None, e.errcode, e.newurl, None, None - except OSError as e: + except (OSError, http.client.IncompleteRead) as e: error = str(e) self.log(' Error: %s' % error) return error, None, None, None, None