git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/bkmk_rurllib_py3.py
Fix(Robots/bkmk_rrequests): No need to re-check error 404 via proxy
[bookmarks_db.git] / Robots / bkmk_rurllib_py3.py
index 1daec389aebaa27f3eb502cc2ef2016310dbea4c..268dad43cbb3444b910c8428d436603282503b49 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['robot_urllib_py3']
@@ -14,10 +14,16 @@ __all__ = ['robot_urllib_py3']
 import http.client
 import socket
 import sys
+import urllib
 import urllib.request
 
 from Robots.bkmk_robot_base import robot_base, get_error
 
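+# Alias the urllib.request classes on the top-level 'urllib' module
+# so that Robots.bkmk_rurllib can be imported for 'add_headers'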
+urllib.URLopener = urllib.request.URLopener
+urllib.ftpwrapper = urllib.request.ftpwrapper
+from Robots.bkmk_rurllib import add_headers  # noqa: E402 import not at top
+
 
 class RedirectException(Exception):
     def __init__(self, errcode, newurl):
@@ -62,23 +68,7 @@ class MyURLopener(urllib.request.URLopener):
 
 
 urllib.request._opener = opener = MyURLopener()
-
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
-" Gecko/20001221 Firefox/2.0.0"
-opener.addheaders[0] = ('User-Agent', _user_agent)
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
-   sys.version_info[0], sys.version_info[1],
-   sys.version_info[2], urllib.request.__version__
-)
-opener.addheader('X-User-Agent', _x_user_agent)
-opener.addheader('Referer', '')
-
-opener.addheader('Accept', '*/*')
-opener.addheader('Accept-Language', 'ru,en')
-opener.addheader('Cache-Control', 'max-age=300')
-opener.addheader('Connection', 'close')
-
+add_headers(opener)
 
 urllib_ftpwrapper = urllib.request.ftpwrapper
 ftpcache_key = None
@@ -117,21 +107,19 @@ class robot_urllib_py3(robot_base):
             ):
                 if encoding and encoding not in possible_encodings:
                     possible_encodings.append(encoding)
-            content = None
-            for encoding in possible_encodings:
-                infile = open(fname, 'rt', encoding=encoding)
-                try:
-                    content = infile.read()
-                except UnicodeDecodeError:
-                    infile.close()
-                    continue
-                else:
-                    break
+            content = e = None
+            infile = open(fname, 'rb')
+            try:
+                content = infile.read()
+            except Exception:
+                content = None
+            finally:
                 infile.close()
 
             if content is None:
+                e = str(e)
                 return (
-                    'ERROR: File encoding was not recognized',
+                    'ERROR: ' + e,
                     None, None, None, None
                 )
             return None, None, None, headers, content