git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/bkmk_rurllib_py3.py
Fix(Py3): `urllib` writes its files as bytes
[bookmarks_db.git] / Robots / bkmk_rurllib_py3.py
index 2b73347568813505a270d9308e1595172aa98819..73f9b77ca3ef19f0106cc3bd3dc3e16902b5a818 100644 (file)
@@ -11,6 +11,7 @@ __license__ = "GNU GPL"
 __all__ = ['robot_urllib_py3']
 
 
+import http.client
 import socket
 import sys
 import urllib.request
@@ -42,6 +43,8 @@ class MyURLopener(urllib.request.URLopener):
     http_error_303 = http_error_301
     # Error 307 -- relocated (temporarily)
     http_error_307 = http_error_301
+    # Error 308 -- relocated (permanently)
+    http_error_308 = http_error_301
 
     # Error 401 -- authentication required
     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
@@ -106,16 +109,35 @@ class robot_urllib_py3(robot_base):
                     # Remove Accept-Charset
                     del opener.addheaders[-1]
 
-            infile = open(fname, 'rt')
-            content = infile.read()
-            infile.close()
-
+            possible_encodings = []
+            for encoding in (
+                    bookmark.charset,
+                    sys.getfilesystemencoding(),
+                    'utf-8',
+            ):
+                if encoding and encoding not in possible_encodings:
+                    possible_encodings.append(encoding)
+            content = e = None
+            infile = open(fname, 'rb')
+            try:
+                content = infile.read()
+            except Exception as e:
+                content = None
+            finally:
+                infile.close()
+
+            if content is None:
+                e = str(e)
+                return (
+                    'ERROR: ' + e,
+                    None, None, None, None
+                )
             return None, None, None, headers, content
 
         except RedirectException as e:
             return None, e.errcode, e.newurl, None, None
 
-        except OSError as e:
+        except (OSError, http.client.IncompleteRead) as e:
             error = str(e)
             self.log('   Error: %s' % error)
             return error, None, None, None, None