git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/bkmk_rurllib_py3.py
Fix(Py3): Guess input file encoding
[bookmarks_db.git] / Robots / bkmk_rurllib_py3.py
index aca07cd86a09d21845597a3d84f76f10c98a2a48..1da8ec879e96925e40a8ab9cb9fe2e54407ffb7c 100644 (file)
@@ -106,15 +106,41 @@ class robot_urllib_py3(robot_base):
                     # Remove Accept-Charset
                     del opener.addheaders[-1]
 
-            infile = open(fname, 'rt')
-            content = infile.read()
-            infile.close()
-
+            possible_encodings = []
+            for encoding in (
+                    bookmark.charset,
+                    sys.getfilesystemencoding(),
+                    'utf-8',
+            ):
+                if encoding and encoding not in possible_encodings:
+                    possible_encodings.append(encoding)
+            content = None
+            for encoding in possible_encodings:
+                infile = open(fname, 'rt', encoding=encoding)
+                try:
+                    content = infile.read()
+                except UnicodeDecodeError:
+                    infile.close()
+                    continue
+                else:
+                    break
+                infile.close()
+
+            if content is None:
+                return (
+                    'ERROR: File encoding was not recognized',
+                    None, None, None, None
+                )
             return None, None, None, headers, content
 
         except RedirectException as e:
             return None, e.errcode, e.newurl, None, None
 
+        except OSError as e:
+            error = str(e)
+            self.log('   Error: %s' % error)
+            return error, None, None, None, None
+
         except IOError as e:
             if (e[0] == "http error") and (e[1] == -1):
                 error = None