From dd8d2bd3a0f9bf4e09f58d7289437cf43350f373 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Mon, 13 Nov 2023 18:13:14 +0300 Subject: [PATCH] Fix(Py3): Guess input file encoding --- Robots/bkmk_rurllib_py3.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/Robots/bkmk_rurllib_py3.py b/Robots/bkmk_rurllib_py3.py index 2b73347..1da8ec8 100644 --- a/Robots/bkmk_rurllib_py3.py +++ b/Robots/bkmk_rurllib_py3.py @@ -106,10 +106,31 @@ class robot_urllib_py3(robot_base): # Remove Accept-Charset del opener.addheaders[-1] - infile = open(fname, 'rt') - content = infile.read() - infile.close() - + possible_encodings = [] + for encoding in ( + bookmark.charset, + sys.getfilesystemencoding(), + 'utf-8', + ): + if encoding and encoding not in possible_encodings: + possible_encodings.append(encoding) + content = None + for encoding in possible_encodings: + infile = open(fname, 'rt', encoding=encoding) + try: + content = infile.read() + except UnicodeDecodeError: + infile.close() + continue + else: + break + infile.close() + + if content is None: + return ( + 'ERROR: File encoding was not recognized', + None, None, None, None + ) return None, None, None, headers, content except RedirectException as e: -- 2.39.2