git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/parse_html.py
Try a list of charsets, including the universal (utf-8) and the default ones.
[bookmarks_db.git] / Robots / parse_html.py
index c7acb6426e7dfbf386702ffb18fe5f12b3bed1ce..6fe1df954236f855e2f08ae1ca048b4f275a704a 100755 (executable)
@@ -56,7 +56,15 @@ def parse_html(filename, charset=None, log=None):
          charset = None         # ...try charset from HTML
 
    for p in parsers:
-      parser = p(filename, charset)
+      charsets = [universal_charset, DEFAULT_CHARSET]
+      if charset not in charsets:
+         charsets.insert(0, charset)
+      parser = None
+      for c in charsets:
+         try:
+            parser = p(filename, c)
+         except UnicodeEncodeError:
+            pass
       if parser:
          break
       else: