Fix(parse_html): Fix import

[bookmarks_db.git] / parse_html / bkmk_parse_html.py
diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py

index 6afb7f9df735f6a487b6b7537fc23f2c3daa71c2..2e7df1a2b90fde750d0d47aafcf073a11c60c3a5 100644 (file)
--- a/parse_html/bkmk_parse_html.py
+++ b/parse_html/bkmk_parse_html.py
@@ -13,6 +13,13 @@ __all__ = ['parse_html', 'parse_filename', 'universal_charset']
  
  import codecs
  import os
+import re
+from htmlentitydefs import name2codepoint
+
+from compat import unicode, unichr
+
+DEFAULT_CHARSET = "cp1251"  # Stupid default for Russian Cyrillic
+parsers = []
  
  try:
      from . import bkmk_ph_beautifulsoup4
@@ -59,15 +66,8 @@ else:
  # else:
  #     parsers.append(bkmk_ph_etreetidy.parse_html)
  
-import re
-from htmlentitydefs import name2codepoint
-
  universal_charset = "utf-8"
-DEFAULT_CHARSET = "cp1251"  # Stupid default for Russian Cyrillic
-
-parsers = []
-
-entity_re = re.compile("(&\w+;)")
+entity_re = re.compile("(&\\w+;)")
  num_entity_re = re.compile("(&#[0-9]+;)")
  
  
@@ -167,7 +167,7 @@ def parse_html(html_text, charset=None, log=None):
      converted_title = title = parser.title
      if title and (not parser.charset):
          try:
-            unicode(title, "ascii")
+            title.decode("ascii")
          except UnicodeDecodeError:
              parser.charset = DEFAULT_CHARSET
  
@@ -186,7 +186,7 @@ def parse_html(html_text, charset=None, log=None):
              if log: log("   title          : %s" % title)
              if parser.charset != universal_charset:
                  try:
-                    converted_title = unicode(title, parser.charset).\
+                    converted_title = title.decode(parser.charset).\
                          encode(universal_charset)
                  except UnicodeError:
                      if log:
@@ -194,7 +194,7 @@ def parse_html(html_text, charset=None, log=None):
                              "converting from %s"
                              % (parser.charset, DEFAULT_CHARSET))
                      converted_title = \
-                        unicode(title, DEFAULT_CHARSET, "replace").\
+                        title.decode(DEFAULT_CHARSET, "replace").\
                          encode(universal_charset, "replace")
                      parser.charset = DEFAULT_CHARSET
              if log and (converted_title != title):