X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=bkmk_parser.py;h=327e0c45f2dd2beb56711cd2e42eb0747e13bfd7;hb=5fe08492a0172cb8fb53b8cf3515540def24d76f;hp=37f5f7743d33e0fc48527832559677f8775ba66a;hpb=364bdf891fa30093391583c449ebc7cbab4ec2a5;p=bookmarks_db.git diff --git a/bkmk_parser.py b/bkmk_parser.py index 37f5f77..327e0c4 100755 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -5,7 +5,7 @@ """ -import os +import sys, os from m_lib.net.www.html import HTMLParser from bkmk_objects import Folder, Bookmark, Ruler @@ -29,6 +29,8 @@ else: dump_names = debug +DEFAULT_CHARSET = None + class BkmkParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) @@ -42,8 +44,8 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: - if self.charset: - data = unicode(data, self.charset).encode() + if DEFAULT_CHARSET: + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") self.accumulator += data @@ -65,6 +67,9 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass + else: + global DEFAULT_CHARSET + DEFAULT_CHARSET = sys.getdefaultencoding() def start_title(self, attrs):