From: Oleg Broytman Date: Fri, 23 Jan 2004 09:09:28 +0000 (+0000) Subject: Set unicode encoding/decoding error mode to "replace". X-Git-Tag: v4.5.3~347 X-Git-Url: https://git.phdru.name/?p=bookmarks_db.git;a=commitdiff_plain;h=1f4d6d3d0e3a55e3390ef0f0e20f2358baa57fc1 Set unicode encoding/decoding error mode to "replace". git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@29 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/bkmk_parser.py b/bkmk_parser.py index 37f5f77..327e0c4 100755 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -5,7 +5,7 @@ """ -import os +import sys, os from m_lib.net.www.html import HTMLParser from bkmk_objects import Folder, Bookmark, Ruler @@ -29,6 +29,8 @@ else: dump_names = debug +DEFAULT_CHARSET = None + class BkmkParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) @@ -42,8 +44,8 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: - if self.charset: - data = unicode(data, self.charset).encode() + if DEFAULT_CHARSET: + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") self.accumulator += data @@ -65,6 +67,9 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass + else: + global DEFAULT_CHARSET + DEFAULT_CHARSET = sys.getdefaultencoding() def start_title(self, attrs):