From 1f4d6d3d0e3a55e3390ef0f0e20f2358baa57fc1 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Fri, 23 Jan 2004 09:09:28 +0000 Subject: [PATCH] Set unicode encoding/decoding error mode to "replace". git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@29 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- bkmk_parser.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bkmk_parser.py b/bkmk_parser.py index 37f5f77..327e0c4 100755 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -5,7 +5,7 @@ """ -import os +import sys, os from m_lib.net.www.html import HTMLParser from bkmk_objects import Folder, Bookmark, Ruler @@ -29,6 +29,8 @@ else: dump_names = debug +DEFAULT_CHARSET = None + class BkmkParser(HTMLParser): def __init__(self): HTMLParser.__init__(self) @@ -42,8 +44,8 @@ class BkmkParser(HTMLParser): def handle_data(self, data): if data: - if self.charset: - data = unicode(data, self.charset).encode() + if DEFAULT_CHARSET: + data = unicode(data, self.charset, "replace").encode(DEFAULT_CHARSET, "replace") self.accumulator += data @@ -65,6 +67,9 @@ class BkmkParser(HTMLParser): self.charset = content.split('=')[1] except IndexError: pass + else: + global DEFAULT_CHARSET + DEFAULT_CHARSET = sys.getdefaultencoding() def start_title(self, attrs): -- 2.39.5