From 83ac2470649ec5932119ee66dd1ce6829278977c Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 8 Aug 2010 19:52:35 +0000 Subject: [PATCH] Fixed parsing in case of unknown entity. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@243 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 80c7aa8..fd8dc92 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -34,8 +34,12 @@ def recode_entities(title, charset): for part in entity_re.split(title): if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \ entity_re.match(part): - part = unichr(name2codepoint.get(part[1:-1], part)).encode(charset) - output.append(part) + _part = name2codepoint.get(part[1:-1], None) + if _part is None: + _part = part + else: + _part = unichr(_part).encode(charset) + output.append(_part) title = ''.join(output) output = [] -- 2.39.2