From: Oleg Broytman Date: Sun, 8 Aug 2010 19:52:35 +0000 (+0000) Subject: Fixed parsing in case of unknown entity. X-Git-Tag: v4.5.3~133 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=83ac2470649ec5932119ee66dd1ce6829278977c;p=bookmarks_db.git Fixed parsing in case of unknown entity. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@243 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 80c7aa8..fd8dc92 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -34,8 +34,12 @@ def recode_entities(title, charset): for part in entity_re.split(title): if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \ entity_re.match(part): - part = unichr(name2codepoint.get(part[1:-1], part)).encode(charset) - output.append(part) + _part = name2codepoint.get(part[1:-1], None) + if _part is None: + _part = part + else: + _part = unichr(_part).encode(charset) + output.append(_part) title = ''.join(output) output = []