From 284b0935fc4237bda2e51229860c771f78887be2 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 24 Feb 2008 13:02:24 +0000 Subject: [PATCH] Do not unquote standard HTML entities. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@182 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 0d7b20b..137ebfa 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -33,7 +33,8 @@ def recode_entities(title, charset): output = [] for part in entity_re.split(title): if entity_re.match(part): - part = entitydefs.get(part[1:-1], part) + if part not in ("&amp;", "&lt;", "&gt;", "&quote;", "&nbsp;"): + part = entitydefs.get(part[1:-1], part) output.append(part) output2 = [] -- 2.39.2