]> git.phdru.name Git - bookmarks_db.git/commitdiff
Do not unquote standard HTML entities.
author Oleg Broytman <phd@phdru.name>
Sun, 24 Feb 2008 13:02:24 +0000 (13:02 +0000)
committer Oleg Broytman <phd@phdru.name>
Sun, 24 Feb 2008 13:02:24 +0000 (13:02 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@182 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html.py

index 0d7b20b5ce82b88d596b1cb25aac90084a997735..137ebfad9809d369712483bfa63b97744f7c6f1a 100755 (executable)
@@ -33,7 +33,8 @@ def recode_entities(title, charset):
    output = []
    for part in entity_re.split(title):
       if entity_re.match(part):
-         part = entitydefs.get(part[1:-1], part)
+         if part not in ("&amp;", "&lt;", "&gt;", "&quote;", "&nbsp;"):
+            part = entitydefs.get(part[1:-1], part)
       output.append(part)
 
    output2 = []