From 5bf527cabbb158190e9b1ac17e19684135d9a6d2 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 27 Dec 2007 19:33:24 +0000 Subject: [PATCH] Strip every line in title. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@135 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/parse_html.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 666c707..d3870fa 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -87,7 +87,8 @@ def parse_html(filename, charset=None, log=None): if log: log(" unknown charset: `%s' or `%s'" % (parser.charset, current_charset)) title = recode_entities(title, current_charset) - title = title.replace('\r', '').replace('\n', ' ').strip() + parts = [s.strip() for s in title.replace('\r', '').split('\n')] + title = ' '.join([s for s in parts if s]) if log: log(" final title : %s" % title) parser.title = title return parser -- 2.39.2