From: Oleg Broytman Date: Sun, 9 Mar 2008 15:55:20 +0000 (+0000) Subject: Title (and refresh) can be None. X-Git-Tag: v4.5.3~165 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=8ae42a33f8948c056f5acff3f9748a28700c55f0;p=bookmarks_db.git Title (and refresh) can be None. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@211 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 47cbb19..fd901a8 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -77,7 +77,7 @@ def parse_html(filename, charset=None, log=None): if log: log("Parser %s.%s failed, trying next one." % (p.__module__, p.__name__)) converted_title = title = parser.title - if not parser.charset: + if title and (not parser.charset): try: unicode(title, "ascii") except UnicodeDecodeError: @@ -86,7 +86,7 @@ def parse_html(filename, charset=None, log=None): if parser.charset: parser.charset = parser.charset.lower().replace("windows-", "cp") - if parser.charset and ( + if title and parser.charset and ( (parser.charset <> universal_charset) or ((not charset) or (charset <> parser.charset))): try: @@ -109,11 +109,12 @@ def parse_html(filename, charset=None, log=None): else: if log: log(" title : %s" % title) - final_title = recode_entities(converted_title, universal_charset) - parts = [s.strip() for s in final_title.replace('\r', '').split('\n')] - final_title = ' '.join([s for s in parts if s]) - if log and (final_title <> converted_title): log(" final title : %s" % final_title) - parser.title = final_title + if title: + final_title = recode_entities(converted_title, universal_charset) + parts = [s.strip() for s in final_title.replace('\r', '').split('\n')] + final_title = ' '.join([s for s in parts if s]) + if log and (final_title <> converted_title): log(" final title : %s" % final_title) + parser.title = final_title return parser diff --git a/Robots/parse_html_beautifulsoup.py b/Robots/parse_html_beautifulsoup.py index db291f9..f5e03b5 100644 --- a/Robots/parse_html_beautifulsoup.py +++ b/Robots/parse_html_beautifulsoup.py @@ -87,17 +87,16 @@ def parse_html(filename, charset=None, log=None): # Lookup TITLE in the root title = root.title - if title is None: - title = '' - elif title.string: - title = title.string.encode(_charset) - else: - parts = [] - for part in title: - if not isinstance(part, basestring): - part = unicode(part) - parts.append(part.strip()) - title = ''.join(parts).encode(_charset) + if title is not None: + if title.string: + title = title.string.encode(_charset) + else: + parts = [] + for part in title: + if not isinstance(part, basestring): + part = unicode(part) + parts.append(part.strip()) + title = ''.join(parts).encode(_charset) meta = head.find(_find_contenttype, recursive=False) if meta: diff --git a/Robots/parse_html_htmlparser.py b/Robots/parse_html_htmlparser.py index cccfe8c..493db3e 100644 --- a/Robots/parse_html_htmlparser.py +++ b/Robots/parse_html_htmlparser.py @@ -16,8 +16,8 @@ class HTMLParser(_HTMLParser): _HTMLParser.__init__(self) self.charset = charset self.meta_charset = 0 - self.title = '' - self.refresh = '' + self.title = None + self.refresh = None self.icon = None def end_head(self): diff --git a/check_title.py b/check_title.py index 2a4f522..23caeb8 100755 --- a/check_title.py +++ b/check_title.py @@ -53,7 +53,7 @@ def run(): if hasattr(object, "moved") or hasattr(object, "error"): continue - if hasattr(object, "real_title"): + if hasattr(object, "real_title") and (object.real_title is not None): unquoted_title = unquote_title(quote_title(object.real_title)) unquoted_name = unquote_title(object.name) if unquoted_name <> unquoted_title: