Fixed a bug in handling place: URIs (do not append '//').
+ Extract html redirect even if there is no title.
+
WHAT'S NEW in version 4.5.1 (2011-12-28).
# Lookup TITLE in the root
title = root.title
- if title is None:
- return None
-
- if title.string:
- title = title.string
- else:
- parts = []
- for part in title:
- if not isinstance(part, basestring):
- part = unicode(part)
- parts.append(part.strip())
- title = ''.join(parts)
+ if title is not None:
+ if title.string:
+ title = title.string
+ else:
+ parts = []
+ for part in title:
+ if not isinstance(part, basestring):
+ part = unicode(part)
+ parts.append(part.strip())
+ title = ''.join(parts)
meta = head.find(_find_contenttype, recursive=False)
if meta:
else:
meta_charset = False
- if _charset or meta_charset:
+ if title and (_charset or meta_charset):
title = title.encode(_charset or meta_charset)
meta = head.find(_find_refresh, recursive=False)
else:
icon = None
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(_charset, meta_charset, title, refresh, icon)
def _find_contenttype(Tag):
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
- if title is None:
- return None
meta = html_tree.findall('head/meta')
for m in meta:
else:
meta_charset = False
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for m in meta:
else:
icon = None
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)
else:
title = ''
- if title is None:
- return None
-
for node in head.childNodes:
if node.name == 'meta' and \
('http-equiv' in node.attributes) and \
if not charset:
charset = parser.tokenizer.stream.charEncoding[0]
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for node in head.childNodes:
else:
title = ''
- if title is None:
- return None
-
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)
except (HTMLParseError, HTMLHeadDone):
pass
- if parser.title is None:
+ if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
return None
-
return parser
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
- if title is None:
- return None
meta = html_tree.findall('head/meta')
for m in meta:
else:
meta_charset = False
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for m in meta:
else:
icon = None
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)