def parse_html(html_text, charset=None, log=None):
+ if not html_text:
+ return None
root = _parse_html(html_text, charset)
if root is None:
return None
else:
parts = []
for part in title:
- if not isinstance(part, string_type):
- part = part.decode()
- parts.append(part.strip())
+ #if not isinstance(part, string_type):
+ # part = part.decode()
+ if part.strip:
+ parts.append(part.strip())
+ else:
+ parts.append(' ') # Skip tags, they're usually `<br>`
title = ''.join(parts)
meta = head.find(_find_contenttype, recursive=False)
def _find_icon(Tag):
    """Return True when *Tag* is a ``<link>`` element declaring a site icon.

    The element matches when its ``rel`` attribute, taken as a whole, is
    ``icon`` or ``shortcut icon``.  The comparison ignores letter case and
    surrounding/repeated whitespace.

    Tag: an object exposing ``name`` and ``get_attribute_list(key, default)``.
    Returns: bool.
    """
    if Tag.name != "link":
        return False
    # ``rel`` may come back as several tokens (e.g. ['shortcut', 'icon']),
    # so look at all of them instead of only the first one.
    rel_values = Tag.get_attribute_list("rel", '')
    joined = ' '.join(value or '' for value in rel_values)
    # Lower-case and collapse whitespace so that 'Shortcut  ICON' still
    # compares equal to 'shortcut icon'.
    rel = ' '.join(joined.lower().split())
    return rel in ('icon', 'shortcut icon')