if elem.tag.startswith(XHTML):
elem.tag = elem.tag[len(XHTML):]
+ title = html_tree.findtext('head/title')
+ if title is None:
+ return None
+
meta = html_tree.findall('head/meta')
for m in meta:
if m.get('http-equiv', '').lower() == 'content-type':
meta_content = m.get("content")
if meta_content:
- meta_charset = \
- meta_content.lower().split('charset=')[1].split(';')[0]
- break
+ try:
+ meta_charset = \
+ meta_content.lower().split('charset=')[1].split(';')[0]
+ break
+ except IndexError:
+ meta_charset = False
else:
meta_charset = False
- title = html_tree.findtext('head/title')
- if title and (charset or meta_charset):
+ if charset or meta_charset:
title = title.encode(charset or meta_charset)
for m in meta: