There are sites that provide an incorrect
(most probably misspelled) charset.
break
content_stripped = content.strip()
if content_stripped and charset:
- content_stripped = content_stripped.decode(
- charset, 'replace')
+ try:
+ content_stripped = content_stripped.decode(
+ charset, 'replace')
+ except LookupError:
+ charset = None
+ self.log(" unknown charset "
+ "in Content-Type header")
if content_stripped and is_html:
parser = parse_html(
content_stripped, charset, self.log)