summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: efd4317)
Decode to unicode from a known encoding.
def parse_html(html_text, charset=None, log=None):
if not html_text:
return None
def parse_html(html_text, charset=None, log=None):
if not html_text:
return None
+ if charset is None and isinstance(html_text, bytes):
+ return None # html.parser cannot parse bytes
+ if charset and isinstance(html_text, bytes):
+ html_text = html_text.decode(charset)
+
parser = HTMLParser(charset)
try:
parser = HTMLParser(charset)
try: