X-Git-Url: https://git.phdru.name/?p=dotfiles.git;a=blobdiff_plain;f=bin%2Fget_html_encoding.py;h=4ffb44998588cfc87549c99c295eb103d8701725;hp=8fd9656a2bc5087af7163135305b563661071da8;hb=HEAD;hpb=c5883d2a782366c0a3468a989e756cf37dabbd46 diff --git a/bin/get_html_encoding.py b/bin/get_html_encoding.py index 8fd9656..381be9c 100755 --- a/bin/get_html_encoding.py +++ b/bin/get_html_encoding.py @@ -1,18 +1,13 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 - -from HTMLParser import HTMLParseError from m_lib.net.www.html import HTMLParser as _HTMLParser - class HTMLHeadDone(Exception): pass - class HTMLParser(_HTMLParser): def end_head(self): raise HTMLHeadDone() - def do_meta(self, attrs): http_equiv = "" content = "" @@ -41,14 +36,14 @@ def parse_html(filename): for line in infile: try: parser.feed(line) - except (HTMLParseError, HTMLHeadDone): + except HTMLHeadDone: break infile.close() try: parser.close() - except (HTMLParseError, HTMLHeadDone): + except HTMLHeadDone: pass if hasattr(parser, "charset"): @@ -62,12 +57,12 @@ if __name__ == '__main__': import sys parser = parse_html(sys.argv[1]) if hasattr(parser, "charset"): - print parser.charset + print(parser.charset) else: import chardet charset = chardet.detect(open(sys.argv[1]).read())["encoding"] if charset in ("ISO-8859-2", "MacCyrillic"): charset = "cp1251" - print charset + print(charset) except: pass