X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_htmlparser.py;h=cccfe8c7a4b5adab468accbed54287f009f1df72;hb=38f3645cce7a5875128d788df6631069c761b987;hp=30911dda7012b0ad9aa273692cc766d3eb5cd775;hpb=5f637b5a5fe27098985975928632b9fea5ea3c62;p=bookmarks_db.git diff --git a/Robots/parse_html_htmlparser.py b/Robots/parse_html_htmlparser.py index 30911dd..cccfe8c 100644 --- a/Robots/parse_html_htmlparser.py +++ b/Robots/parse_html_htmlparser.py @@ -38,8 +38,8 @@ class HTMLParser(_HTMLParser): if (not self.charset) and (http_equiv == "content-type"): try: - # extract charset from "text/html; foo; charset=UTF-8; bar;" - self.charset = content.lower().split('charset=')[1].split(';')[0] + # extract charset from "text/html; foo; charset=UTF-8, bar; baz;" + self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0] self.meta_charset = 1 # Remember that the charset was retrieved from # META tag, not from the Content-Type header except IndexError: