Extract charset from "text/html; foo; charset=UTF-8, bar; baz;"

[bookmarks_db.git] / Robots / parse_html_htmlparser.py
diff --git a/Robots/parse_html_htmlparser.py b/Robots/parse_html_htmlparser.py

index 30911dda7012b0ad9aa273692cc766d3eb5cd775..cccfe8c7a4b5adab468accbed54287f009f1df72 100644 (file)
--- a/Robots/parse_html_htmlparser.py
+++ b/Robots/parse_html_htmlparser.py
@@ -38,8 +38,8 @@ class HTMLParser(_HTMLParser):
  
        if (not self.charset) and (http_equiv == "content-type"):
           try:
-            # extract charset from "text/html; foo; charset=UTF-8; bar;"
-            self.charset = content.lower().split('charset=')[1].split(';')[0]
+            # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+            self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
              self.meta_charset = 1 # Remember that the charset was retrieved from
                                    # META tag, not from the Content-Type header
           except IndexError: