git.phdru.name Git - bookmarks_db.git/blobdiff - Robots/bkmk_rsimple.py
Title (and refresh) can be None.
[bookmarks_db.git] / Robots / bkmk_rsimple.py
index 32083ee95b254e264f367b5becb18a8f26d91d78..f481d66e1bce396d46387571211d010eeea40a52 100644 (file)
@@ -1,7 +1,7 @@
 """
    Simple, straightforward robot
 
-   Written by Oleg BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
+   Written by Oleg BroytMann. Copyright (C) 2000-2008 PhiloSoft Design.
 """
 
 import sys, os
@@ -157,14 +157,15 @@ class robot_simple(Robot):
             try:
                content_type = headers["Content-Type"]
                try:
-                  content_type, charset = content_type.split(';')
+                  # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+                  content_type, charset = content_type.split(';', 1)
                   content_type = content_type.strip()
-                  charset = charset.split('=')[1].strip()
+                  charset = charset.split('=')[1].strip().split(',')[0]
                   self.log("   HTTP charset   : %s" % charset)
                except (ValueError, IndexError):
                   charset = None
                   self.log("   no charset in Content-Type header")
-               if content_type == "text/html":
+               if content_type in ("text/html", "application/xhtml+xml"):
                   parser = parse_html(fname, charset, self.log)
                   bookmark.real_title = parser.title
                   if parser.refresh: