X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html_htmlparser.py;h=df37f752d86a3a55b7057b5357daf57e1c8efbe6;hb=ee556aa2ed24d28ad25cbbe31f9141e4e8fdc932;hp=e1a35f1007babbf5a31c93751da082dc3e94156d;hpb=a147d51d168748fe91f9ee8e27fcc065d12658d8;p=bookmarks_db.git

diff --git a/Robots/parse_html_htmlparser.py b/Robots/parse_html_htmlparser.py
index e1a35f1..df37f75 100644
--- a/Robots/parse_html_htmlparser.py
+++ b/Robots/parse_html_htmlparser.py
@@ -1,7 +1,7 @@
 """
    HTML Parser
 
-   Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
+   Written by Broytman. Copyright (C) 1997-2008 PhiloSoft Design
 """
 
 from HTMLParser import HTMLParseError
@@ -16,8 +16,8 @@ class HTMLParser(_HTMLParser):
       _HTMLParser.__init__(self)
       self.charset = charset
       self.meta_charset = 0
-      self.title = ''
-      self.refresh = ''
+      self.title = None
+      self.refresh = None
       self.icon = None
 
    def end_head(self):
@@ -38,8 +38,8 @@ class HTMLParser(_HTMLParser):
 
       if (not self.charset) and (http_equiv == "content-type"):
          try:
-            # extract charset from "text/html; foo; charset=UTF-8; bar;"
-            self.charset = content.lower().split('charset=')[1].split(';')[0]
+            # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+            self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
             self.meta_charset = 1 # Remember that the charset was retrieved from
                                   # META tag, not from the Content-Type header
          except IndexError:
@@ -73,7 +73,7 @@ class HTMLParser(_HTMLParser):
          self.icon = href
 
 
-def parse_html(filename, charset=None):
+def parse_html(filename, charset=None, log=None):
    infile = open(filename, 'r')
    parser = HTMLParser(charset)