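Fixed a bug in the HTMLParser-based HTML parser. Changes in this commit:
- attribute values are stripped but no longer lowercased (only the 'rel' comparison is case-insensitive);
- the META charset value is now also cut at a comma, not only at a semicolon;
- 'title' and 'refresh' default to None instead of '';
- parse_html() accepts a 'log' argument and returns None when parser.title is None.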

[bookmarks_db.git] / Robots / parse_html_htmlparser.py
diff --git a/Robots/parse_html_htmlparser.py b/Robots/parse_html_htmlparser.py

index e1a35f1007babbf5a31c93751da082dc3e94156d..77021624ff9543a82ebe26cc170398a49f13a6bb 100644 (file)
--- a/Robots/parse_html_htmlparser.py
+++ b/Robots/parse_html_htmlparser.py
@@ -1,7 +1,7 @@
  """
     HTML Parser
  
-   Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design
+   Written by Broytman. Copyright (C) 1997-2010 PhiloSoft Design
  """
  
  from HTMLParser import HTMLParseError
@@ -16,8 +16,8 @@ class HTMLParser(_HTMLParser):
        _HTMLParser.__init__(self)
        self.charset = charset
        self.meta_charset = 0
-      self.title = ''
-      self.refresh = ''
+      self.title = None
+      self.refresh = None
        self.icon = None
  
     def end_head(self):
@@ -38,8 +38,8 @@ class HTMLParser(_HTMLParser):
  
        if (not self.charset) and (http_equiv == "content-type"):
           try:
-            # extract charset from "text/html; foo; charset=UTF-8; bar;"
-            self.charset = content.lower().split('charset=')[1].split(';')[0]
+            # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+            self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
              self.meta_charset = 1 # Remember that the charset was retrieved from
                                    # META tag, not from the Content-Type header
           except IndexError:
@@ -63,7 +63,7 @@ class HTMLParser(_HTMLParser):
  
        for attrname, value in attrs:
           if value:
-            value = value.strip().lower()
+            value = value.strip()
              if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
                 has_icon = True
              elif attrname == 'href':
@@ -73,7 +73,7 @@ class HTMLParser(_HTMLParser):
           self.icon = href
  
  
-def parse_html(filename, charset=None):
+def parse_html(filename, charset=None, log=None):
     infile = open(filename, 'r')
     parser = HTMLParser(charset)
  
@@ -90,4 +90,7 @@ def parse_html(filename, charset=None):
     except (HTMLParseError, HTMLHeadDone):
        pass
  
+   if parser.title is None:
+      return None
+
     return parser