git.phdru.name Git - bookmarks_db.git/commitdiff
Ignore case for DOCTYPE.
author Oleg Broytman <phd@phdru.name>
Tue, 4 Mar 2008 10:55:43 +0000 (10:55 +0000)
committer Oleg Broytman <phd@phdru.name>
Tue, 4 Mar 2008 10:55:43 +0000 (10:55 +0000)
git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@200 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23

Robots/parse_html_beautifulsoup.py

index c2a4032f042eb5c22930d9443da3fecb8cd9e5d7..ca8ee2e1a0ecc2bdd4f78524e04638a664ec508c 100644 (file)
@@ -39,7 +39,7 @@ class BadDeclParser(BeautifulSoup):
              except SGMLParseError:
                  # Could not parse the DOCTYPE declaration
                  # Try to just skip the actual declaration
-                 match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE)
+                 match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE|re.I)
                  if match:
                      toHandle = self.rawdata[i:match.end()]
                  else: