"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
if (not self.charset) and (http_equiv == "content-type"):
try:
- # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
- self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
+ # extract charset from
+ # "text/html; foo; charset=UTF-8, bar; baz;"
+ self.charset = content.lower().split('charset=')[1].\
+ split(';')[0].split(',')[0]
# Remember that the charset was retrieved from
# META tag, not from the Content-Type header
self.meta_charset = 1
self.accumulator = ''
def end_title(self):
- if not self.title: # use only the first title
+ if not self.title: # use only the first title
self.title = self.accumulator
def do_link(self, attrs):
for attrname, value in attrs:
if value:
value = value.strip()
- if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
+ if (attrname == 'rel') and (
+ value.lower() in ('icon', 'shortcut icon')
+ ):
has_icon = True
elif attrname == 'href':
href = value
except (HTMLParseError, HTMLHeadDone):
pass
- if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
+ if (parser.title is None) and (parser.refresh is None) \
+ and (parser.icon is None):
return None
return parser