reloc_dict = {
- 301: "perm.",
+ 301: "perm1.",
302: "temp2.",
303: "temp3.",
307: "temp7.",
+ 308: "temp8.",
"html": "html"
}
if headers:
try:
content_type = headers["Content-Type"]
- self.log(" Content-Type: %s" % content_type)
+ self.log(" Content-Type : %s" % content_type)
+ if content_type is None:
+ if 'html' in content.lower():
+ content_type = 'text/html'
+ else:
+ content_type = 'text/plain'
+ self.log(" Set Content-Type to: %s"
+ % content_type)
try:
# extract charset from
# "text/html; foo; charset=UTF-8, bar; baz;"
except (ValueError, IndexError):
charset = None
self.log(" no charset in Content-Type header")
+ is_html = False
for ctype in ("text/html", "application/xhtml+xml"):
if content_type.startswith(ctype):
- html = True
+ is_html = True
break
- else:
- html = False
- if html:
- parser = parse_html(content, charset, self.log)
+ content_stripped = content.strip()
+ if content_stripped and is_html:
+ parser = parse_html(content_stripped, charset, self.log)
if parser:
bookmark.real_title = parser.title
icon = parser.icon
icons[icon_url] = None
else:
content_type = icon_headers["Content-Type"]
- if content_type.startswith("application/") \
- or content_type.startswith("image/") \
- or content_type.startswith("text/plain"):
+ if content_type and (
+ content_type.startswith("application/")
+ or content_type.startswith("image/")
+ or content_type.startswith("text/plain")
+ ):
bookmark.icon_href = icon_url
self.log(" got icon : %s"
% content_type)
% (url, timeout)
)
+ if not content_stripped:
+ self.log(" empty response, no content")
+ if not is_html:
+ self.log(" not html")
except KeyError as key:
self.log(" no header: %s" % key)
return 1
def set_redirect(self, bookmark, errcode, newurl):
- bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl)
- self.log(' Moved: %s' % bookmark.moved)
+ bookmark.moved = moved = "(%s) to %s" % (reloc_dict[errcode], newurl)
+ try:
+ moved.encode('ascii')
+ except UnicodeEncodeError:
+ try:
+ moved = moved.encode(bookmark.charset)
+ except (LookupError, TypeError, UnicodeEncodeError):
+ moved = moved.encode('utf-8')
+ self.log(' Moved: %s' % moved)
def finish_check_url(self, bookmark):
start = self.start