if headers:
try:
content_type = headers["Content-Type"]
+ self.log(" Content-Type: %s" % content_type)
try:
# extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
content_type, charset = content_type.split(';', 1)
except (ValueError, IndexError):
charset = None
self.log(" no charset in Content-Type header")
- if content_type in ("text/html", "application/xhtml+xml"):
+ for ctype in ("text/html", "application/xhtml+xml"):
+ if content_type.startswith(ctype):
+ html = True
+ break
+ else:
+ html = False
+ if html:
parser = parse_html(fname, charset, self.log)
- bookmark.real_title = parser.title
- if parser.refresh:
- refresh = parser.refresh
- try:
- url = refresh.split('=', 1)[1]
- except IndexError:
- url = "self"
- try:
- timeout = float(refresh.split(';')[0])
- except (IndexError, ValueError):
- raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
- else:
- try:
- timeout = int(refresh.split(';')[0])
- except ValueError:
- pass # float timeout
- raise RedirectException("html", "%s (%s sec)" % (url, timeout))
-
- # Get favicon.ico
- icon = parser.icon
+ if parser:
+ bookmark.real_title = parser.title
+ if parser.refresh:
+ refresh = parser.refresh
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ try:
+ timeout = float(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
+ else:
+ try:
+ timeout = int(refresh.split(';')[0])
+ except ValueError:
+ pass # float timeout
+ raise RedirectException("html", "%s (%s sec)" % (url, timeout))
+
+ # Get favicon.ico
+ icon = parser.icon
+ else:
+ icon = None
if not icon:
icon = "/favicon.ico"
icon = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
self.log(" looking for icon at: %s" % icon)
if icon in icons:
if icons[icon]:
+ bookmark.icon_href = icon
content_type, bookmark.icon = icons[icon]
self.log(" cached icon: %s" % content_type)
else:
content_type = headers["Content-Type"]
if content_type.startswith("image/"):
icon_file = open(fname, "rb")
- icon = icon_file.read()
+ icon_data = icon_file.read()
icon_file.close()
- bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon))
+ bookmark.icon_href = icon
+ bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
self.log(" got icon : %s" % content_type)
icons[icon] = (content_type, bookmark.icon)
else:
icons[icon] = None
os.remove(fname)
- except KeyError:
- pass
+ except KeyError, key:
+ self.log(" no header: %s" % key)
except IOError, msg:
if (msg[0] == "http error") and (msg[1] == -1):
bookmark.no_error = "The server did not return any header - it is not an error, actually"
- self.log(bookmark.no_error)
+ self.log(' no headers: %s' % bookmark.no_error)
else:
bookmark.error = get_error(msg)
- self.log(bookmark.error)
+ self.log(' Error: %s' % bookmark.error)
except EOFError:
bookmark.error = "Unexpected EOF (FTP server closed connection)"
- self.log(bookmark.error)
+ self.log(' EOF: %s' % bookmark.error)
except RedirectException, msg:
bookmark.moved = str(msg)
- self.log(bookmark.moved)
+ self.log(' Moved: %s' % bookmark.moved)
except KeyboardInterrupt:
self.log("Keyboard interrupt (^C)")
import traceback
traceback.print_exc()
bookmark.error = "Exception!"
- self.log(bookmark.error)
+ self.log(' Exception: %s' % bookmark.error)
finally:
self.finish_check_url(bookmark)