- content_type = headers["Content-Type"]
- try:
- content_type, charset = content_type.split(';')
- content_type = content_type.strip()
- charset = charset.split('=')[1].strip()
- if self.log: self.log(" HTTP charset : %s" % charset)
- except (ValueError, IndexError):
- charset = None
- if self.log: self.log(" no charset in Content-Type header")
- if content_type == "text/html":
- parser = parse_html(fname, charset, self.log)
- title = parser.title.replace('\r', '').replace('\n', ' ').strip()
- bookmark.real_title = parser.unescape(title)
- if self.log: self.log(" final title : %s" % bookmark.real_title)
- if parser.refresh:
- refresh = parser.refresh
+ # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+ content_type, charset = content_type.split(';', 1)
+ content_type = content_type.strip()
+ charset = charset.split('=')[1].strip().split(',')[0]
+ self.log(" HTTP charset : %s" % charset)
+ except (ValueError, IndexError):
+ charset = None
+ self.log(" no charset in Content-Type header")
+ for ctype in ("text/html", "application/xhtml+xml"):
+ if content_type.startswith(ctype):
+ html = True
+ break
+ else:
+ html = False
+ if html:
+ parser = parse_html(fname, charset, self.log)
+ if parser:
+ bookmark.real_title = parser.title
+ icon = parser.icon
+ else:
+ icon = None
+ if not icon:
+ icon = "/favicon.ico"
+ icon = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
+ self.log(" looking for icon at: %s" % icon)
+ if icon in icons:
+ if icons[icon]:
+ bookmark.icon_href = icon
+ content_type, bookmark.icon = icons[icon]
+ self.log(" cached icon: %s" % content_type)
+ else:
+ self.log(" cached icon: no icon")
+ else:
+ try:
+ _icon = icon
+ for i in range(8):
+ try:
+ icon_fname, headers = urllib.urlretrieve(_icon)
+ except RedirectException, e:
+ _icon = e.url
+ self.log(" redirect to : %s" % _icon)
+ else:
+ break
+ else:
+ raise IOError("Too many redirects")
+ except:
+ etype, emsg, tb = sys.exc_info()
+ self.log(" no icon : %s %s" % (etype, emsg))
+ etype = emsg = tb = None
+ icons[icon] = None
+ else:
+ content_type = headers["Content-Type"]
+ if content_type.startswith("application/") \
+ or content_type.startswith("image/") \
+ or content_type.startswith("text/plain"):
+ icon_file = open(icon_fname, "rb")
+ icon_data = icon_file.read()
+ icon_file.close()
+ bookmark.icon_href = icon
+ self.log(" got icon : %s" % content_type)
+ if content_type.startswith("application/") \
+ or content_type.startswith("text/plain"):
+ self.log(" non-image content type, assume x-icon")
+ content_type = 'image/x-icon'
+ bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
+ icons[icon] = (content_type, bookmark.icon)
+ else:
+ self.log(" no icon : bad content type '%s'" % content_type)
+ icons[icon] = None
+ if parser and parser.refresh:
+ refresh = parser.refresh
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ try:
+ timeout = float(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
+ else: