self.tempfname = bookmark.tempfname
try:
- try:
- self.start = int(time.time())
- bookmark.icon = None
+ self.start = int(time.time())
+ bookmark.icon = None
- url_type, url_rest = urllib.splittype(bookmark.href)
- url_host, url_path = urllib.splithost(url_rest)
- url_path, url_tag = urllib.splittag(url_path)
+ url_type, url_rest = urllib.splittype(bookmark.href)
+ url_host, url_path = urllib.splithost(url_rest)
+ url_path, url_tag = urllib.splittag(url_path)
- # Set fake referer to the root of the site
- urllib._urlopener.addheaders[2] = ('Referer', "%s://%s%s" % (url_type, url_host, url_path))
+ # Set fake referer to the root of the site
+ urllib._urlopener.addheaders[2] = ('Referer', "%s://%s%s" % (url_type, url_host, url_path))
- if bookmark.charset: urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
- fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname)
- if bookmark.charset: del urllib._urlopener.addheaders[-1]
+ if bookmark.charset: urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
+ fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname)
+ if bookmark.charset: del urllib._urlopener.addheaders[-1]
- size = 0
- last_modified = None
+ size = 0
+ last_modified = None
- if headers:
- try:
- size = headers["Content-Length"]
- except KeyError:
- pass
-
- try:
- last_modified = headers["Last-Modified"]
- except KeyError:
- pass
+ if headers:
+ try:
+ size = headers["Content-Length"]
+ except KeyError:
+ pass
- if last_modified:
- last_modified = parse_time(last_modified)
+ try:
+ last_modified = headers["Last-Modified"]
+ except KeyError:
+ pass
if last_modified:
- last_modified = str(int(last_modified))
- else:
- last_modified = bookmark.last_visit
+ last_modified = parse_time(last_modified)
+
+ if last_modified:
+ last_modified = str(int(last_modified))
+ else:
+ last_modified = bookmark.last_visit
- bookmark.size = size
- bookmark.last_modified = last_modified
+ bookmark.size = size
+ bookmark.last_modified = last_modified
- md5 = md5wrapper()
- if urllib._urlopener.type == "ftp": # Pass welcome message through MD5
- md5.update(get_welcome())
+ md5 = md5wrapper()
+ if urllib._urlopener.type == "ftp": # Pass welcome message through MD5
+ md5.update(get_welcome())
- md5.md5file(self.tempfname)
- bookmark.md5 = str(md5)
+ md5.md5file(self.tempfname)
+ bookmark.md5 = str(md5)
- if headers:
+ if headers:
+ try:
+ content_type = headers["Content-Type"]
try:
- content_type = headers["Content-Type"]
- try:
- content_type, charset = content_type.split(';')
- content_type = content_type.strip()
- charset = charset.split('=')[1].strip()
- self.log(" HTTP charset : %s" % charset)
- except (ValueError, IndexError):
- charset = None
- self.log(" no charset in Content-Type header")
- if content_type == "text/html":
- parser = parse_html(fname, charset, self.log)
- bookmark.real_title = parser.title
- if parser.refresh:
- refresh = parser.refresh
- try:
- url = refresh.split('=', 1)[1]
- except IndexError:
- url = "self"
+ content_type, charset = content_type.split(';')
+ content_type = content_type.strip()
+ charset = charset.split('=')[1].strip()
+ self.log(" HTTP charset : %s" % charset)
+ except (ValueError, IndexError):
+ charset = None
+ self.log(" no charset in Content-Type header")
+ if content_type == "text/html":
+ parser = parse_html(fname, charset, self.log)
+ bookmark.real_title = parser.title
+ if parser.refresh:
+ refresh = parser.refresh
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ try:
+ timeout = float(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
+ else:
try:
- timeout = float(refresh.split(';')[0])
- except (IndexError, ValueError):
- raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
- else:
+ timeout = int(refresh.split(';')[0])
+ except ValueError:
+ pass # float timeout
+ raise RedirectException("html", "%s (%s sec)" % (url, timeout))
+
+ # Get favicon.ico
+ icon = parser.icon
+ if not icon:
+ icon = "/favicon.ico"
+ icon = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
+ self.log(" looking for icon at: %s" % icon)
+ if icon in icons:
+ if icons[icon]:
+ content_type, bookmark.icon = icons[icon]
+ self.log(" cached icon: %s" % content_type)
+ else:
+ self.log(" cached icon: no icon")
+ else:
+ try:
+ _icon = icon
+ for i in range(8):
try:
- timeout = int(refresh.split(';')[0])
- except ValueError:
- pass # float timeout
- raise RedirectException("html", "%s (%s sec)" % (url, timeout))
-
- # Get favicon.ico
- icon = parser.icon
- if not icon:
- icon = "/favicon.ico"
- icon = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
- self.log(" looking for icon at: %s" % icon)
- if icon in icons:
- if icons[icon]:
- content_type, bookmark.icon = icons[icon]
- self.log(" cached icon: %s" % content_type)
+ fname, headers = urllib.urlretrieve(_icon)
+ except RedirectException, e:
+ _icon = e.url
+ self.log(" redirect to : %s" % _icon)
+ else:
+ break
else:
- self.log(" cached icon: no icon")
+ raise IOError("Too many redirects")
+ except:
+ etype, emsg, tb = sys.exc_info()
+ self.log(" no icon : %s %s" % (etype, emsg))
+ etype = None
+ emsg = None
+ tb = None
+ icons[icon] = None
else:
- try:
- _icon = icon
- for i in range(8):
- try:
- fname, headers = urllib.urlretrieve(_icon)
- except RedirectException, e:
- _icon = e.url
- self.log(" redirect to : %s" % _icon)
- else:
- break
- else:
- raise IOError("Too many redirects")
- except:
- etype, emsg, tb = sys.exc_info()
- self.log(" no icon : %s %s" % (etype, emsg))
- etype = None
- emsg = None
- tb = None
- icons[icon] = None
+ content_type = headers["Content-Type"]
+ if content_type.startswith("image/"):
+ icon_file = open(fname, "rb")
+ icon = icon_file.read()
+ icon_file.close()
+ bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon))
+ self.log(" got icon : %s" % content_type)
+ icons[icon] = (content_type, bookmark.icon)
else:
- content_type = headers["Content-Type"]
- if content_type.startswith("image/"):
- icon_file = open(fname, "rb")
- icon = icon_file.read()
- icon_file.close()
- bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon))
- self.log(" got icon : %s" % content_type)
- icons[icon] = (content_type, bookmark.icon)
- else:
- self.log(" no icon : bad content type '%s'" % content_type)
- icons[icon] = None
- os.remove(fname)
+ self.log(" no icon : bad content type '%s'" % content_type)
+ icons[icon] = None
+ os.remove(fname)
- except KeyError:
- pass
+ except KeyError:
+ pass
- except IOError, msg:
- if (msg[0] == "http error") and (msg[1] == -1):
- bookmark.no_error = "The server did not return any header - it is not an error, actually"
- else:
- bookmark.error = get_error(msg)
+ except IOError, msg:
+ if (msg[0] == "http error") and (msg[1] == -1):
+ bookmark.no_error = "The server did not return any header - it is not an error, actually"
+ else:
+ bookmark.error = get_error(msg)
- except EOFError:
- bookmark.error = "Unexpected EOF (FTP server closed connection)"
+ except EOFError:
+ bookmark.error = "Unexpected EOF (FTP server closed connection)"
- except RedirectException, msg:
- bookmark.moved = str(msg)
+ except RedirectException, msg:
+ bookmark.moved = str(msg)
- except KeyboardInterrupt:
- return 0
+ except KeyboardInterrupt:
+ return 0
- except:
- import traceback
- traceback.print_exc()
- bookmark.error = "Exception!"
+ except:
+ import traceback
+ traceback.print_exc()
+ bookmark.error = "Exception!"
finally:
self.finish_check_url(bookmark)