try:
save_parent = bookmark.parent
bookmark.parent = None
-
- bookmark.tempfname = self.tempfname
subp_pipe.write_record(pickle.dumps(bookmark))
if check_subp.waitForPendingChar(60): # wait a minute
"""
Check URL - subprocess for the forking robot
- Written by Broytman. Copyright (C) 1999-2007 PhiloSoft Design.
+ Written by Broytman. Copyright (C) 1999-2010 PhiloSoft Design.
"""
from m_lib.flog import openlog
log = openlog("check2.log")
from bkmk_rsimple import robot_simple
- robot = robot_simple(None, log)
+ robot = robot_simple(log)
while 1:
bookmark = pickle.loads(bkmk_in.read_record())
class robot_simple(Robot):
def check_url(self, bookmark):
- if not self.tempfname:
- self.tempfname = bookmark.tempfname
-
try:
self.start = int(time.time())
bookmark.icon = None
urllib._urlopener.addheaders[2] = ('Referer', "%s://%s%s" % (url_type, url_host, url_path))
if bookmark.charset: urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
- fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname)
+ fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path))
if bookmark.charset: del urllib._urlopener.addheaders[-1]
size = 0
if urllib._urlopener.type == "ftp": # Pass welcome message through MD5
md5.update(get_welcome())
- md5.md5file(self.tempfname)
+ md5.md5file(fname)
bookmark.md5 = str(md5)
if headers:
_icon = icon
for i in range(8):
try:
- fname, headers = urllib.urlretrieve(_icon)
+ icon_fname, headers = urllib.urlretrieve(_icon)
except RedirectException, e:
_icon = e.url
self.log(" redirect to : %s" % _icon)
else:
content_type = headers["Content-Type"]
if content_type.startswith("image/"):
- icon_file = open(fname, "rb")
+ icon_file = open(icon_fname, "rb")
icon_data = icon_file.read()
icon_file.close()
bookmark.icon_href = icon
else:
self.log(" no icon : bad content type '%s'" % content_type)
icons[icon] = None
- os.remove(fname)
+ os.remove(icon_fname)
except KeyError, key:
self.log(" no header: %s" % key)
self.log(' Exception: %s' % bookmark.error)
finally:
- self.finish_check_url(bookmark)
+ self.finish_check_url(bookmark, fname)
# Tested
return 1
- def finish_check_url(self, bookmark):
+ def finish_check_url(self, bookmark, fname=None):
# Calculate these attributes even in case of an error
- if os.path.exists(self.tempfname):
- size = str(os.path.getsize(self.tempfname))
+ if fname and os.path.exists(fname):
+ size = str(os.path.getsize(fname))
if size[-1] == 'L':
size = size[:-1]
bookmark.size = size
from bkmk_objects import Bookmark
from Robots.bkmk_rsimple import robot_simple
-import tempfile
-tempfname = "bkmk-add" + tempfile.gettempprefix() + "tmp"
-
def run():
from getopt import getopt
bookmark = Bookmark(href, str(now), '0', '0')
bookmark.name = ''
- robot = robot_simple(tempfname, None)
+ robot = robot_simple(None)
if robot.check_url(bookmark): # get real title and last modified date
if title: # forced title
print "Ok"
- try:
- os.unlink(tempfname)
- except os.error:
- pass
-
-
if __name__ == '__main__':
run()
class Robot:
- def __init__(self, tempfname, log):
- self.tempfname = tempfname
+ def __init__(self, log):
self.log = log
def stop(self):
from bkmk_objects import Bookmark
from Writers.bkmk_wflad import strftime
-tempfname = "check_url.tmp"
def run():
log = makelog("check.log")
from robots import robot
- robot = robot(tempfname, log)
+ robot = robot(log)
for url in sys.argv[1:]:
bookmark = Bookmark(href=url, add_date=None)
import sys, os
-tempfname = "check_urls.tmp"
def run():
storage = storage()
from robots import robot
- robot = robot(tempfname, log)
+ robot = robot(log)
if report_stats:
sys.stdout.write("Loading %s: " % storage.filename)
log("check_urls finished ok")
log.close()
- try:
- os.unlink(tempfname)
- except os.error:
- pass
-
if __name__ == '__main__':
run()
A set of classes, libraries, programs and plugins I use to manipulate my
bookmarks.html.
-WHAT'S NEW in version 4.2.2.
+WHAT'S NEW in version 4.3.0 (2010-??).
- Added HTML Parser based on lxml.
-
-WHAT'S NEW in version 4.2.1 (2010-08-12).
-
- Added HTML Parser based on html5 library.
-
-WHAT'S NEW in version 4.2.0 (2010-08-11).
-
- New storage: json; it allows to load and store Mozilla (Firefox) backup
-files.
+Robots no longer share one global temporary file: there are now at least
+two (HTML and favicon), and in the future there will be more for
+asynchronous robot(s) that will test many URLs in parallel.
WHERE TO GET