From 7fd1132c4ab530b7b34e846ca45f99b8737dd127 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 7 Oct 2010 18:10:46 +0000 Subject: [PATCH] Robots no longer have one global temporary file - there are at least two (html and favicon), and in the future there will be more for asynchronous robot(s) that would test many URLs in parallel. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@290 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- Robots/bkmk_rforking.py | 2 -- Robots/bkmk_rforking_sub.py | 4 ++-- Robots/bkmk_rsimple.py | 21 +++++++++------------ bkmk-add.py | 11 +---------- bkmk_objects.py | 3 +-- check_url.py | 3 +-- check_urls.py | 8 +------- doc/ANNOUNCE | 15 ++++----------- 8 files changed, 19 insertions(+), 48 deletions(-) diff --git a/Robots/bkmk_rforking.py b/Robots/bkmk_rforking.py index f0e9354..594359f 100644 --- a/Robots/bkmk_rforking.py +++ b/Robots/bkmk_rforking.py @@ -47,8 +47,6 @@ class robot_forking(Robot): try: save_parent = bookmark.parent bookmark.parent = None - - bookmark.tempfname = self.tempfname subp_pipe.write_record(pickle.dumps(bookmark)) if check_subp.waitForPendingChar(60): # wait a minute diff --git a/Robots/bkmk_rforking_sub.py b/Robots/bkmk_rforking_sub.py index 6fccf40..63464fc 100755 --- a/Robots/bkmk_rforking_sub.py +++ b/Robots/bkmk_rforking_sub.py @@ -2,7 +2,7 @@ """ Check URL - subprocess for the forking robot - Written by Broytman. Copyright (C) 1999-2007 PhiloSoft Design. + Written by Broytman. Copyright (C) 1999-2010 PhiloSoft Design. """ @@ -28,7 +28,7 @@ def run(): from m_lib.flog import openlog log = openlog("check2.log") from bkmk_rsimple import robot_simple - robot = robot_simple(None, log) + robot = robot_simple(log) while 1: bookmark = pickle.loads(bkmk_in.read_record()) diff --git a/Robots/bkmk_rsimple.py b/Robots/bkmk_rsimple.py index e01c7fe..69636d9 100644 --- a/Robots/bkmk_rsimple.py +++ b/Robots/bkmk_rsimple.py @@ -105,9 +105,6 @@ icons = {} # Icon cache; maps URL to a tuple (content type, data) class robot_simple(Robot): def check_url(self, bookmark): - if not self.tempfname: - self.tempfname = bookmark.tempfname - try: self.start = int(time.time()) bookmark.icon = None @@ -120,7 +117,7 @@ class robot_simple(Robot): urllib._urlopener.addheaders[2] = ('Referer', "%s://%s%s" % (url_type, url_host, url_path)) if bookmark.charset: urllib._urlopener.addheader('Accept-Charset', bookmark.charset) - fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname) + fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path)) if bookmark.charset: del urllib._urlopener.addheaders[-1] size = 0 @@ -152,7 +149,7 @@ class robot_simple(Robot): if urllib._urlopener.type == "ftp": # Pass welcome message through MD5 md5.update(get_welcome()) - md5.md5file(self.tempfname) + md5.md5file(fname) bookmark.md5 = str(md5) if headers: @@ -215,7 +212,7 @@ class robot_simple(Robot): _icon = icon for i in range(8): try: - fname, headers = urllib.urlretrieve(_icon) + icon_fname, headers = urllib.urlretrieve(_icon) except RedirectException, e: _icon = e.url self.log(" redirect to : %s" % _icon) @@ -233,7 +230,7 @@ class robot_simple(Robot): else: content_type = headers["Content-Type"] if content_type.startswith("image/"): - icon_file = open(fname, "rb") + icon_file = open(icon_fname, "rb") icon_data = icon_file.read() icon_file.close() bookmark.icon_href = icon @@ -243,7 +240,7 @@ class robot_simple(Robot): else: self.log(" no icon : bad content type '%s'" % content_type) icons[icon] = None - os.remove(fname) + os.remove(icon_fname) except KeyError, key: self.log(" no header: %s" % key) @@ -275,16 +272,16 @@ class robot_simple(Robot): self.log(' Exception: %s' % bookmark.error) finally: - self.finish_check_url(bookmark) + self.finish_check_url(bookmark, fname) # Tested return 1 - def finish_check_url(self, bookmark): + def finish_check_url(self, bookmark, fname=None): # Calculate these attributes even in case of an error - if os.path.exists(self.tempfname): - size = str(os.path.getsize(self.tempfname)) + if fname and os.path.exists(fname): + size = str(os.path.getsize(fname)) if size[-1] == 'L': size = size[:-1] bookmark.size = size diff --git a/bkmk-add.py b/bkmk-add.py index 46efe3e..7e7b415 100755 --- a/bkmk-add.py +++ b/bkmk-add.py @@ -10,9 +10,6 @@ import sys, os, time from bkmk_objects import Bookmark from Robots.bkmk_rsimple import robot_simple -import tempfile -tempfname = "bkmk-add" + tempfile.gettempprefix() + "tmp" - def run(): from getopt import getopt @@ -53,7 +50,7 @@ def run(): bookmark = Bookmark(href, str(now), '0', '0') bookmark.name = '' - robot = robot_simple(tempfname, None) + robot = robot_simple(None) if robot.check_url(bookmark): # get real title and last modified date if title: # forced title @@ -74,11 +71,5 @@ def run(): print "Ok" - try: - os.unlink(tempfname) - except os.error: - pass - - if __name__ == '__main__': run() diff --git a/bkmk_objects.py b/bkmk_objects.py index a9567e2..17d0509 100644 --- a/bkmk_objects.py +++ b/bkmk_objects.py @@ -99,8 +99,7 @@ class Writer(Walker): class Robot: - def __init__(self, tempfname, log): - self.tempfname = tempfname + def __init__(self, log): self.log = log def stop(self): diff --git a/check_url.py b/check_url.py index 123f6a9..b0a6869 100755 --- a/check_url.py +++ b/check_url.py @@ -20,7 +20,6 @@ httplib.HTTP = MyHTTP from bkmk_objects import Bookmark from Writers.bkmk_wflad import strftime -tempfname = "check_url.tmp" def run(): @@ -34,7 +33,7 @@ def run(): log = makelog("check.log") from robots import robot - robot = robot(tempfname, log) + robot = robot(log) for url in sys.argv[1:]: bookmark = Bookmark(href=url, add_date=None) diff --git a/check_urls.py b/check_urls.py index c3e81b8..f75cb16 100755 --- a/check_urls.py +++ b/check_urls.py @@ -7,7 +7,6 @@ import sys, os -tempfname = "check_urls.tmp" def run(): @@ -63,7 +62,7 @@ def run(): storage = storage() from robots import robot - robot = robot(tempfname, log) + robot = robot(log) if report_stats: sys.stdout.write("Loading %s: " % storage.filename) @@ -153,11 +152,6 @@ def run(): log("check_urls finished ok") log.close() - try: - os.unlink(tempfname) - except os.error: - pass - if __name__ == '__main__': run() diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE index 132ccbc..79eec54 100644 --- a/doc/ANNOUNCE +++ b/doc/ANNOUNCE @@ -5,18 +5,11 @@ WHAT IS IT A set of classes, libraries, programs and plugins I use to manipulate my bookmarks.html. -WHAT'S NEW in version 4.2.2. +WHAT'S NEW in version 4.3.0 (2010-??). - Added HTML Parser based on lxml. - -WHAT'S NEW in version 4.2.1 (2010-08-12). - - Added HTML Parser based on html5 library. - -WHAT'S NEW in version 4.2.0 (2010-08-11). - - New storage: json; it allows to load and store Mozilla (Firefox) backup -files. +Robots no longer have one global temporary file - there are at least two +(html and favicon), and in the future there will be more for +asynchronous robot(s) that would test many URLs in parallel. WHERE TO GET -- 2.39.2