"""
Forking robot
- Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
"""
from bkmk_objects import Robot
class robot_forking(Robot):
- def check_url(self, bookmark, url_type, url_rest):
+ def check_url(self, bookmark):
if not check_subp:
restart_subp(self.log) # Not restart, just start afresh
#! /usr/bin/env python
"""
- Check URL - subprocess
+ Check URL - subprocess for the forking robot
- Written by BroytMann, Mar 1999 - Aug 2002. Copyright (C) 1999-2002 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 1999-2007 PhiloSoft Design.
"""
-import sys, os, urllib
+import sys, os
lib_dir = os.path.normpath(os.path.dirname(sys.argv[0]) + os.sep + os.pardir)
sys.path.append(lib_dir) # for bkmk_objects.py
while 1:
bookmark = pickle.loads(bkmk_in.read_record())
log(bookmark.href)
- url_type, url_rest = urllib.splittype(bookmark.href)
- robot.check_url(bookmark, url_type, url_rest)
+ robot.check_url(bookmark)
bkmk_out.write_record(pickle.dumps(bookmark))
log.outfile.flush()
from parse_html import parse_html
class robot_simple(Robot):
- def check_url(self, bookmark, url_type, url_rest):
+ def check_url(self, bookmark):
if not self.tempfname:
self.tempfname = bookmark.tempfname
try:
try:
self.start = int(time.time())
+ url_type, url_rest = urllib.splittype(bookmark.href)
url_host, url_path = urllib.splithost(url_rest)
url_path, url_tag = urllib.splittag(url_path)
from bkmk_rsimple import robot_simple, get_error
class robot_simple_tos(robot_simple):
- def check_url(self, bookmark, url_type, url_rest):
+ def check_url(self, bookmark):
try:
- return robot_simple.check_url(self, bookmark, url_type, url_rest)
+ return robot_simple.check_url(self, bookmark)
except (socket.error, timeoutsocket.Timeout), msg:
bookmark.error = get_error(msg)
"""
Add a bookmark to the database.
- Written by BroytMann, Aug 2002. Copyright (C) 2002 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 2002-2007 PhiloSoft Design.
"""
-import sys, os, time, urllib
+import sys, os, time
from bkmk_objects import Bookmark
from Robots.bkmk_rsimple import robot_simple
bookmark.name = ''
robot = robot_simple(tempfname, None)
- url_type, url_rest = urllib.splittype(href)
- if robot.check_url(bookmark, url_type, url_rest): # get real title and last modified date
+ if robot.check_url(bookmark): # get real title and last modified date
if title: # forced title
bookmark.name = title
elif hasattr(bookmark, "real_title"):
object.parent.append(object)
return root_folder
+
+def break_tree(linear):  # Remove the first entry of `linear` in place, then delete the `parent` attribute of every remaining entry; no explicit return in the lines shown.
+ root_folder = linear[0]  # first entry of the list (presumably the root folder -- confirm); not referenced again in the visible lines
+ del linear[0]  # mutates the caller's list: the first entry is dropped
+
+ for object in linear:  # NOTE(review): the loop variable `object` shadows the builtin of the same name inside this function
+ del object.parent  # an entry without a `parent` attribute would raise AttributeError here
"""
Robot interface - check URLs from bookmarks database
- Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+ Written by BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
"""
-import sys, os, urllib, tempfile
+import sys, os, tempfile
tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
pass
if report_stats:
- print "BroytMann check_urls, Copyright (C) 1997-2002 PhiloSoft Design"
+ print "BroytMann check_urls, Copyright (C) 1997-2007 PhiloSoft Design"
if args:
sys.stderr.write("check_urls: too many arguments\n")
if hasattr(old_object, attr_name):
setattr(object, attr_name, getattr(old_object, attr_name))
else:
- url_type, url_rest = urllib.splittype(object.href)
- log("Checking %s:%s" % (url_type, url_rest))
- rcode = robot.check_url(object, url_type, url_rest)
+ log("Checking %s" % object.href)
+ rcode = robot.check_url(object)
if rcode:
checked[object.href] = object_no