from bkmk_objects import copy_bkmk
from Robots.bkmk_rrequests import robot_requests
+from Robots.multi_mixin import multi_mixin
from robots import import_robot, set_params, robot_params
+
cpu_count = os.cpu_count()
-class robot_multirequests(robot_requests):
+class robot_multirequests(multi_mixin, robot_requests):
concurrent_class = concurrent.futures.ProcessPoolExecutor # or ThreadPoolExecutor # noqa: E501 line too long
+
# We're I/O-bound, not CPU-bound
max_urls = 2*cpu_count if cpu_count else 10
def __init__(self, *args, **kw):
- if isinstance(self.max_urls, str):
- self.max_urls = int(self.max_urls)
concurrent_class = getattr(concurrent.futures, self.concurrent_class) \
if isinstance(self.concurrent_class, str) \
else self.concurrent_class
self.concurrent_class_name = concurrent_class.__name__
+ multi_mixin.__init__(self, *args, **kw)
robot_requests.__init__(self, *args, **kw)
- self.executor = concurrent_class(max_urls=self.max_urls)
-
- # Bookmarks waiting to be processed;
- # maps {URL: [bookmark, saved parent, future]}
- self.bookmarks = {}
- self.pending = set() # pending futures
+ self.executor = concurrent_class(max_workers=self.max_urls)
def version_str(self):
return super(robot_multirequests, self).version_str() \
- + ' concurrent.futures.' + self.concurrent_class_name
-
- def check_bookmark(self, bookmark):
- href = bookmark.href
- bookmarks = self.bookmarks
- if href in bookmarks:
- return
- bookmarks[href] = [bookmark, None, None]
- if len(bookmarks) < self.max_urls:
- return
- self.wait()
+ + ' multi: concurrent.futures.' + self.concurrent_class_name
def wait(self):
log = self.log
pending = self.pending
free_workers = self.max_urls - len(pending)
- if bookmarks and (free_workers > 0): # there's job and free workers,
+ if bookmarks and (free_workers > 0): # we have job and free workers
for href in bookmarks:
bookmark, parent, ft = bookmarks[href]
if ft is not None: # it's already pending
log(line)
def stop(self):
- while self.bookmarks or self.pending:
- self.wait()
+ super(robot_multirequests, self).stop()
self.executor.shutdown(wait=True)
--- /dev/null
+"""Mix-in for robots that process multiple URLs in parallel.
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['multi_mixin']
+
+
+class multi_mixin:
+ # Shared queueing logic for robots that check several bookmarks at once.
+ # This class does not define wait(); check_bookmark() and stop() call
+ # self.wait(), so the class it is mixed into has to provide it.
+
+ # Queue size at which check_bookmark() calls wait().
+ max_urls = 10 # Small default
+
+ def __init__(self, *args, **kw):
+ # Positional and keyword arguments are accepted but not used here.
+ # max_urls may be a string (presumably set from configuration --
+ # TODO confirm); normalize it to int before it is compared below.
+ if isinstance(self.max_urls, str):
+ self.max_urls = int(self.max_urls)
+
+ # Bookmarks waiting to be processed;
+ # maps {URL: [bookmark, saved parent, task]}
+ self.bookmarks = {}
+ self.pending = set() # pending tasks
+
+ def check_bookmark(self, bookmark):
+ # Queue the bookmark under its href. A URL that is already queued
+ # is ignored, so each URL is held at most once.
+ href = bookmark.href
+ bookmarks = self.bookmarks
+ if href in bookmarks:
+ return
+ # Saved parent and task are unknown at this point, hence None.
+ bookmarks[href] = [bookmark, None, None]
+ # Keep collecting until the queue holds max_urls entries ...
+ if len(bookmarks) < self.max_urls:
+ return
+ # ... then hand over to wait(), which is defined outside this class.
+ self.wait()
+
+ def stop(self):
+ # Call wait() repeatedly until both the queue and the pending set
+ # are empty. NOTE(review): this terminates only if wait() removes
+ # entries from them -- confirm against the concrete wait().
+ while self.bookmarks or self.pending:
+ self.wait()