git.phdru.name Git - bookmarks_db.git/commitdiff
Refactor(Robots): Split off `multi_mixin`
author Oleg Broytman <phd@phdru.name>
Sun, 18 Aug 2024 20:38:52 +0000 (23:38 +0300)
committer Oleg Broytman <phd@phdru.name>
Mon, 19 Aug 2024 08:06:54 +0000 (11:06 +0300)
Robots/bkmk_rmultirequests.py
Robots/multi_mixin.py [new file with mode: 0644]

index 5ae0e373fbe67024a8453fa2b8ca950af465dd1c..624263ad976f658834bb81844471e897c08459c8 100644 (file)
@@ -17,44 +17,31 @@ import os
 
 from bkmk_objects import copy_bkmk
 from Robots.bkmk_rrequests import robot_requests
+from Robots.multi_mixin import multi_mixin
 from robots import import_robot, set_params, robot_params
 
+
 cpu_count = os.cpu_count()
 
 
-class robot_multirequests(robot_requests):
+class robot_multirequests(multi_mixin, robot_requests):
     concurrent_class = concurrent.futures.ProcessPoolExecutor  # or ThreadPoolExecutor # noqa: E501 line too long
+
     # We're I/O-bound, not CPU-bound
     max_urls = 2*cpu_count if cpu_count else 10
 
     def __init__(self, *args, **kw):
-        if isinstance(self.max_urls, str):
-            self.max_urls = int(self.max_urls)
         concurrent_class = getattr(concurrent.futures, self.concurrent_class) \
             if isinstance(self.concurrent_class, str) \
             else self.concurrent_class
         self.concurrent_class_name = concurrent_class.__name__
+        multi_mixin.__init__(self, *args, **kw)
         robot_requests.__init__(self, *args, **kw)
-        self.executor = concurrent_class(max_urls=self.max_urls)
-
-        # Bookmarks waiting to be processed;
-        # maps {URL: [bookmark, saved parent, future]}
-        self.bookmarks = {}
-        self.pending = set()  # pending futures
+        self.executor = concurrent_class(max_workers=self.max_urls)
 
     def version_str(self):
         return super(robot_multirequests, self).version_str() \
-            + ' concurrent.futures.' + self.concurrent_class_name
-
-    def check_bookmark(self, bookmark):
-        href = bookmark.href
-        bookmarks = self.bookmarks
-        if href in bookmarks:
-            return
-        bookmarks[href] = [bookmark, None, None]
-        if len(bookmarks) < self.max_urls:
-            return
-        self.wait()
+            + ' multi: concurrent.futures.' + self.concurrent_class_name
 
     def wait(self):
         log = self.log
@@ -62,7 +49,7 @@ class robot_multirequests(robot_requests):
         pending = self.pending
 
         free_workers = self.max_urls - len(pending)
-        if bookmarks and (free_workers > 0):  # there's job and free workers,
+        if bookmarks and (free_workers > 0):  # we have job and free workers
             for href in bookmarks:
                 bookmark, parent, ft = bookmarks[href]
                 if ft is not None:  # it's already pending
@@ -96,8 +83,7 @@ class robot_multirequests(robot_requests):
                 log(line)
 
     def stop(self):
-        while self.bookmarks or self.pending:
-            self.wait()
+        super(robot_multirequests, self).stop()
         self.executor.shutdown(wait=True)
 
 
diff --git a/Robots/multi_mixin.py b/Robots/multi_mixin.py
new file mode 100644 (file)
index 0000000..ef4542c
--- /dev/null
@@ -0,0 +1,38 @@
+"""Mix-in for robots that process multiple URLs in parallel.
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['multi_mixin']
+
+
+class multi_mixin:
+    max_urls = 10  # Small default
+
+    def __init__(self, *args, **kw):
+        if isinstance(self.max_urls, str):
+            self.max_urls = int(self.max_urls)
+
+        # Bookmarks waiting to be processed;
+        # maps {URL: [bookmark, saved parent, task]}
+        self.bookmarks = {}
+        self.pending = set()  # pending tasks
+
+    def check_bookmark(self, bookmark):
+        href = bookmark.href
+        bookmarks = self.bookmarks
+        if href in bookmarks:
+            return
+        bookmarks[href] = [bookmark, None, None]
+        if len(bookmarks) < self.max_urls:
+            return
+        self.wait()
+
+    def stop(self):
+        while self.bookmarks or self.pending:
+            self.wait()