git.phdru.name Git - bookmarks_db.git/commitdiff
Feat(Robots): Robot based on aiohttp, processes multiple URLs in parallel 6.0.0
author Oleg Broytman <phd@phdru.name>
Mon, 19 Aug 2024 00:59:24 +0000 (03:59 +0300)
committer Oleg Broytman <phd@phdru.name>
Mon, 19 Aug 2024 08:10:47 +0000 (11:10 +0300)
Robots/bkmk_rmultiaio.py [new file with mode: 0644]
bkmk_objects.py
doc/ANNOUNCE
doc/ChangeLog
robots.py

diff --git a/Robots/bkmk_rmultiaio.py b/Robots/bkmk_rmultiaio.py
new file mode 100644 (file)
index 0000000..784752c
--- /dev/null
@@ -0,0 +1,124 @@
+"""Robot based on aiohttp, processes multiple URLs in parallel
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_multiaio']
+
+
+import asyncio
+import contextvars
+
+from Robots.bkmk_raiohttp import robot_aiohttp
+from Robots.multi_mixin import multi_mixin
+
+
+# href of the bookmark being checked by the current task;
+# robot_multiaio.__getattr__ uses it to pick the per-URL log.
+current_href = contextvars.ContextVar('current_href')
+
+
+class robot_multiaio(multi_mixin, robot_aiohttp):
+    """aiohttp-based robot that checks several bookmarks concurrently.
+
+    While a bookmark is being checked, ``self.log`` appends to
+    a per-URL list of lines; the collected lines are written to
+    the real log (``self.file_log``) once the check is done.
+    """
+
+    def __init__(self, *args, **kw):
+        multi_mixin.__init__(self, *args, **kw)
+        robot_aiohttp.__init__(self, *args, **kw)
+
+        # We need one event loop for the entire application
+        # so that we can save pending tasks between calls to self.wait().
+        # This also means we cannot use asyncio.run().
+        self.loop = loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+        # Rename self.log, create one log_lines list per URL
+        self.file_log = self.log
+        del self.log
+        self.logs = {}  # Map {href: [log lines]}
+
+    def __getattr__(self, attr):
+        """Emulate ``self.log``: append to the current URL's log lines."""
+        # Called only when normal lookup fails; ``log`` was deleted
+        # from the instance in __init__ so it's resolved here.
+        if attr != 'log':
+            raise AttributeError(attr)
+        href = current_href.get()
+        return self.logs[href].append
+
+    def version_str(self):
+        return super(robot_multiaio, self).version_str() + ' multi: aiohttp'
+
+    async def check_bookmark_async_log(self, bookmark):
+        """Check the bookmark, routing ``self.log`` to its own log lines."""
+        current_href.set(bookmark.href)
+        await self.check_bookmark_async(bookmark)
+
+    def wait(self):
+        """Run one round of wait_async() on the robot's event loop."""
+        self.loop.run_until_complete(self.wait_async())
+
+    async def wait_async(self):
+        """Start checks for free workers, wait, log finished checks.
+
+        Returns after the first task is done or after the timeout;
+        unfinished tasks are kept in self.pending for the next call.
+        """
+        bookmarks = self.bookmarks
+        pending = self.pending
+
+        free_workers = self.max_urls - len(pending)
+        if bookmarks and (free_workers > 0):  # we have job and free workers
+            for href in bookmarks:
+                bookmark, _, task = bookmarks[href]
+                if task is not None:  # it's already pending
+                    continue
+                task = asyncio.create_task(
+                    self.check_bookmark_async_log(bookmark))
+                bookmarks[href] = [bookmark, None, task]
+                self.logs[href] = []
+                pending.add(task)
+                task.href = href
+
+                free_workers -= 1
+                if free_workers == 0:
+                    break
+
+        if not pending:
+            # Nothing to wait for; without this guard ``done`` below
+            # would be unbound and the loop over it would fail.
+            return
+
+        done, pending = await asyncio.wait(
+            pending, timeout=self.timeout+1,
+            return_when=asyncio.FIRST_COMPLETED)
+        self.pending = pending
+
+        for task in done:
+            bookmark, _, old_task = bookmarks.pop(task.href)
+            assert old_task is task
+            log = self.file_log
+            log_lines = self.logs.pop(bookmark.href)
+            log('Checked: %s' % bookmark.href)
+            if log_lines:
+                for line in log_lines:
+                    log(line)
+            else:
+                if hasattr(bookmark, 'error'):
+                    log('   Error: %s' % bookmark.error)
+                else:
+                    log('   No logs')
+
+    def stop(self):
+        """Stop the robot and close its event loop."""
+        super(robot_multiaio, self).stop()
+        self.loop.close()
index 3b604e1fcbc9c82972ab7fbcabb38b497f7dc3d1..02fab566106600d05fa62fb3f01486964164531a 100644 (file)
@@ -7,7 +7,7 @@ This file is a part of Bookmarks database and Internet robot.
 __author__ = "Oleg Broytman <phd@phdru.name>"
 __copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
 __license__ = "GNU GPL"
-__version__ = '5.7.0'
+__version__ = '6.0.0'
 
 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
            'InverseLinker', 'Linear', 'make_linear', 'make_tree', 'break_tree',
index 47b45a6f29cfd26261152835a4273c0f77d6eaac..c6981a1c72f6493d3fcca601a8e569a0d69ea5c2 100644 (file)
@@ -9,6 +9,10 @@ WHAT'S NEW
 
 Version 6.0.0 (2024-??-??)
 
+   Robot based on aiohttp, processes multiple URLs in parallel.
+
+   Default list of robots is now multirequests,multiaio,curl.
+
    Make all robots async.
    Split check_bookmark() into sync and async variants.
 
index 5e08408a24589b398a3956a4c7d5e62798d88752..915b3378ad02c298d7ac5f4bd10515c7163f5be3 100644 (file)
@@ -1,5 +1,9 @@
 Version 6.0.0 (2024-??-??)
 
+   Robot based on aiohttp, processes multiple URLs in parallel.
+
+   Default list of robots is now multirequests,multiaio,curl.
+
    Make all robots async.
    Split check_bookmark() into sync and async variants.
 
index 583d1a8819d62c747f5464e8ed0f7ffc347c78b2..0ddba41451dece400474bdc79680e5ee7fe0ef70 100644 (file)
--- a/robots.py
+++ b/robots.py
@@ -16,7 +16,7 @@ from os import environ
 from bkmk_objects import parse_params, set_params
 
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "multirequests,curl,requests,aiohttp"))
+    environ.get("BKMK_ROBOT", "multirequests,multiaoi,curl"))
 
 
 def import_robot(robot_name):