From: Oleg Broytman Date: Wed, 21 Aug 2024 14:04:46 +0000 (+0300) Subject: Refactor(bkmk_rmultiaio): Split off `multi_async_mixin` X-Git-Tag: 6.1.0~11 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=c07804e1a64ad0280690c8fe09c5e0a63cc72ad4;p=bookmarks_db.git Refactor(bkmk_rmultiaio): Split off `multi_async_mixin` --- diff --git a/Robots/bkmk_rmultiaio.py b/Robots/bkmk_rmultiaio.py index d4b3342..19f3062 100644 --- a/Robots/bkmk_rmultiaio.py +++ b/Robots/bkmk_rmultiaio.py @@ -11,97 +11,15 @@ __license__ = "GNU GPL" __all__ = ['robot_multiaio'] -import asyncio -import contextvars - from Robots.bkmk_raiohttp import robot_aiohttp -from Robots.multi_mixin import multi_mixin - +from Robots.multi_async_mixin import multi_async_mixin -current_href = contextvars.ContextVar('current_href') - -class robot_multiaio(multi_mixin, robot_aiohttp): +class robot_multiaio(multi_async_mixin, robot_aiohttp): def __init__(self, *args, **kw): - multi_mixin.__init__(self, *args, **kw) + multi_async_mixin.__init__(self, *args, **kw) robot_aiohttp.__init__(self, *args, **kw) - - # We need one event loop for the entire application - # so that we can save pending tasks between calls to self.wait(). - # This also means we cannot use asyncio.run(). - self.loop = loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - # Rename self.log, create one log_lines list per URL - self.file_log = self.log - del self.log - self.logs = {} # Map {href: [log lines]} - - def __getattr__(self, attr): - if attr != 'log': - raise AttributeError(attr) - href = current_href.get() - return self.logs[href].append + self._init() def version_str(self): return super(robot_multiaio, self).version_str() + ' multi: aiohttp' - - async def check_bookmark_async_log(self, bookmark): - current_href.set(bookmark.href) - await self.check_bookmark_async(bookmark) - - async def get_url(self, url, headers): - if url not in self.logs: - self.logs[url] = [] - current_href.set(url) - return await super(robot_multiaio, self).get_url(url, headers) - - def wait(self): - self.loop.run_until_complete(self.wait_async()) - - async def wait_async(self): - bookmarks = self.bookmarks - pending = self.pending - - free_workers = self.max_urls - len(pending) - if bookmarks and (free_workers > 0): # we have job and free workers - for href in bookmarks: - bookmark, _, task = bookmarks[href] - if task is not None: # it's already pending - continue - task = asyncio.create_task( - self.check_bookmark_async_log(bookmark)) - bookmarks[href] = [bookmark, None, task] - if href not in self.logs: - self.logs[href] = [] - pending.add(task) - task.href = href - - free_workers -= 1 - if free_workers == 0: - break - - if pending: - done, pending = await asyncio.wait( - pending, timeout=self.timeout+1, - return_when=asyncio.FIRST_COMPLETED) - self.pending = pending - - for task in done: - bookmark, _, old_task = bookmarks.pop(task.href) - assert old_task is task - log = self.file_log - log_lines = self.logs.pop(bookmark.href) - log('Checked: %s' % bookmark.href) - if log_lines: - for line in log_lines: - log(line) - else: - if hasattr(bookmark, 'error'): - log(' Error: %s' % bookmark.error) - else: - log(' No logs') - - def stop(self): - super(robot_multiaio, self).stop() - self.loop.close() diff --git a/Robots/multi_async_mixin.py b/Robots/multi_async_mixin.py new file mode 100644 index 0000000..8126417 --- /dev/null +++ b/Robots/multi_async_mixin.py @@ -0,0 +1,100 @@ +"""Mix-in for async robots that process multiple URLs in parallel. + +This file is a part of Bookmarks database and Internet robot. + +""" + +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2024 PhiloSoft Design" +__license__ = "GNU GPL" + +__all__ = ['multi_async_mixin'] + + +import asyncio +import contextvars + +from Robots.multi_mixin import multi_mixin + + +current_href = contextvars.ContextVar('current_href') + + +class multi_async_mixin(multi_mixin): + def _init(self): + # We need one event loop for the entire application + # so that we can save pending tasks between calls to self.wait(). + # This also means we cannot use asyncio.run(). + self.loop = loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Rename self.log, create one log_lines list per URL + self.file_log = self.log + del self.log + self.logs = {} # Map {href: [log lines]} + + def __getattr__(self, attr): + if attr != 'log': + raise AttributeError(attr) + href = current_href.get() + return self.logs[href].append + + async def check_bookmark_async_log(self, bookmark): + current_href.set(bookmark.href) + await self.check_bookmark_async(bookmark) + + async def get_url(self, url, headers): + if url not in self.logs: + self.logs[url] = [] + current_href.set(url) + return await super(multi_async_mixin, self).get_url(url, headers) + + def wait(self): + self.loop.run_until_complete(self.wait_async()) + + async def wait_async(self): + bookmarks = self.bookmarks + pending = self.pending + + free_workers = self.max_urls - len(pending) + if bookmarks and (free_workers > 0): # we have job and free workers + for href in bookmarks: + bookmark, _, task = bookmarks[href] + if task is not None: # it's already pending + continue + task = asyncio.create_task( + self.check_bookmark_async_log(bookmark)) + bookmarks[href] = [bookmark, None, task] + if href not in self.logs: + self.logs[href] = [] + pending.add(task) + task.href = href + + free_workers -= 1 + if free_workers == 0: + break + + if pending: + done, pending = await asyncio.wait( + pending, timeout=self.timeout+1, + return_when=asyncio.FIRST_COMPLETED) + self.pending = pending + + for task in done: + bookmark, _, old_task = bookmarks.pop(task.href) + assert old_task is task + log = self.file_log + log_lines = self.logs.pop(bookmark.href) + log('Checked: %s' % bookmark.href) + if log_lines: + for line in log_lines: + log(line) + else: + if hasattr(bookmark, 'error'): + log(' Error: %s' % bookmark.error) + else: + log(' No logs') + + def stop(self): + super(multi_async_mixin, self).stop() + self.loop.close() diff --git a/Robots/multi_mixin.py b/Robots/multi_mixin.py index b927195..e5d4353 100644 --- a/Robots/multi_mixin.py +++ b/Robots/multi_mixin.py @@ -1,4 +1,4 @@ -"""Mix-in for robots ath process multiple URLs in parallel. +"""Mix-in for robots that process multiple URLs in parallel. This file is a part of Bookmarks database and Internet robot.