From 0b92b89ca02008f0dc2eb2a04ab56c15740b14c0 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 18 Aug 2024 23:28:56 +0300 Subject: [PATCH] Style(bkmk_rmultirequests): Renamed max_workers to max_urls --- Robots/bkmk_rmultirequests.py | 15 +++++++-------- doc/ANNOUNCE | 24 ++---------------------- doc/ChangeLog | 4 ++++ 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/Robots/bkmk_rmultirequests.py b/Robots/bkmk_rmultirequests.py index 16a3c66..5ae0e37 100644 --- a/Robots/bkmk_rmultirequests.py +++ b/Robots/bkmk_rmultirequests.py @@ -25,17 +25,17 @@ cpu_count = os.cpu_count() class robot_multirequests(robot_requests): concurrent_class = concurrent.futures.ProcessPoolExecutor # or ThreadPoolExecutor # noqa: E501 line too long # We're I/O-bound, not CPU-bound - max_workers = 2*cpu_count if cpu_count else 10 + max_urls = 2*cpu_count if cpu_count else 10 def __init__(self, *args, **kw): - if isinstance(self.max_workers, str): - self.max_workers = int(self.max_workers) + if isinstance(self.max_urls, str): + self.max_urls = int(self.max_urls) concurrent_class = getattr(concurrent.futures, self.concurrent_class) \ if isinstance(self.concurrent_class, str) \ else self.concurrent_class self.concurrent_class_name = concurrent_class.__name__ robot_requests.__init__(self, *args, **kw) - self.executor = concurrent_class(max_workers=self.max_workers) + self.executor = concurrent_class(max_workers=self.max_urls) # Bookmarks waiting to be processed; # maps {URL: [bookmark, saved parent, future]} @@ -52,7 +52,7 @@ class robot_multirequests(robot_requests): if href in bookmarks: return bookmarks[href] = [bookmark, None, None] - if len(bookmarks) < self.max_workers: + if len(bookmarks) < self.max_urls: return self.wait() @@ -61,7 +61,7 @@ class robot_multirequests(robot_requests): bookmarks = self.bookmarks pending = self.pending - free_workers = self.max_workers - len(pending) + free_workers = self.max_urls - len(pending) if bookmarks and (free_workers > 0): # 
there's job and free workers, for href in bookmarks: bookmark, parent, ft = bookmarks[href] @@ -82,6 +82,7 @@ class robot_multirequests(robot_requests): done, pending = concurrent.futures.wait( pending, self.timeout+1, return_when=concurrent.futures.FIRST_COMPLETED) + self.pending = pending for ft in done: new_bkmk, log_lines = ft.result() @@ -94,8 +95,6 @@ class robot_multirequests(robot_requests): for line in log_lines: log(line) - self.pending = pending - def stop(self): while self.bookmarks or self.pending: self.wait() diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE index 4efc6b2..82765aa 100644 --- a/doc/ANNOUNCE +++ b/doc/ANNOUNCE @@ -7,29 +7,9 @@ bookmarks.html. WHAT'S NEW -Version 5.7.0 (2024-08-16) +Version 6.0.0 (2024-??-??) - Robot bkmk_rrequests: Use ftplib directly, without requests_ftp. - - Robots: Removed connect_timeout, added ftp_timeout. - - Robot bkmk_raiohttp: Use aiohttp-socks for aiohttp, siosocks for aioftp. - -Version 5.6.1 (2024-08-15) - - Minor fixes. - -Version 5.6.0 (2024-08-15) - - Robot based on requests and concurrent.futures, - processes multiple URLs in parallel. Multiprocess variant works - very well, multithreading not so good (too many sites report errors). - - Removed urllib-based robots. - - Dropped support for Python 2. - - Default list of robots is now curl,requests,aiohttp. + Renamed max_workers to max_urls. WHERE TO GET diff --git a/doc/ChangeLog b/doc/ChangeLog index 9a8b2ea..32d24d8 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +Version 6.0.0 (2024-??-??) + + Renamed max_workers to max_urls. + Version 5.7.0 (2024-08-16) Robot bkmk_rrequests: Use ftplib directly, without requests_ftp. -- 2.39.5