From 3b51e43f06c97c0d1a2ffbb1c29276acaeb64f07 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 2 Mar 2024 12:13:42 +0300 Subject: [PATCH] Perf(Robots/requests): Speed up second access Use proxy immediately for hosts for which we already know they require proxy. Don't use proxy for hosts that aren't accessible even through proxy; immediately return an error. --- Makefile | 2 +- Robots/bkmk_rrequests.py | 33 +++++++++++++++++++++++++++------ doc/ANNOUNCE | 4 ++++ doc/ChangeLog | 4 ++++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b49ebfb..06e2c47 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ # __license__ = "GNU GPL" # -VERSION=5.2.0 +VERSION=5.2.1 EXAMPLE_SHELL=\ bkmk-add bkmk-add.py bkmk-chk bkmk-publish bkmk-rsync bkmk-sort bkmk2db \ diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index e1f0019..114d670 100644 --- a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -11,6 +11,8 @@ __license__ = "GNU GPL" __all__ = ['robot_requests'] +from urllib.parse import urlsplit + import requests import requests_ftp @@ -24,16 +26,35 @@ class robot_requests(robot_base): # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080 proxy = None + # Store hosts for which we already know they require proxy... 
+ proxy_ok = set() + # ...but aren't accessible even through proxy + proxy_error = set() + def get(self, bookmark, url, accept_charset=False): - error, r = request_get(url, self.timeout, None) - if error is not None: - self.log(' Error: %s' % error) - if self.proxy: - self.log(' Retrying with the proxy...') - error, r = request_get(url, self.timeout, self.proxy) + split_results = urlsplit(url) + url_host = split_results.hostname + + if url_host in self.proxy_error: + return 'proxy error', None, None, None, None + + if url_host in self.proxy_ok: + self.log(' Immediately trying with the proxy') + error, r = request_get(url, self.timeout, self.proxy) + else: + error, r = request_get(url, self.timeout, None) + if error is not None: + self.log(' Error: %s' % error) + if self.proxy: + self.log(' Retrying with the proxy...') + error, r = request_get(url, self.timeout, self.proxy) + if error is None: + self.proxy_ok.add(url_host) if error is not None: if self.proxy: self.log(' Proxy error: %s' % error) + if url_host not in self.proxy_ok: + self.proxy_error.add(url_host) return error, None, None, None, None if r.is_redirect: return None, r.status_code, r.next.url, None, None diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE index 2eaac95..a3340e6 100644 --- a/doc/ANNOUNCE +++ b/doc/ANNOUNCE @@ -6,6 +6,10 @@ WHAT IS IT bookmarks.html. WHAT'S NEW +Version 5.2.1 (2024-03-02) + + Speedup second access through proxy. + Version 5.2.0 (2024-03-02) For the robot based on requests allow to use a proxy. diff --git a/doc/ChangeLog b/doc/ChangeLog index 204a306..fa40ada 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +Version 5.2.1 (2024-03-02) + + Speedup second access through proxy. + Version 5.2.0 (2024-03-02) For the robot based on requests allow to use a proxy. -- 2.39.5