X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rrequests.py;h=114d67094586ed97d928768803aed3704f075790;hb=3b51e43f06c97c0d1a2ffbb1c29276acaeb64f07;hp=e1f00198dcda56a7f4d54835ce322d29ba51bc69;hpb=011586eca56d3bde3fef2087579e6cbc0682f5b4;p=bookmarks_db.git diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index e1f0019..114d670 100644 --- a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -11,6 +11,8 @@ __license__ = "GNU GPL" __all__ = ['robot_requests'] +from urllib.parse import urlsplit + import requests import requests_ftp @@ -24,16 +26,35 @@ class robot_requests(robot_base): # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080 proxy = None + # Store hosts for which we already know they require proxy... + proxy_ok = set() + # ...but aren't accessible even through proxy + proxy_error = set() + def get(self, bookmark, url, accept_charset=False): - error, r = request_get(url, self.timeout, None) - if error is not None: - self.log(' Error: %s' % error) - if self.proxy: - self.log(' Retrying with the proxy...') - error, r = request_get(url, self.timeout, self.proxy) + split_results = urlsplit(url) + url_host = split_results.hostname + + if url_host in self.proxy_error: + return 'proxy error', None, None, None, None + + if url_host in self.proxy_ok: + self.log(' Immediately trying with the proxy') + error, r = request_get(url, self.timeout, self.proxy) + else: + error, r = request_get(url, self.timeout, None) + if error is not None: + self.log(' Error: %s' % error) + if self.proxy: + self.log(' Retrying with the proxy...') + error, r = request_get(url, self.timeout, self.proxy) + if error is None: + self.proxy_ok.add(url_host) if error is not None: if self.proxy: self.log(' Proxy error: %s' % error) + if url_host not in self.proxy_ok: + self.proxy_error.add(url_host) return error, None, None, None, None if r.is_redirect: return None, r.status_code, r.next.url, None, None