__all__ = ['robot_requests']
+from urllib.parse import urlsplit
+
import requests
import requests_ftp
# BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
proxy = None
+ # Hosts that are already known to require the proxy...
+ proxy_ok = set()
+ # ...and hosts that aren't accessible even through the proxy
+ proxy_error = set()
+
def get(self, bookmark, url, accept_charset=False):
- error, r = request_get(url, self.timeout, None)
- if error is not None:
- self.log(' Error: %s' % error)
- if self.proxy:
- self.log(' Retrying with the proxy...')
- error, r = request_get(url, self.timeout, self.proxy)
+ split_results = urlsplit(url)
+ url_host = split_results.hostname
+
+ if url_host in self.proxy_error:
+ return 'proxy error', None, None, None, None
+
+ if url_host in self.proxy_ok:
+ self.log(' Immediately trying with the proxy')
+ error, r = request_get(url, self.timeout, self.proxy)
+ else:
+ error, r = request_get(url, self.timeout, None)
+ if error is not None:
+ self.log(' Error: %s' % error)
+ if self.proxy:
+ self.log(' Retrying with the proxy...')
+ error, r = request_get(url, self.timeout, self.proxy)
+ if error is None:
+ self.proxy_ok.add(url_host)
if error is not None:
if self.proxy:
self.log(' Proxy error: %s' % error)
+ if url_host not in self.proxy_ok:
+ self.proxy_error.add(url_host)
return error, None, None, None, None
if r.is_redirect:
return None, r.status_code, r.next.url, None, None