__all__ = ['robot_requests']
+from urllib.parse import urlsplit
+
import requests
import requests_ftp
-from Robots.bkmk_robot_base import robot_base
+from Robots.bkmk_robot_base import robot_base, request_headers
requests_ftp.monkeypatch_session()
# BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
proxy = None
+ # Hosts that are already known to require the proxy...
+ proxy_ok = set()
+ # ...but that aren't accessible even through the proxy
+ proxy_error = set()
+
def get(self, bookmark, url, accept_charset=False):
- error, r = request_get(url, self.timeout, None)
- if error is not None:
- self.log(' Error: %s' % error)
- if self.proxy:
- self.log(' Retrying with the proxy...')
- error, r = request_get(url, self.timeout, self.proxy)
+ split_results = urlsplit(url)
+ url_host = split_results.hostname
+
+ if url_host in self.proxy_error:
+ return 'proxy error', None, None, None, None
+
+ if accept_charset and bookmark.charset:
+ headers = request_headers.copy()
+ headers['Accept-Charset'] = bookmark.charset
+ else:
+ headers = request_headers
+
+ if url_host in self.proxy_ok:
+ self.log(' Immediately trying with the proxy')
+ error, r = request_get(url, headers, self.timeout, self.proxy)
+ else:
+ error, r = request_get(url, headers, self.timeout, None)
+ if error is not None:
+ self.log(' Error: %s' % error)
+ if self.proxy:
+ self.log(' Retrying with the proxy...')
+ error, r = request_get(url, headers,
+ self.timeout, self.proxy)
+ if error is None:
+ self.proxy_ok.add(url_host)
if error is not None:
if self.proxy:
self.log(' Proxy error: %s' % error)
+ if url_host not in self.proxy_ok:
+ self.proxy_error.add(url_host)
return error, None, None, None, None
if r.is_redirect:
return None, r.status_code, r.next.url, None, None
return '' # Alas, requests_ftp doesn't store welcome message
-def request_get(url, timeout, proxy):
+def request_get(url, headers, timeout, proxy):
if proxy:
proxies = {'http': proxy, 'https': proxy}
else:
try:
r = requests.Session().get(
- url, timeout=timeout, allow_redirects=False, proxies=proxies)
+ url, headers=headers, timeout=timeout,
+ allow_redirects=False, proxies=proxies)
except requests.RequestException as e:
return str(e), None
else: