__all__ = ['robot_requests']
+from urllib.parse import urlsplit
+import warnings
+
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.ssl_ import create_urllib3_context
import requests
import requests_ftp
-from Robots.bkmk_robot_base import robot_base
+from Robots.bkmk_robot_base import robot_base, request_headers
requests_ftp.monkeypatch_session()
# Example robot setting that supplies a proxy:
# BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
proxy = None

# Hosts that failed when contacted directly but answered through the proxy;
# later requests to them go straight to the proxy.
proxy_ok = set()
# Hosts that failed both directly and through the proxy;
# later requests to them are rejected without any network traffic.
proxy_error = set()
+
def get(self, bookmark, url, accept_charset=False):
    """Fetch *url* without following redirects, using the proxy if needed.

    The request is first made directly; if that fails and ``self.proxy``
    is set, it is retried through the proxy.  Hosts that only answered
    through the proxy are remembered in ``self.proxy_ok`` and contacted
    through it right away next time.  Hosts that failed both ways are
    remembered in ``self.proxy_error`` and rejected without a request.

    :param bookmark: object whose ``charset`` attribute is sent as the
        ``Accept-Charset`` header when *accept_charset* is true.
    :param url: URL to fetch.
    :param accept_charset: send ``Accept-Charset`` if the bookmark has
        a charset.
    :return: a 5-tuple.  On error: ``(error, None, None, None, None)``.
        On redirect: ``(None, status_code, redirect_url, None, None)``.
        On success: ``(None, None, None, response_headers, content)``.
    """
    url_host = urlsplit(url).hostname

    # This host already failed both directly and through the proxy.
    if url_host in self.proxy_error:
        return 'see prev. error', None, None, None, None

    if accept_charset and bookmark.charset:
        # Work on a copy so the shared default headers stay untouched.
        headers = request_headers.copy()
        headers['Accept-Charset'] = bookmark.charset
    else:
        headers = request_headers

    if url_host in self.proxy_ok:
        self.log(' Immediately trying with the proxy')
        error, r = request_get(url, headers, self.timeout, self.proxy)
    else:
        error, r = request_get(url, headers, self.timeout, None)
        if error is not None:
            self.log(' Error : %s' % error)
            # A 404 is an answer from the server itself;
            # going through the proxy would not change it.
            if self.proxy and error != '404 not_found':
                self.log(' Retrying with the proxy...')
                error, r = request_get(url, headers,
                                       self.timeout, self.proxy)
                if error is None:
                    self.proxy_ok.add(url_host)

    if error is not None:
        if self.proxy and error != '404 not_found':
            self.log(' Proxy error : %s' % error)
            # Do not blacklist a host the proxy has served before.
            if url_host not in self.proxy_ok:
                self.proxy_error.add(url_host)
        return error, None, None, None, None

    if r.is_redirect:
        # ``r.next`` is None when requests could not prepare the follow-up
        # request (e.g. an empty Location header); use the raw header then
        # instead of failing with AttributeError.
        next_request = r.next
        if next_request is not None:
            redirect_url = next_request.url
        else:
            redirect_url = r.headers.get('location')
        return None, r.status_code, redirect_url, None, None

    return None, None, None, r.headers, r.content
def get_ftp_welcome(self):
    """Return the FTP welcome message; always an empty string here."""
    return ''  # Alas, requests_ftp doesn't store welcome message
+
+
+# See https://lukasa.co.uk/2017/02/Configuring_TLS_With_Requests/
+
class AllCiphersAdapter(HTTPAdapter):
    """
    A TransportAdapter that lets Requests use a permissive TLS setup.

    Both the direct and the proxied connection pools are given an SSL
    context created with the OpenSSL cipher string ``ALL:@SECLEVEL=1``
    and with certificate verification switched off, so servers that
    only offer legacy cipher suites can still be reached.
    """

    @staticmethod
    def _permissive_context():
        """Build the SSL context handed to every pool manager."""
        # cert_reqs=0 has the same value as ssl.CERT_NONE:
        # the peer certificate is not verified.
        return create_urllib3_context(cert_reqs=0,
                                      ciphers='ALL:@SECLEVEL=1')

    def init_poolmanager(self, *args, **kwargs):
        """Create the direct pool manager with the permissive context."""
        kwargs['ssl_context'] = self._permissive_context()
        return super().init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        """Create a proxy pool manager with the permissive context."""
        kwargs['ssl_context'] = self._permissive_context()
        return super().proxy_manager_for(*args, **kwargs)
+
+
# Requests in this module are sent with verify=False on purpose;
# silence the warning that is emitted for each such request.
warnings.filterwarnings('ignore', 'Unverified HTTPS request is being made')
+
+
def request_get(url, headers, timeout, proxy):
    """Perform one GET request without following redirects.

    :param url: URL to fetch.
    :param headers: HTTP headers to send.
    :param timeout: timeout passed to requests.
    :param proxy: proxy URL used for both http and https, or a false
        value to connect directly.
    :return: ``(error, response)``.  On success ``error`` is None.  On a
        requests exception or an HTTP status >= 400 ``response`` is None
        and ``error`` is the exception text or ``'<code> <name>'``
        (e.g. ``'404 not_found'``).
    """
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    # The body is downloaded before get() returns (no streaming), so the
    # session and its pooled connections can be closed right away.
    with requests.Session() as s:
        s.mount('https://', AllCiphersAdapter())
        try:
            r = s.get(url, headers=headers, timeout=timeout,
                      allow_redirects=False, proxies=proxies,
                      verify=False)
        except requests.RequestException as e:
            return str(e), None

    if r.status_code >= 400:
        # Not every status a server may send is listed in requests' table;
        # fall back to the server's reason phrase instead of a KeyError.
        names = requests.status_codes._codes.get(r.status_code)
        error = names[0] if names else (r.reason or 'unknown')
        return '%d %s' % (r.status_code, error), None
    return None, r