X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rrequests.py;h=4577b1608d4992816f8a7ac943c339ec61cca1ac;hb=6f0376049642867b164c6f93798906be77c2a166;hp=24b09d6803ef763b24406267599f61bb2df73911;hpb=ed7127ea175cab12642244ebd2d86276adcb2e25;p=bookmarks_db.git diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index 24b09d6..4577b16 100644 --- a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -11,10 +11,15 @@ __license__ = "GNU GPL" __all__ = ['robot_requests'] +from urllib.parse import urlsplit +import warnings + +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.ssl_ import create_urllib3_context import requests import requests_ftp -from Robots.bkmk_robot_base import robot_base +from Robots.bkmk_robot_base import robot_base, request_headers requests_ftp.monkeypatch_session() @@ -24,28 +29,91 @@ class robot_requests(robot_base): # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080 proxy = None + # Store hosts for which we already know they require proxy... + proxy_ok = set() + # ...but aren't accessible even through proxy + proxy_error = set() + def get(self, bookmark, url, accept_charset=False): - try: - r = requests.Session().get( - url, timeout=self.timeout, allow_redirects=False) - except requests.RequestException as e: - error = str(e) - self.log(' Error: %s' % error) - if self.proxy: - error = None - self.log(' Retrying with the proxy...') - try: - r = requests.get( - url, timeout=self.timeout, allow_redirects=False, - proxies={'http': self.proxy, 'https': self.proxy}) - except requests.RequestException as e: - error = str(e) - self.log(' Proxy error: %s' % error) + split_results = urlsplit(url) + url_host = split_results.hostname + + if url_host in self.proxy_error: + return 'see prev. error', None, None, None, None + + if accept_charset and bookmark.charset: + headers = request_headers.copy() + headers['Accept-Charset'] = bookmark.charset + else: + headers = request_headers + + if url_host in self.proxy_ok: + self.log(' Immediately trying with the proxy') + error, r = request_get(url, headers, self.timeout, self.proxy) + else: + error, r = request_get(url, headers, self.timeout, None) if error is not None: - return error, None, None, None, None + self.log(' Error : %s' % error) + if self.proxy and error != '404 not_found': + self.log(' Retrying with the proxy...') + error, r = request_get(url, headers, + self.timeout, self.proxy) + if error is None: + self.proxy_ok.add(url_host) + if error is not None: + if self.proxy and error != '404 not_found': + self.log(' Proxy error : %s' % error) + if url_host not in self.proxy_ok: + self.proxy_error.add(url_host) + return error, None, None, None, None if r.is_redirect: return None, r.status_code, r.next.url, None, None return None, None, None, r.headers, r.content def get_ftp_welcome(self): return '' # Alas, requests_ftp doesn't store welcome message + + +# See https://lukasa.co.uk/2017/02/Configuring_TLS_With_Requests/ + +class AllCiphersAdapter(HTTPAdapter): + """ + A TransportAdapter that re-enables 3DES support in Requests. + """ + def init_poolmanager(self, *args, **kwargs): + context = create_urllib3_context(cert_reqs=0, + ciphers='ALL:@SECLEVEL=1') + kwargs['ssl_context'] = context + return super(AllCiphersAdapter, self).init_poolmanager(*args, **kwargs) + + def proxy_manager_for(self, *args, **kwargs): + context = create_urllib3_context(cert_reqs=0, + ciphers='ALL:@SECLEVEL=1') + kwargs['ssl_context'] = context + return super(AllCiphersAdapter, self).proxy_manager_for( + *args, **kwargs) + + +warnings.filterwarnings('ignore', 'Unverified HTTPS request is being made') + + +def request_get(url, headers, timeout, proxy): + if proxy: + proxies = {'http': proxy, 'https': proxy} + else: + proxies = None + + s = requests.Session() + s.mount('https://', AllCiphersAdapter()) + + try: + r = s.get(url, headers=headers, timeout=timeout, + allow_redirects=False, proxies=proxies, + verify=False) + except requests.RequestException as e: + return str(e), None + else: + if r.status_code >= 400: + error = requests.status_codes._codes[r.status_code][0] + return '%d %s' % (r.status_code, error), None + return None, r