Feat(Robots/bkmk_rrequests): Ignore all problems with certificates
diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py
index e1f00198dcda56a7f4d54835ce322d29ba51bc69..4577b1608d4992816f8a7ac943c339ec61cca1ac 100644
--- a/Robots/bkmk_rrequests.py
+++ b/Robots/bkmk_rrequests.py
@@ -11,10 +11,15 @@ __license__ = "GNU GPL"
 __all__ = ['robot_requests']
 
 
+from urllib.parse import urlsplit
+import warnings
+
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.ssl_ import create_urllib3_context
 import requests
 import requests_ftp
 
-from Robots.bkmk_robot_base import robot_base
+from Robots.bkmk_robot_base import robot_base, request_headers
 
 requests_ftp.monkeypatch_session()
 
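
For context: requests_ftp.monkeypatch_session() swaps an FTP-capable
subclass in for requests.Session, so the same Session-based code path can
also fetch ftp:// bookmarks. A minimal sketch of what that enables (the
host is a placeholder, not one from this repository):

    import requests
    import requests_ftp

    # After monkeypatching, requests.Session().get() accepts ftp:// URLs too.
    requests_ftp.monkeypatch_session()

    s = requests.Session()
    r = s.get('ftp://ftp.example.com/pub/README', timeout=30)  # placeholder
    print(r.status_code, len(r.content))
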
@@ -24,16 +29,42 @@ class robot_requests(robot_base):
     # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
     proxy = None
 
+    # Hosts that are already known to require the proxy...
+    proxy_ok = set()
+    # ...and hosts that turned out to be inaccessible even through the proxy
+    proxy_error = set()
+
     def get(self, bookmark, url, accept_charset=False):
-        error, r = request_get(url, self.timeout, None)
-        if error is not None:
-            self.log('   Error: %s' % error)
-            if self.proxy:
-                self.log('   Retrying with the proxy...')
-                error, r = request_get(url, self.timeout, self.proxy)
+        split_results = urlsplit(url)
+        url_host = split_results.hostname
+
+        if url_host in self.proxy_error:
+            return 'see prev. error', None, None, None, None
+
+        if accept_charset and bookmark.charset:
+            headers = request_headers.copy()
+            headers['Accept-Charset'] = bookmark.charset
+        else:
+            headers = request_headers
+
+        if url_host in self.proxy_ok:
+            self.log('   Immediately trying with the proxy')
+            error, r = request_get(url, headers, self.timeout, self.proxy)
+        else:
+            error, r = request_get(url, headers, self.timeout, None)
+            if error is not None:
+                self.log('   Error          : %s' % error)
+                if self.proxy and error != '404 not_found':
+                    self.log('   Retrying with the proxy...')
+                    error, r = request_get(url, headers,
+                                           self.timeout, self.proxy)
+                    if error is None:
+                        self.proxy_ok.add(url_host)
         if error is not None:
-            if self.proxy:
-                self.log('   Proxy error: %s' % error)
+            if self.proxy and error != '404 not_found':
+                self.log('   Proxy error    : %s' % error)
+                if url_host not in self.proxy_ok:
+                    self.proxy_error.add(url_host)
             return error, None, None, None, None
         if r.is_redirect:
             return None, r.status_code, r.next.url, None, None
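
The per-host proxy bookkeeping above is the heart of this change and reads
well in isolation. A minimal sketch of the same logic, where fetch() and
fetch_via_proxy() are hypothetical stand-ins for the direct and proxied
request_get() calls:

    proxy_ok = set()     # hosts known to be reachable only through the proxy
    proxy_error = set()  # hosts that failed even through the proxy

    def get_with_proxy_cache(host, url):
        if host in proxy_error:
            return 'see prev. error', None    # don't retry known-bad hosts
        if host in proxy_ok:
            return fetch_via_proxy(url)       # skip the doomed direct attempt
        error, r = fetch(url)
        if error is not None and error != '404 not_found':
            error, r = fetch_via_proxy(url)
            if error is None:
                proxy_ok.add(host)            # the proxy helped; remember it
            else:
                proxy_error.add(host)         # nothing helps; remember that
        return error, r

A plain 404 is deliberately excluded from the retry: the page really is
gone, so routing the request through the proxy would not change the answer.
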
@@ -43,16 +74,46 @@ class robot_requests(robot_base):
         return ''  # Alas, requests_ftp doesn't store welcome message
 
 
-def request_get(url, timeout, proxy):
+# See https://lukasa.co.uk/2017/02/Configuring_TLS_With_Requests/
+
+class AllCiphersAdapter(HTTPAdapter):
+    """
+    A TransportAdapter that re-enables weak ciphers (3DES etc.)
+    """
+    def init_poolmanager(self, *args, **kwargs):
+        context = create_urllib3_context(cert_reqs=0,
+                                         ciphers='ALL:@SECLEVEL=1')
+        kwargs['ssl_context'] = context
+        return super(AllCiphersAdapter, self).init_poolmanager(*args, **kwargs)
+
+    def proxy_manager_for(self, *args, **kwargs):
+        context = create_urllib3_context(cert_reqs=0,
+                                         ciphers='ALL:@SECLEVEL=1')
+        kwargs['ssl_context'] = context
+        return super(AllCiphersAdapter, self).proxy_manager_for(
+            *args, **kwargs)
+
+
+warnings.filterwarnings('ignore', 'Unverified HTTPS request is being made')
+
+
+def request_get(url, headers, timeout, proxy):
     if proxy:
         proxies = {'http': proxy, 'https': proxy}
     else:
         proxies = None
 
+    s = requests.Session()
+    s.mount('https://', AllCiphersAdapter())
+
     try:
-        r = requests.Session().get(
-            url, timeout=timeout, allow_redirects=False, proxies=proxies)
+        r = s.get(url, headers=headers, timeout=timeout,
+                  allow_redirects=False, proxies=proxies,
+                  verify=False)
     except requests.RequestException as e:
         return str(e), None
     else:
+        if r.status_code >= 400:
+            error = requests.status_codes._codes[r.status_code][0]
+            return '%d %s' % (r.status_code, error), None
         return None, r
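
Taken together, the adapter and verify=False trade away all TLS safety for
reach: the robot can now check bookmarks on hosts with expired, self-signed,
or mismatched certificates and with cipher suites modern OpenSSL rejects.
A usage sketch (the URL is a placeholder):

    s = requests.Session()
    s.mount('https://', AllCiphersAdapter())
    # verify=False skips certificate validation; the adapter's
    # 'ALL:@SECLEVEL=1' context re-admits ciphers OpenSSL would refuse.
    r = s.get('https://legacy.example.com/', verify=False, timeout=30)

This is also where the '404 not_found' string compared in get() comes from:
requests.status_codes._codes (a private but long-stable table) maps 404 to
('not_found', ...), so the formatted error reads '404 not_found'.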