1 """Robot based on requests
3 This file is a part of Bookmarks database and Internet robot.
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_requests']
14 from urllib.parse import urlsplit
17 from requests.adapters import HTTPAdapter
18 from requests.packages.urllib3.util.ssl_ import create_urllib3_context
22 from Robots.bkmk_robot_base import robot_base, request_headers
# requests_ftp patches requests.Session in place so ftp:// URLs can be
# fetched with the same session API (see get_ftp_welcome below).
requests_ftp.monkeypatch_session()
class robot_requests(robot_base):
    """Internet robot that fetches bookmarks with the ``requests`` library,
    retrying through a proxy for hosts that turn out to need one.
    """
    # Pass proxy from the environment like this:
    # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080

    # Store hosts for which we already know they require proxy...
    # ...but aren't accessible even through proxy
    # NOTE(review): the initialisers for proxy_ok/proxy_error are not
    # visible in this view — presumably class-level sets; confirm.

    def get(self, bookmark, url, accept_charset=False):
        """Fetch *url* for *bookmark*.

        Returns a 5-tuple ``(error, redirect_code, redirect_to,
        headers, content)``: only ``error`` is set on failure, the
        redirect pair on a redirect, the last two on success.
        """
        split_results = urlsplit(url)
        url_host = split_results.hostname

        # Fail fast for hosts that already failed even through the proxy.
        if url_host in self.proxy_error:
            return 'see prev. error', None, None, None, None

        if accept_charset and bookmark.charset:
            # Copy so the module-wide default headers are not mutated.
            headers = request_headers.copy()
            headers['Accept-Charset'] = bookmark.charset
            # NOTE(review): an ``else:`` line appears to be elided here.
            headers = request_headers

        if url_host in self.proxy_ok:
            self.log(' Immediately trying with the proxy')
            error, r = request_get(url, headers, self.timeout, self.proxy)
            # NOTE(review): the direct (no-proxy) branch header appears
            # to be elided here.
            error, r = request_get(url, headers, self.timeout, None)
            self.log(' Error : %s' % error)
            # A 404 is definitive; any other error may mean the host is
            # blocked, so retry through the proxy when one is configured.
            if self.proxy and error != '404 not_found':
                self.log(' Retrying with the proxy...')
                error, r = request_get(url, headers,
                                       self.timeout, self.proxy)
                # Remember that this host is reachable via the proxy.
                self.proxy_ok.add(url_host)
        if self.proxy and error != '404 not_found':
            self.log(' Proxy error : %s' % error)
            if url_host not in self.proxy_ok:
                # The proxy did not help either — blacklist the host.
                self.proxy_error.add(url_host)
        return error, None, None, None, None
        # NOTE(review): the guarding ``if`` lines for the redirect and
        # success returns below are elided from this view — as displayed
        # these two returns are unreachable.
        return None, r.status_code, r.next.url, None, None
        return None, None, None, r.headers, r.content

    def get_ftp_welcome(self):
        """Return the FTP server's welcome message (always empty here)."""
        return ''  # Alas, requests_ftp doesn't store welcome message
# See https://lukasa.co.uk/2017/02/Configuring_TLS_With_Requests/

class AllCiphersAdapter(HTTPAdapter):
    """
    A TransportAdapter that re-enables 3DES support in Requests.

    Old servers only speak legacy ciphers, so the SSL context is built
    with ``ciphers='ALL:@SECLEVEL=1'`` and certificate verification
    disabled (``cert_reqs=0``); see the link above.
    """

    @staticmethod
    def _legacy_ssl_context():
        # Single place to build the permissive context used by both
        # the direct pool manager and the proxy manager.
        return create_urllib3_context(cert_reqs=0,
                                      ciphers='ALL:@SECLEVEL=1')

    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = self._legacy_ssl_context()
        return super(AllCiphersAdapter, self).init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        kwargs['ssl_context'] = self._legacy_ssl_context()
        return super(AllCiphersAdapter, self).proxy_manager_for(
            *args, **kwargs)
# The TLS adapter above disables certificate checks (cert_reqs=0), so
# silence urllib3's resulting "Unverified HTTPS request" warnings.
warnings.filterwarnings('ignore', 'Unverified HTTPS request is being made')
def request_get(url, headers, timeout, proxy):
    """Perform one GET attempt; return a pair ``(error, response)``.

    ``error`` is a human-readable string or ``None``; callers unpack
    the pair as ``error, r``.  NOTE(review): the success-path return is
    elided from this view — presumably ``(None, r)``; confirm.
    """
    # Route both http and https schemes through the same proxy.
    proxies = {'http': proxy, 'https': proxy}
    # NOTE(review): the no-proxy branch (``proxies = None``) appears to
    # be elided here.

    s = requests.Session()
    # Enable legacy TLS ciphers for old servers (see AllCiphersAdapter).
    s.mount('https://', AllCiphersAdapter())

    # NOTE(review): the enclosing ``try:`` line and the tail of this
    # call are elided from this view.
    r = s.get(url, headers=headers, timeout=timeout,
              allow_redirects=False, proxies=proxies,
    except requests.RequestException as e:
    # Map HTTP errors to "<code> <reason>" via requests' status-code
    # table (a private attribute — fragile across requests versions).
    if r.status_code >= 400:
        error = requests.status_codes._codes[r.status_code][0]
        return '%d %s' % (r.status_code, error), None