# Robots/bkmk_rrequests.py -- from the bookmarks_db.git repository (git.phdru.name)
# Commit: Feat(Robots/bkmk_rrequests): Ignore all problems with certificates
1 """Robot based on requests
2
3 This file is a part of Bookmarks database and Internet robot.
4
5 """
6
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
10
11 __all__ = ['robot_requests']
12
13
14 from urllib.parse import urlsplit
15 import warnings
16
17 from requests.adapters import HTTPAdapter
18 from requests.packages.urllib3.util.ssl_ import create_urllib3_context
19 import requests
20 import requests_ftp
21
22 from Robots.bkmk_robot_base import robot_base, request_headers
23
# Patch requests.Session at import time so it can also handle ftp:// URLs.
requests_ftp.monkeypatch_session()
25
26
class robot_requests(robot_base):
    """Robot that checks bookmarks using the ``requests`` library."""

    # Pass proxy from the environment like this:
    # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
    proxy = None

    # Store hosts for which we already know they require proxy...
    proxy_ok = set()
    # ...but aren't accessible even through proxy
    proxy_error = set()

    def get(self, bookmark, url, accept_charset=False):
        host = urlsplit(url).hostname

        # Host already known to be unreachable even via proxy -- give up.
        if host in self.proxy_error:
            return 'see prev. error', None, None, None, None

        if accept_charset and bookmark.charset:
            hdrs = request_headers.copy()
            hdrs['Accept-Charset'] = bookmark.charset
        else:
            hdrs = request_headers

        if host in self.proxy_ok:
            # Host is known to need the proxy -- skip the direct attempt.
            self.log('   Immediately trying with the proxy')
            error, response = request_get(url, hdrs, self.timeout, self.proxy)
        else:
            # Try a direct connection first; fall back to the proxy on error
            # (except for 404, which a proxy cannot fix).
            error, response = request_get(url, hdrs, self.timeout, None)
            if error is not None:
                self.log('   Error          : %s' % error)
                if self.proxy and error != '404 not_found':
                    self.log('   Retrying with the proxy...')
                    error, response = request_get(
                        url, hdrs, self.timeout, self.proxy)
                    if error is None:
                        # Remember that this host works through the proxy.
                        self.proxy_ok.add(host)

        if error is not None:
            if self.proxy and error != '404 not_found':
                self.log('   Proxy error    : %s' % error)
                if host not in self.proxy_ok:
                    # Failed both directly and via proxy -- blacklist it.
                    self.proxy_error.add(host)
            return error, None, None, None, None

        if response.is_redirect:
            # Redirects are not followed; report the target back to caller.
            return None, response.status_code, response.next.url, None, None
        return None, None, None, response.headers, response.content

    def get_ftp_welcome(self):
        # Alas, requests_ftp doesn't store the welcome message.
        return ''
75
76
77 # See https://lukasa.co.uk/2017/02/Configuring_TLS_With_Requests/
78
class AllCiphersAdapter(HTTPAdapter):
    """A TransportAdapter that re-enables 3DES support in Requests.

    It also disables certificate verification (``cert_reqs=0``) so that
    hosts with broken or self-signed certificates can still be checked.
    """

    @staticmethod
    def _permissive_context():
        # Single place to build the permissive SSL context; previously this
        # construction was duplicated in both methods below.
        return create_urllib3_context(cert_reqs=0,
                                      ciphers='ALL:@SECLEVEL=1')

    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = self._permissive_context()
        return super(AllCiphersAdapter, self).init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        kwargs['ssl_context'] = self._permissive_context()
        return super(AllCiphersAdapter, self).proxy_manager_for(
            *args, **kwargs)
95
96
# Requests are made with verify=False (see request_get below); silence the
# resulting urllib3 "Unverified HTTPS request" warnings.
warnings.filterwarnings('ignore', 'Unverified HTTPS request is being made')
98
99
def request_get(url, headers, timeout, proxy):
    """Fetch *url*; return a tuple ``(error, response)``.

    On success ``error`` is None and ``response`` is the ``requests``
    Response object; on failure ``error`` is a descriptive string and
    ``response`` is None.  Redirects are not followed, and certificate
    verification is deliberately disabled.
    """
    if proxy:
        proxies = {'http': proxy, 'https': proxy}
    else:
        proxies = None

    # Use the session as a context manager so its pooled connections are
    # closed when we are done (the original leaked the session).
    with requests.Session() as s:
        s.mount('https://', AllCiphersAdapter())

        try:
            r = s.get(url, headers=headers, timeout=timeout,
                      allow_redirects=False, proxies=proxies,
                      verify=False)  # ignore all certificate problems
        except requests.RequestException as e:
            return str(e), None

    # The response body is fully read by now (stream=False), so closing the
    # session above is safe.
    if r.status_code >= 400:
        # Keep the lowercase '<code> <name>' format from the private
        # _codes table: callers compare against strings like '404 not_found'.
        error = requests.status_codes._codes[r.status_code][0]
        return '%d %s' % (r.status_code, error), None
    return None, r