"""Robot based on requests

This file is a part of Bookmarks database and Internet robot.
"""
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_requests']
from urllib.parse import urlsplit

import requests
import requests_ftp

from Robots.bkmk_robot_base import robot_base
21 requests_ftp.monkeypatch_session()
class robot_requests(robot_base):
    """Robot that fetches bookmarked URLs with the `requests` library.

    Supports an optional proxy and remembers, per host, whether the
    proxy is required or whether the host is unreachable even through it.
    """

    # Pass proxy from the environment like this:
    # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
    proxy = None

    # Store hosts for which we already know they require proxy...
    proxy_ok = set()
    # ...but aren't accessible even through proxy
    proxy_error = set()

    def get(self, bookmark, url, accept_charset=False):
        """Fetch ``url``.

        Returns a 5-tuple ``(error, redirect_code, redirect_to,
        headers, content)`` of which exactly one "leg" is filled in:

        - error:    ``(message, None, None, None, None)``
        - redirect: ``(None, status_code, next_url, None, None)``
        - success:  ``(None, None, None, headers, content)``
        """
        split_results = urlsplit(url)
        url_host = split_results.hostname

        # Known to be unreachable even through the proxy — fail fast.
        if url_host in self.proxy_error:
            return 'proxy error', None, None, None, None

        if url_host in self.proxy_ok:
            # Host already known to need the proxy; skip the direct try.
            self.log('   Immediately trying with the proxy')
            error, r = request_get(url, self.timeout, self.proxy)
        else:
            # Try a direct connection first, fall back to the proxy.
            error, r = request_get(url, self.timeout, None)
            if error is not None:
                self.log('   Error: %s' % error)
                if self.proxy:
                    self.log('   Retrying with the proxy...')
                    error, r = request_get(url, self.timeout, self.proxy)
                    if error is None:
                        # Remember that this host requires the proxy.
                        self.proxy_ok.add(url_host)

        if error is not None:
            if self.proxy:
                self.log('   Proxy error: %s' % error)
                if url_host not in self.proxy_ok:
                    # Failed both directly and through the proxy.
                    self.proxy_error.add(url_host)
            return error, None, None, None, None

        if r.is_redirect:
            # allow_redirects=False, so `r.next` is the prepared
            # follow-up request; report where it would go.
            return None, r.status_code, r.next.url, None, None
        return None, None, None, r.headers, r.content

    def get_ftp_welcome(self):
        return ''  # Alas, requests_ftp doesn't store welcome message
67 def request_get(url, timeout, proxy):
69 proxies = {'http': proxy, 'https': proxy}
74 r = requests.Session().get(
75 url, timeout=timeout, allow_redirects=False, proxies=proxies)
76 except requests.RequestException as e: