1 """Robot based on requests
3 This file is a part of Bookmarks database and Internet robot.
7 __author__ = "Oleg Broytman <phd@phdru.name>"
8 __copyright__ = "Copyright (C) 2024 PhiloSoft Design"
9 __license__ = "GNU GPL"
11 __all__ = ['robot_requests']
14 from urllib.parse import urlsplit
19 from Robots.bkmk_robot_base import robot_base, request_headers
21 requests_ftp.monkeypatch_session()
24 class robot_requests(robot_base):
# NOTE(review): this chunk is a lossy extraction -- original line numbers are
# baked into each line, indentation is stripped, and several source lines
# (class attribute assignments, ``else:`` branches, some ``if`` lines) are
# elided, so the text below is not runnable as shown.  Comments added here
# describe only what the visible lines establish; elisions are flagged.
#
# Purpose (from the visible code): a page-fetching robot built on the
# ``requests`` library that remembers, per host, whether fetching requires
# the configured proxy, and which hosts are unreachable even via the proxy.
25 # Pass proxy from the environment like this:
26 # BKMK_ROBOT=requests:proxy=socks5h%3a//localhost%3a1080
# (an attribute assignment is elided here -- presumably ``proxy``, read as
# ``self.proxy`` below; TODO confirm against the full file)
29 # Store hosts for which we already know they require proxy...
# (elided assignment -- presumably ``proxy_ok = set()``; ``.add`` is used below)
31 # ...but aren't accessible even through proxy
# (elided assignment -- presumably ``proxy_error = set()``; ``.add`` is used below)
34 def get(self, bookmark, url, accept_charset=False):
# Fetch ``url`` for ``bookmark``.  The visible ``return`` statements show a
# 5-tuple contract (error, status_code, next_url, headers, content) where
# exactly one "kind" of result is populated:
#   (error, None, None, None, None)              -- fetch failed
#   (None, r.status_code, r.next.url, None, None) -- redirect
#   (None, None, None, r.headers, r.content)     -- success
35 split_results = urlsplit(url)
36 url_host = split_results.hostname
# Fail fast for hosts already recorded as dead even through the proxy.
38 if url_host in self.proxy_error:
39 return 'proxy error', None, None, None, None
# Copy the shared ``request_headers`` only when an Accept-Charset header
# must be added, so the shared dict is never mutated.  (An ``else:`` line
# is elided between the next two assignments -- the second one is the
# no-charset fallback; TODO confirm.)
41 if accept_charset and bookmark.charset:
42 headers = request_headers.copy()
43 headers['Accept-Charset'] = bookmark.charset
45 headers = request_headers
# Hosts already known to need the proxy are fetched through it directly;
# otherwise the visible calls suggest a direct attempt (proxy=None)
# followed by a proxy retry on error.  (Branch keywords -- ``else:`` and
# at least one ``if`` on ``error`` -- are elided between these lines.)
47 if url_host in self.proxy_ok:
48 self.log(' Immediately trying with the proxy')
49 error, r = request_get(url, headers, self.timeout, self.proxy)
51 error, r = request_get(url, headers, self.timeout, None)
53 self.log(' Error: %s' % error)
55 self.log(' Retrying with the proxy...')
56 error, r = request_get(url, headers,
57 self.timeout, self.proxy)
# A host that succeeded via the proxy is remembered as proxy-requiring...
59 self.proxy_ok.add(url_host)
# ...while a host that still failed (and is not in ``proxy_ok``) is
# remembered as a proxy error, so future calls return early (line 38-39).
# (The guarding ``if`` lines around this error path are elided.)
62 self.log(' Proxy error: %s' % error)
63 if url_host not in self.proxy_ok:
64 self.proxy_error.add(url_host)
65 return error, None, None, None, None
# Redirect result: surface the status code and ``r.next.url`` (the URL of
# the next request ``requests`` would have followed).  (The ``if`` testing
# for a redirect is elided before this return; TODO confirm condition.)
67 return None, r.status_code, r.next.url, None, None
68 return None, None, None, r.headers, r.content
70 def get_ftp_welcome(self):
# FTP welcome banner is not available from requests_ftp, hence the empty
# string (original inline comment below says the same).
71 return '' # Alas, requests_ftp doesn't store welcome message
74 def request_get(url, headers, timeout, proxy):
76 proxies = {'http': proxy, 'https': proxy}
81 r = requests.Session().get(
82 url, headers=headers, timeout=timeout,
83 allow_redirects=False, proxies=proxies)
84 except requests.RequestException as e: