X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rrequests.py;h=c6a91af3a962a3da3057c022b08edaaf96542ee3;hb=34ea60a99f36e3bd78a2f888475d9e66644cb504;hp=114d67094586ed97d928768803aed3704f075790;hpb=3b51e43f06c97c0d1a2ffbb1c29276acaeb64f07;p=bookmarks_db.git diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index 114d670..c6a91af 100644 --- a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -16,7 +16,7 @@ from urllib.parse import urlsplit import requests import requests_ftp -from Robots.bkmk_robot_base import robot_base +from Robots.bkmk_robot_base import robot_base, request_headers requests_ftp.monkeypatch_session() @@ -36,23 +36,30 @@ class robot_requests(robot_base): url_host = split_results.hostname if url_host in self.proxy_error: - return 'proxy error', None, None, None, None + return 'see prev. error', None, None, None, None + + if accept_charset and bookmark.charset: + headers = request_headers.copy() + headers['Accept-Charset'] = bookmark.charset + else: + headers = request_headers if url_host in self.proxy_ok: self.log(' Immediately trying with the proxy') - error, r = request_get(url, self.timeout, self.proxy) + error, r = request_get(url, headers, self.timeout, self.proxy) else: - error, r = request_get(url, self.timeout, None) + error, r = request_get(url, headers, self.timeout, None) if error is not None: - self.log(' Error: %s' % error) - if self.proxy: + self.log(' Error : %s' % error) + if self.proxy and error != '404 not_found': self.log(' Retrying with the proxy...') - error, r = request_get(url, self.timeout, self.proxy) + error, r = request_get(url, headers, + self.timeout, self.proxy) if error is None: self.proxy_ok.add(url_host) if error is not None: - if self.proxy: - self.log(' Proxy error: %s' % error) + if self.proxy and error != '404 not_found': + self.log(' Proxy error : %s' % error) if url_host not in self.proxy_ok: self.proxy_error.add(url_host) return error, None, None, None, None @@ -64,7 +71,7 @@ class robot_requests(robot_base): return '' # Alas, requests_ftp doesn't store welcome message -def request_get(url, timeout, proxy): +def request_get(url, headers, timeout, proxy): if proxy: proxies = {'http': proxy, 'https': proxy} else: @@ -72,8 +79,12 @@ def request_get(url, timeout, proxy): try: r = requests.Session().get( - url, timeout=timeout, allow_redirects=False, proxies=proxies) + url, headers=headers, timeout=timeout, + allow_redirects=False, proxies=proxies) except requests.RequestException as e: return str(e), None else: + if r.status_code >= 400: + error = requests.status_codes._codes[r.status_code][0] + return '%d %s' % (r.status_code, error), None return None, r