From: Oleg Broytman Date: Thu, 22 Aug 2024 21:46:45 +0000 (+0300) Subject: Feat(Robots): Simplify `.get()` X-Git-Tag: 6.1.0~8 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=c35386e4d31281fd714565be87a2d96ec4a1a782;p=bookmarks_db.git Feat(Robots): Simplify `.get()` Return data without processing, let `get_url()` process it in one place. --- diff --git a/Robots/base.py b/Robots/base.py index 36b369a..b165c9b 100644 --- a/Robots/base.py +++ b/Robots/base.py @@ -115,7 +115,11 @@ class robot_base(Robot): await self.get_url(bookmark.href, req_headers) if error is not None: - bookmark.error = error + bookmark.error = str(error) + return + + if http_status_code and (http_status_code >= 400): + bookmark.error = 'Error %d' % http_status_code return if http_status_code and (http_status_code >= 300): @@ -373,10 +377,10 @@ class robot_base(Robot): if use_proxy and url_host in self.proxy_ok: self.log(' Immediately trying with the proxy') - error, http_status_code, redirect_to, resp_headers, content = \ + error, http_status_code, resp_headers, content = \ await self.get(url, req_headers, use_proxy=True) else: - error, http_status_code, redirect_to, resp_headers, content = \ + error, http_status_code, resp_headers, content = \ await self.get(url, req_headers) if error is not None and ( not url_host.startswith('localhost') and @@ -385,7 +389,7 @@ class robot_base(Robot): self.log(' Error : %s' % error) if use_proxy and http_status_code != 404: self.log(' Retrying with the proxy...') - error, http_status_code, redirect_to, resp_headers, content = \ + error, http_status_code, resp_headers, content = \ await self.get(url, req_headers, use_proxy=True) if error is None: self.proxy_ok.add(url_host) @@ -398,6 +402,10 @@ class robot_base(Robot): self.proxy_error.add(url_host) return error, http_status_code, None, None, None if http_status_code and (http_status_code >= 300): + if resp_headers: + redirect_to = resp_headers['Location'] + else: + redirect_to = 'UNKNOWN' return None, 
http_status_code, redirect_to, None, None return None, None, None, resp_headers, content diff --git a/Robots/bkmk_raiohttp.py b/Robots/bkmk_raiohttp.py index 56cbf77..2b82fee 100644 --- a/Robots/bkmk_raiohttp.py +++ b/Robots/bkmk_raiohttp.py @@ -36,8 +36,8 @@ class robot_aiohttp(robot_base): ) if error is not None: error = str(error) - return error, None, None, None, None - return None, None, None, None, body + return error, None, None, None + return None, None, None, body if use_proxy: proxy = self.proxy @@ -48,17 +48,7 @@ class robot_aiohttp(robot_base): url, req_headers, proxy=proxy, timeout=self.timeout, ) - if error is not None or (status and status >= 400): - if error is None: - error = 'Error %d' % status - else: - error = str(error) - if status: - error = 'Error %d %s' % (status, error) - return error, status, None, None, None - if status and status >= 300: - return None, status, resp_headers['Location'], None, None - return None, status, None, resp_headers, body + return error, status, resp_headers, body def get_ftp_welcome(self): return '' # We don't store welcome message yet diff --git a/Robots/bkmk_rcurl.py b/Robots/bkmk_rcurl.py index f1258b1..73bfbe3 100644 --- a/Robots/bkmk_rcurl.py +++ b/Robots/bkmk_rcurl.py @@ -30,16 +30,13 @@ class robot_curl(robot_base): curlw.perform() except pycurl.error as e: error = str(e) - return error, None, None, None, None - - status = curlw.getinfo(pycurl.HTTP_CODE) + status = None + else: + error = None + status = curlw.getinfo(pycurl.HTTP_CODE) curlw.close() - if status >= 400: - return "Error %d" % status, status, None, None, None - if status >= 300: - return None, status, curlw.resp_headers['Location'], None, None - return None, None, None, curlw.resp_headers, curlw.body + return error, status, curlw.resp_headers, curlw.body def get_ftp_welcome(self): return '' # We don't store welcome message yet diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index 40e6ef7..eb95eea 100644 --- 
a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -34,7 +34,7 @@ class robot_requests(robot_base): if error is not None: return error, None, None, None, None self.welcome = welcome - return None, None, None, None, body + return None, None, None, body if use_proxy: proxies = {'http': self.proxy, 'https': self.proxy} @@ -51,21 +51,9 @@ class robot_requests(robot_base): verify=False) except requests.RequestException as e: error = str(e) - else: - if r.status_code >= 400: - try: - error = requests.status_codes._codes[r.status_code][0] - except KeyError: - error = "Error" - error = '%d %s' % (r.status_code, error) - - if error is not None: - return error, \ - r.status_code if r is not None else None, \ - None, None, None - if r.is_redirect: - return None, r.status_code, r.next.url, None, None - return None, None, None, r.headers, r.content + return error, None, None, None + + return None, r.status_code, r.headers, r.content def get_ftp_welcome(self): welcome = self.welcome