From 89a6bf87cd183b057ffb154d46147c43ec1da597 Mon Sep 17 00:00:00 2001
From: Oleg Broytman
Date: Wed, 21 Aug 2024 17:32:25 +0300
Subject: [PATCH] Refactor(Robots): Separate `headers` into `req_headers` and
 `resp_headers`

---
 Robots/base.py              | 36 ++++++++++++++++++------------------
 Robots/bkmk_raiohttp.py     |  9 +++++----
 Robots/bkmk_rcurl.py        | 14 +++++++-------
 Robots/bkmk_rrequests.py    |  4 ++--
 Robots/multi_async_mixin.py |  4 ++--
 5 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/Robots/base.py b/Robots/base.py
index e3cf461..36b369a 100644
--- a/Robots/base.py
+++ b/Robots/base.py
@@ -106,13 +106,13 @@ class robot_base(Robot):
             bookmark.icon = None
 
         if bookmark.charset:
-            headers = request_headers.copy()
-            headers['Accept-Charset'] = bookmark.charset
+            req_headers = request_headers.copy()
+            req_headers['Accept-Charset'] = bookmark.charset
         else:
-            headers = request_headers
+            req_headers = request_headers
 
-        error, http_status_code, redirect_to, headers, content = \
-            await self.get_url(bookmark.href, headers)
+        error, http_status_code, redirect_to, resp_headers, content = \
+            await self.get_url(bookmark.href, req_headers)
 
         if error is not None:
             bookmark.error = error
@@ -125,14 +125,14 @@ class robot_base(Robot):
         size = 0
         last_modified = None
 
-        if headers:
+        if resp_headers:
             try:
-                size = headers["Content-Length"]
+                size = resp_headers["Content-Length"]
             except KeyError:
                 pass
 
             try:
-                last_modified = headers["Last-Modified"]
+                last_modified = resp_headers["Last-Modified"]
             except KeyError:
                 pass
 
@@ -151,9 +151,9 @@ class robot_base(Robot):
             bookmark.last_modified = last_modified
 
         charset = None
-        if headers:
+        if resp_headers:
             try:
-                content_type = headers["Content-Type"]
+                content_type = resp_headers["Content-Type"]
                 self.log(" Content-Type : %s" % content_type)
                 if content_type is None:
                     if b'html' in content.lower():
@@ -352,7 +352,7 @@ class robot_base(Robot):
         finally:
             self.finish_check_url(bookmark)
 
-    async def get_url(self, url, headers):
+    async def get_url(self, url, req_headers):
         split_results = urlsplit(url)
         url_proto = split_results.scheme
         url_host = split_results.hostname
@@ -373,11 +373,11 @@ class robot_base(Robot):
 
         if use_proxy and url_host in self.proxy_ok:
             self.log(' Immediately trying with the proxy')
-            error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, headers, use_proxy=True)
+            error, http_status_code, redirect_to, resp_headers, content = \
+                await self.get(url, req_headers, use_proxy=True)
         else:
-            error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, headers)
+            error, http_status_code, redirect_to, resp_headers, content = \
+                await self.get(url, req_headers)
             if error is not None and (
                 not url_host.startswith('localhost')
                 and not url_host.startswith('127.')
@@ -385,8 +385,8 @@ class robot_base(Robot):
             ):
                 self.log(' Error : %s' % error)
                 if use_proxy and http_status_code != 404:
                     self.log(' Retrying with the proxy...')
-                    error, http_status_code, redirect_to, headers, content = \
-                        await self.get(url, headers, use_proxy=True)
+                    error, http_status_code, redirect_to, resp_headers, content = \
+                        await self.get(url, req_headers, use_proxy=True)
                     if error is None:
                         self.proxy_ok.add(url_host)
         if (error is not None) or (
@@ -399,7 +399,7 @@ class robot_base(Robot):
             return error, http_status_code, None, None, None
         if http_status_code and (http_status_code >= 300):
             return None, http_status_code, redirect_to, None, None
-        return None, None, None, headers, content
+        return None, None, None, resp_headers, content
 
     def set_redirect(self, bookmark, errcode, newurl):
         bookmark.moved = moved = "(%s) to %s" % (_reloc_dict[errcode], newurl)
diff --git a/Robots/bkmk_raiohttp.py b/Robots/bkmk_raiohttp.py
index fa59360..56cbf77 100644
--- a/Robots/bkmk_raiohttp.py
+++ b/Robots/bkmk_raiohttp.py
@@ -29,7 +29,7 @@ class robot_aiohttp(robot_base):
     def version_str(self):
         return 'aiohttp/%s' % aiohttp.__version__
 
-    async def get(self, url, headers, use_proxy=False):
+    async def get(self, url, req_headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, body = await _get_ftp(
                 url, timeout=self.ftp_timeout,
@@ -45,7 +45,7 @@ class robot_aiohttp(robot_base):
             proxy = None
 
         error, status, resp_headers, body = await _get_http(
-            url, headers=headers, proxy=proxy,
+            url, req_headers, proxy=proxy,
             timeout=self.timeout,
         )
         if error is not None or (status and status >= 400):
@@ -64,7 +64,7 @@ class robot_aiohttp(robot_base):
     return ''  # We don't store welcome message yet
 
 
-async def _get_http(url, headers={}, proxy=None, timeout=60):
+async def _get_http(url, req_headers={}, proxy=None, timeout=60):
     connector = None
     if proxy and proxy.startswith('socks5'):
         if proxy.startswith('socks5h://'):
@@ -87,7 +87,8 @@ async def _get_http(url, headers={}, proxy=None, timeout=60):
                 connector=connector, timeout=timeout
         ) as session:
             async with session.get(
-                url, headers=headers, proxy=proxy, allow_redirects=False,
+                url, headers=req_headers,
+                proxy=proxy, allow_redirects=False,
                 ssl_context=ssl_context) as resp:
                 return None, resp.status, resp.headers, await resp.read()
     except (
diff --git a/Robots/bkmk_rcurl.py b/Robots/bkmk_rcurl.py
index a144805..3c7b4df 100644
--- a/Robots/bkmk_rcurl.py
+++ b/Robots/bkmk_rcurl.py
@@ -23,11 +23,11 @@ class robot_curl(robot_base):
     def version_str(self):
         return str(pycurl.version)
 
-    async def get(self, url, headers, use_proxy=False):
-        headers = ['%s: %s' % (k, v) for k, v in headers.items()]
+    async def get(self, url, req_headers, use_proxy=False):
+        req_headers = ['%s: %s' % (k, v) for k, v in req_headers.items()]
 
         curl = pycurl.Curl()
-        self.headers = {}
+        self.resp_headers = {}
         self.body = b''
 
         # Do not follow redirects
@@ -55,7 +55,7 @@ class robot_curl(robot_base):
         curl.setopt(pycurl.WRITEFUNCTION, self.body_callback)
 
         curl.setopt(pycurl.HTTPGET, 1)
-        curl.setopt(pycurl.HTTPHEADER, headers)
+        curl.setopt(pycurl.HTTPHEADER, req_headers)
         try:
             url.encode('ascii')
         except UnicodeEncodeError:
@@ -73,8 +73,8 @@ class robot_curl(robot_base):
         if status >= 400:
             return "Error %d" % status, status, None, None, None
         if status >= 300:
-            return None, status, self.headers['Location'], None, None
-        return None, None, None, self.headers, self.body
+            return None, status, self.resp_headers['Location'], None, None
+        return None, None, None, self.resp_headers, self.body
 
     def header_callback(self, data):
         for encoding in 'ascii', 'latin1', 'utf-8':
@@ -89,7 +89,7 @@ class robot_curl(robot_base):
             return
         if ':' in data:
             key, value = data.split(':', 1)
-            self.headers[key.title()] = value.strip()
+            self.resp_headers[key.title()] = value.strip()
 
     def body_callback(self, data):
         self.body += data
diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py
index c9d5f68..40e6ef7 100644
--- a/Robots/bkmk_rrequests.py
+++ b/Robots/bkmk_rrequests.py
@@ -28,7 +28,7 @@ class robot_requests(robot_base):
     def version_str(self):
         return 'python-requests urllib3/%s' % urllib3.__version__
 
-    async def get(self, url, headers, use_proxy=False):
+    async def get(self, url, req_headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, welcome, body = _get_ftp(url, self.timeout)
             if error is not None:
@@ -46,7 +46,7 @@ class robot_requests(robot_base):
 
         error = r = None
         try:
-            r = s.get(url, headers=headers, timeout=self.timeout,
+            r = s.get(url, headers=req_headers, timeout=self.timeout,
                       allow_redirects=False, proxies=proxies,
                       verify=False)
         except requests.RequestException as e:
diff --git a/Robots/multi_async_mixin.py b/Robots/multi_async_mixin.py
index 8126417..7056979 100644
--- a/Robots/multi_async_mixin.py
+++ b/Robots/multi_async_mixin.py
@@ -43,11 +43,11 @@ class multi_async_mixin(multi_mixin):
         current_href.set(bookmark.href)
         await self.check_bookmark_async(bookmark)
 
-    async def get_url(self, url, headers):
+    async def get_url(self, url, req_headers):
         if url not in self.logs:
             self.logs[url] = []
         current_href.set(url)
-        return await super(multi_async_mixin, self).get_url(url, headers)
+        return await super(multi_async_mixin, self).get_url(url, req_headers)
 
     def wait(self):
         self.loop.run_until_complete(self.wait_async())
-- 
2.39.5
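
Below the signature, so `git am` ignores it: a minimal sketch of the calling
convention after this rename. Only the `get_url(url, req_headers)` signature
and its five-tuple return, shown in the Robots/base.py hunks above, come from
the patch; the `check_page()` coroutine and the `robot` argument are
hypothetical illustrations, not code from this repository.

# Hypothetical sketch, not part of the commit: request headers go in, and
# response headers come out, under distinct names, so the two dicts can no
# longer shadow each other as the old single `headers` variable did.
async def check_page(robot, url):
    req_headers = {'Accept-Charset': 'utf-8'}
    error, status, redirect_to, resp_headers, content = \
        await robot.get_url(url, req_headers)
    if error is not None:                  # (error, status, None, None, None)
        print('Error :', error)
    elif status and status >= 300:         # (None, status, redirect_to, ...)
        print('Redirect (%s) to %s' % (status, redirect_to))
    else:                                  # (None, None, None, headers, body)
        print('Content-Type :', resp_headers.get('Content-Type'))
        print('Size :', len(content))

# Awaited from an asyncio event loop with a configured robot instance, e.g.:
# asyncio.run(check_page(robot, 'http://example.com/'))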