From: Oleg Broytman Date: Mon, 19 Aug 2024 19:28:41 +0000 (+0300) Subject: Refactor(Robots): `get`/`get_url` don't need `bookmark`, only charset X-Git-Tag: 6.1.0~16 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=59d0d88e68c89bd815f00d6a71bb8f32e8a78eff;p=bookmarks_db.git Refactor(Robots): `get`/`get_url` don't need `bookmark`, only charset --- diff --git a/Robots/base.py b/Robots/base.py index 88f924b..3a15b56 100644 --- a/Robots/base.py +++ b/Robots/base.py @@ -102,7 +102,7 @@ class robot_base(Robot): bookmark.icon = None error, http_status_code, redirect_to, headers, content = \ - await self.get_url(bookmark, bookmark.href, True) + await self.get_url(bookmark.href, bookmark.charset) if error is not None: bookmark.error = error @@ -217,7 +217,7 @@ class robot_base(Robot): icon_error, \ icon_status_code, icon_redirect_to, \ icon_headers, icon_data = \ - await self.get_url(bookmark, _icon_url) + await self.get_url(_icon_url) if icon_error: raise IOError("No icon: " + icon_error) break @@ -341,7 +341,7 @@ class robot_base(Robot): finally: self.finish_check_url(bookmark) - async def get_url(self, bookmark, url, accept_charset=False): + async def get_url(self, url, accept_charset=None): split_results = urlsplit(url) url_proto = split_results.scheme url_host = split_results.hostname @@ -363,13 +363,11 @@ class robot_base(Robot): if use_proxy and url_host in self.proxy_ok: self.log(' Immediately trying with the proxy') error, http_status_code, redirect_to, headers, content = \ - await self.get(bookmark, url, - accept_charset=accept_charset, + await self.get(url, accept_charset=accept_charset, use_proxy=True) else: error, http_status_code, redirect_to, headers, content = \ - await self.get(bookmark, url, - accept_charset=accept_charset) + await self.get(url, accept_charset=accept_charset) if error is not None and ( not url_host.startswith('localhost') and not url_host.startswith('127.') @@ -378,8 +376,7 @@ class robot_base(Robot): if use_proxy and http_status_code != 404: self.log(' Retrying with the proxy...') error, http_status_code, redirect_to, headers, content = \ - await self.get(bookmark, url, - accept_charset=accept_charset, + await self.get(url, accept_charset=accept_charset, use_proxy=True) if error is None: self.proxy_ok.add(url_host) diff --git a/Robots/bkmk_raiohttp.py b/Robots/bkmk_raiohttp.py index fb377ae..30294c0 100644 --- a/Robots/bkmk_raiohttp.py +++ b/Robots/bkmk_raiohttp.py @@ -29,7 +29,7 @@ class robot_aiohttp(robot_base): def version_str(self): return 'aiohttp/%s' % aiohttp.__version__ - async def get(self, bookmark, url, accept_charset=False, use_proxy=False): + async def get(self, url, accept_charset=None, use_proxy=False): if url.startswith('ftp://'): error, body = await _get_ftp( url, timeout=self.ftp_timeout, @@ -39,9 +39,9 @@ class robot_aiohttp(robot_base): return error, None, None, None, None return None, None, None, None, body - if accept_charset and bookmark.charset: + if accept_charset: headers = request_headers.copy() - headers['Accept-Charset'] = bookmark.charset + headers['Accept-Charset'] = accept_charset else: headers = request_headers diff --git a/Robots/bkmk_rcurl.py b/Robots/bkmk_rcurl.py index 9f18d8c..4e31b7e 100644 --- a/Robots/bkmk_rcurl.py +++ b/Robots/bkmk_rcurl.py @@ -24,10 +24,10 @@ class robot_curl(robot_base): def version_str(self): return str(pycurl.version) - async def get(self, bookmark, url, accept_charset=False, use_proxy=False): - if accept_charset and bookmark.charset: + async def get(self, url, accept_charset=None, use_proxy=False): + if accept_charset: headers = request_headers.copy() - headers['Accept-Charset'] = bookmark.charset + headers['Accept-Charset'] = accept_charset else: headers = request_headers headers = ['%s: %s' % (k, v) for k, v in headers.items()] @@ -63,7 +63,7 @@ class robot_curl(robot_base): try: url.encode('ascii') except UnicodeEncodeError: - url = encode_url(url, bookmark.charset) + url = encode_url(url, accept_charset) curl.setopt(pycurl.URL, url) try: curl.perform() diff --git a/Robots/bkmk_rmultiaio.py b/Robots/bkmk_rmultiaio.py index a3214f6..62da680 100644 --- a/Robots/bkmk_rmultiaio.py +++ b/Robots/bkmk_rmultiaio.py @@ -50,12 +50,12 @@ class robot_multiaio(multi_mixin, robot_aiohttp): current_href.set(bookmark.href) await self.check_bookmark_async(bookmark) - async def get_url(self, bookmark, url, accept_charset=False): - if bookmark.href not in self.logs: - self.logs[bookmark.href] = [] - current_href.set(bookmark.href) + async def get_url(self, url, accept_charset=None): + if url not in self.logs: + self.logs[url] = [] + current_href.set(url) return await super(robot_multiaio, self).get_url( - bookmark, url, accept_charset=accept_charset) + url, accept_charset=accept_charset) def wait(self): self.loop.run_until_complete(self.wait_async()) diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py index 2fd8331..dd5a120 100644 --- a/Robots/bkmk_rrequests.py +++ b/Robots/bkmk_rrequests.py @@ -28,7 +28,7 @@ class robot_requests(robot_base): def version_str(self): return 'python-requests urllib3/%s' % urllib3.__version__ - async def get(self, bookmark, url, accept_charset=False, use_proxy=False): + async def get(self, url, accept_charset=None, use_proxy=False): if url.startswith('ftp://'): error, welcome, body = _get_ftp(url, self.timeout) if error is not None: @@ -36,9 +36,9 @@ class robot_requests(robot_base): self.welcome = welcome return None, None, None, None, body - if accept_charset and bookmark.charset: + if accept_charset: headers = request_headers.copy() - headers['Accept-Charset'] = bookmark.charset + headers['Accept-Charset'] = accept_charset else: headers = request_headers