git.phdru.name Git - bookmarks_db.git/commitdiff
Refactor(Robots): Separate `headers` into `req_headers` and `resp_headers`
author    Oleg Broytman <phd@phdru.name>
Wed, 21 Aug 2024 14:32:25 +0000 (17:32 +0300)
committer Oleg Broytman <phd@phdru.name>
Sat, 7 Sep 2024 10:59:02 +0000 (13:59 +0300)
Robots/base.py
Robots/bkmk_raiohttp.py
Robots/bkmk_rcurl.py
Robots/bkmk_rrequests.py
Robots/multi_async_mixin.py
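
The motivation for the rename is visible in the first hunk below: check_bookmark used the single name headers for two different objects, the dict sent with the request and the dict that came back with the response. A minimal sketch of the pattern, with names taken from the diff and the surrounding code elided:

    # Before: one name, two meanings
    headers = request_headers.copy()                  # request headers
    error, http_status_code, redirect_to, headers, content = \
        await self.get_url(bookmark.href, headers)    # now response headers

    # After: each meaning gets its own name
    req_headers = request_headers.copy()
    error, http_status_code, redirect_to, resp_headers, content = \
        await self.get_url(bookmark.href, req_headers)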

index e3cf4614f0e4c5aa303f110830230f40f7928e2f..36b369a4db1b1f32699b40208cbb826e5baa2bfa 100644 (file)
@@ -106,13 +106,13 @@ class robot_base(Robot):
             bookmark.icon = None
 
             if bookmark.charset:
-                headers = request_headers.copy()
-                headers['Accept-Charset'] = bookmark.charset
+                req_headers = request_headers.copy()
+                req_headers['Accept-Charset'] = bookmark.charset
             else:
-                headers = request_headers
+                req_headers = request_headers
 
-            error, http_status_code, redirect_to, headers, content = \
-                await self.get_url(bookmark.href, headers)
+            error, http_status_code, redirect_to, resp_headers, content = \
+                await self.get_url(bookmark.href, req_headers)
 
             if error is not None:
                 bookmark.error = error
@@ -125,14 +125,14 @@ class robot_base(Robot):
             size = 0
             last_modified = None
 
-            if headers:
+            if resp_headers:
                 try:
-                    size = headers["Content-Length"]
+                    size = resp_headers["Content-Length"]
                 except KeyError:
                     pass
 
                 try:
-                    last_modified = headers["Last-Modified"]
+                    last_modified = resp_headers["Last-Modified"]
                 except KeyError:
                     pass
 
@@ -151,9 +151,9 @@ class robot_base(Robot):
             bookmark.last_modified = last_modified
 
             charset = None
-            if headers:
+            if resp_headers:
                 try:
-                    content_type = headers["Content-Type"]
+                    content_type = resp_headers["Content-Type"]
                     self.log("   Content-Type   : %s" % content_type)
                     if content_type is None:
                         if b'html' in content.lower():
@@ -352,7 +352,7 @@ class robot_base(Robot):
         finally:
             self.finish_check_url(bookmark)
 
-    async def get_url(self, url, headers):
+    async def get_url(self, url, req_headers):
         split_results = urlsplit(url)
         url_proto = split_results.scheme
         url_host = split_results.hostname
@@ -373,11 +373,11 @@ class robot_base(Robot):
 
         if use_proxy and url_host in self.proxy_ok:
             self.log('   Immediately trying with the proxy')
-            error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, headers, use_proxy=True)
+            error, http_status_code, redirect_to, resp_headers, content = \
+                await self.get(url, req_headers, use_proxy=True)
         else:
-            error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, headers)
+            error, http_status_code, redirect_to, resp_headers, content = \
+                await self.get(url, req_headers)
             if error is not None and (
                 not url_host.startswith('localhost') and
                 not url_host.startswith('127.')
@@ -385,8 +385,8 @@ class robot_base(Robot):
                 self.log('   Error          : %s' % error)
                 if use_proxy and http_status_code != 404:
                     self.log('   Retrying with the proxy...')
-                    error, http_status_code, redirect_to, headers, content = \
-                        await self.get(url, headers, use_proxy=True)
+                    error, http_status_code, redirect_to, resp_headers, content = \
+                        await self.get(url, req_headers, use_proxy=True)
                     if error is None:
                         self.proxy_ok.add(url_host)
         if (error is not None) or (
@@ -399,7 +399,7 @@ class robot_base(Robot):
             return error, http_status_code, None, None, None
         if http_status_code and (http_status_code >= 300):
             return None, http_status_code, redirect_to, None, None
-        return None, None, None, headers, content
+        return None, None, None, resp_headers, content
 
     def set_redirect(self, bookmark, errcode, newurl):
         bookmark.moved = moved = "(%s) to %s" % (_reloc_dict[errcode], newurl)
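
As the final hunk above shows, get_url in Robots/base.py returns a 5-tuple whose shape depends on the outcome: (error, http_status_code, None, None, None) on failure, (None, http_status_code, redirect_to, None, None) on a 3xx answer, and (None, None, None, resp_headers, content) on success. A caller-side sketch under those contracts (the robot instance, URL and header dict here are illustrative, not from the commit):

    error, http_status_code, redirect_to, resp_headers, content = \
        await robot.get_url('https://example.com/', {'Accept-Charset': 'utf-8'})
    if error is not None:
        pass  # network/protocol failure; only `error` is set
    elif http_status_code and http_status_code >= 300:
        pass  # redirect; follow `redirect_to`
    else:
        pass  # success; use `resp_headers` and `content`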
index fa593608d519de92fb5688be8588e35cd90b999c..56cbf773fc434eb02939681076b5b93124da8712 100644 (file)
@@ -29,7 +29,7 @@ class robot_aiohttp(robot_base):
     def version_str(self):
         return 'aiohttp/%s' % aiohttp.__version__
 
-    async def get(self, url, headers, use_proxy=False):
+    async def get(self, url, req_headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, body = await _get_ftp(
                 url, timeout=self.ftp_timeout,
@@ -45,7 +45,7 @@ class robot_aiohttp(robot_base):
             proxy = None
 
         error, status, resp_headers, body = await _get_http(
-            url, headers=headers, proxy=proxy,
+            url, req_headers, proxy=proxy,
             timeout=self.timeout,
         )
         if error is not None or (status and status >= 400):
@@ -64,7 +64,7 @@ class robot_aiohttp(robot_base):
         return ''  # We don't store welcome message yet
 
 
-async def _get_http(url, headers={}, proxy=None, timeout=60):
+async def _get_http(url, req_headers={}, proxy=None, timeout=60):
     connector = None
     if proxy and proxy.startswith('socks5'):
         if proxy.startswith('socks5h://'):
@@ -87,7 +87,8 @@ async def _get_http(url, headers={}, proxy=None, timeout=60):
             connector=connector, timeout=timeout
         ) as session:
             async with session.get(
-                    url, headers=headers, proxy=proxy, allow_redirects=False,
+                    url, headers=req_headers,
+                    proxy=proxy, allow_redirects=False,
                     ssl_context=ssl_context) as resp:
                 return None, resp.status, resp.headers, await resp.read()
     except (
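
After this change _get_http in Robots/bkmk_raiohttp.py takes the request headers positionally as req_headers and, per the hunks above, returns (error, status, resp_headers, body). A hedged sketch of a direct call, assuming it runs inside the module itself; the URL and header value are made up:

    import asyncio

    async def demo():
        error, status, resp_headers, body = await _get_http(
            'https://example.com/', {'Accept-Charset': 'utf-8'},
            proxy=None, timeout=60)
        if error is None and status == 200:
            print(resp_headers.get('Content-Type'), len(body))

    asyncio.run(demo())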
index a144805893dbda4816f6ef17afdcfc4b5f808018..3c7b4df261312ca6be71699a462c8cadcd50ce8d 100644 (file)
@@ -23,11 +23,11 @@ class robot_curl(robot_base):
     def version_str(self):
         return str(pycurl.version)
 
-    async def get(self, url, headers, use_proxy=False):
-        headers = ['%s: %s' % (k, v) for k, v in headers.items()]
+    async def get(self, url, req_headers, use_proxy=False):
+        req_headers = ['%s: %s' % (k, v) for k, v in req_headers.items()]
 
         curl = pycurl.Curl()
-        self.headers = {}
+        self.resp_headers = {}
         self.body = b''
 
         # Do not follow redirects
@@ -55,7 +55,7 @@ class robot_curl(robot_base):
         curl.setopt(pycurl.WRITEFUNCTION, self.body_callback)
 
         curl.setopt(pycurl.HTTPGET, 1)
-        curl.setopt(pycurl.HTTPHEADER, headers)
+        curl.setopt(pycurl.HTTPHEADER, req_headers)
         try:
             url.encode('ascii')
         except UnicodeEncodeError:
@@ -73,8 +73,8 @@ class robot_curl(robot_base):
         if status >= 400:
             return "Error %d" % status, status, None, None, None
         if status >= 300:
-            return None, status, self.headers['Location'], None, None
-        return None, None, None, self.headers, self.body
+            return None, status, self.resp_headers['Location'], None, None
+        return None, None, None, self.resp_headers, self.body
 
     def header_callback(self, data):
         for encoding in 'ascii', 'latin1', 'utf-8':
@@ -89,7 +89,7 @@ class robot_curl(robot_base):
             return
         if ':' in data:
             key, value = data.split(':', 1)
-            self.headers[key.title()] = value.strip()
+            self.resp_headers[key.title()] = value.strip()
 
     def body_callback(self, data):
         self.body += data
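
In Robots/bkmk_rcurl.py, libcurl invokes header_callback once per raw response header line; the callback decodes it, splits on the first colon, title-cases the key and strips the value before storing it in self.resp_headers. An illustrative call (the robot instance and header value are assumptions, not part of the commit):

    # pycurl passes each raw header line as bytes
    robot.header_callback(b'content-type: text/html; charset=utf-8\r\n')
    # -> robot.resp_headers['Content-Type'] == 'text/html; charset=utf-8'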
index c9d5f68c002b87ae5cbc9c31faf3eca479671972..40e6ef793610741f245bbf1d7324fd996662301a 100644 (file)
@@ -28,7 +28,7 @@ class robot_requests(robot_base):
     def version_str(self):
         return 'python-requests urllib3/%s' % urllib3.__version__
 
-    async def get(self, url, headers, use_proxy=False):
+    async def get(self, url, req_headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, welcome, body = _get_ftp(url, self.timeout)
             if error is not None:
@@ -46,7 +46,7 @@ class robot_requests(robot_base):
 
         error = r = None
         try:
-            r = s.get(url, headers=headers, timeout=self.timeout,
+            r = s.get(url, headers=req_headers, timeout=self.timeout,
                       allow_redirects=False, proxies=proxies,
                       verify=False)
         except requests.RequestException as e:
index 8126417b1a65f793a44f07ae1d43b7ec102d9b13..7056979138446c9d93752b9a8f6042a892daaa52 100644 (file)
@@ -43,11 +43,11 @@ class multi_async_mixin(multi_mixin):
         current_href.set(bookmark.href)
         await self.check_bookmark_async(bookmark)
 
-    async def get_url(self, url, headers):
+    async def get_url(self, url, req_headers):
         if url not in self.logs:
             self.logs[url] = []
         current_href.set(url)
-        return await super(multi_async_mixin, self).get_url(url, headers)
+        return await super(multi_async_mixin, self).get_url(url, req_headers)
 
     def wait(self):
         self.loop.run_until_complete(self.wait_async())