git.phdru.name Git - bookmarks_db.git/commitdiff
Refactor(Robots): Pass headers instead of charset
author    Oleg Broytman <phd@phdru.name>
Tue, 20 Aug 2024 22:21:26 +0000 (01:21 +0300)
committer Oleg Broytman <phd@phdru.name>
Sat, 7 Sep 2024 10:59:02 +0000 (13:59 +0300)
Robots/base.py
Robots/bkmk_raiohttp.py
Robots/bkmk_rcurl.py
Robots/bkmk_rmultiaio.py
Robots/bkmk_rrequests.py
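
The change moves charset negotiation out of the individual robots: instead
of passing bookmark.charset down and letting every get() implementation
build an Accept-Charset header, robot_base.check_bookmark() now builds the
headers dict once and threads it through get_url() to each robot's get().
In sketch form (signatures taken from the hunks below):

    # Before: every robot rebuilt headers from an optional charset.
    async def get(self, url, accept_charset=None, use_proxy=False): ...

    # After: the caller prepares the headers dict once; robots use it as-is.
    async def get(self, url, headers, use_proxy=False): ...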

diff --git a/Robots/base.py b/Robots/base.py
index d9551be25ed4dfb6785e97d39b75b8864f2c74d9..e3cf4614f0e4c5aa303f110830230f40f7928e2f 100644
--- a/Robots/base.py
+++ b/Robots/base.py
@@ -105,8 +105,14 @@ class robot_base(Robot):
             self.start = int(time.time())
             bookmark.icon = None
 
+            if bookmark.charset:
+                headers = request_headers.copy()
+                headers['Accept-Charset'] = bookmark.charset
+            else:
+                headers = request_headers
+
             error, http_status_code, redirect_to, headers, content = \
-                await self.get_url(bookmark.href, bookmark.charset)
+                await self.get_url(bookmark.href, headers)
 
             if error is not None:
                 bookmark.error = error
@@ -221,7 +227,8 @@ class robot_base(Robot):
                                     icon_error, \
                                         icon_status_code, icon_redirect_to, \
                                         icon_headers, icon_data = \
-                                        await self.get_url(_icon_url)
+                                        await self.get_url(
+                                            _icon_url, request_headers)
                                     if icon_error:
                                         raise IOError("No icon: " + icon_error)
                                         break
@@ -345,7 +352,7 @@ class robot_base(Robot):
         finally:
             self.finish_check_url(bookmark)
 
-    async def get_url(self, url, accept_charset=None):
+    async def get_url(self, url, headers):
         split_results = urlsplit(url)
         url_proto = split_results.scheme
         url_host = split_results.hostname
@@ -367,11 +374,10 @@ class robot_base(Robot):
         if use_proxy and url_host in self.proxy_ok:
             self.log('   Immediately trying with the proxy')
             error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, accept_charset=accept_charset,
-                               use_proxy=True)
+                await self.get(url, headers, use_proxy=True)
         else:
             error, http_status_code, redirect_to, headers, content = \
-                await self.get(url, accept_charset=accept_charset)
+                await self.get(url, headers)
             if error is not None and (
                 not url_host.startswith('localhost') and
                 not url_host.startswith('127.')
@@ -380,8 +386,7 @@ class robot_base(Robot):
                 if use_proxy and http_status_code != 404:
                     self.log('   Retrying with the proxy...')
                     error, http_status_code, redirect_to, headers, content = \
-                        await self.get(url, accept_charset=accept_charset,
-                                       use_proxy=True)
+                        await self.get(url, headers, use_proxy=True)
                     if error is None:
                         self.proxy_ok.add(url_host)
         if (error is not None) or (
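
For a robot subclass the contract after this commit is, roughly, the sketch
below; robot_example is hypothetical, but the signature and the 5-tuple
return shape (error, http_status_code, redirect_to, headers, content) come
from the hunks above:

    from Robots.base import robot_base

    class robot_example(robot_base):
        async def get(self, url, headers, use_proxy=False):
            # Perform the request with the ready-made headers dict;
            # no Accept-Charset handling is needed here any more.
            ...
            return None, 200, None, {}, b''  # error, status, redirect, headers, body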
diff --git a/Robots/bkmk_raiohttp.py b/Robots/bkmk_raiohttp.py
index 30294c0dfd3a71241b1b6b4bc4310798159123f7..fa593608d519de92fb5688be8588e35cd90b999c 100644
--- a/Robots/bkmk_raiohttp.py
+++ b/Robots/bkmk_raiohttp.py
@@ -22,14 +22,14 @@ import aioftp
 import aiohttp
 import aiohttp.client_exceptions
 
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
 
 
 class robot_aiohttp(robot_base):
     def version_str(self):
         return 'aiohttp/%s' % aiohttp.__version__
 
-    async def get(self, url, accept_charset=None, use_proxy=False):
+    async def get(self, url, headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, body = await _get_ftp(
                 url, timeout=self.ftp_timeout,
@@ -39,12 +39,6 @@ class robot_aiohttp(robot_base):
                 return error, None, None, None, None
             return None, None, None, None, body
 
-        if accept_charset:
-            headers = request_headers.copy()
-            headers['Accept-Charset'] = accept_charset
-        else:
-            headers = request_headers
-
         if use_proxy:
             proxy = self.proxy
         else:
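
With the header building hoisted into the base class, robot_aiohttp can
hand the dict straight to aiohttp, which accepts a headers mapping
directly. A simplified sketch (the real robot's timeout, FTP and proxy
wiring are omitted):

    import aiohttp

    async def fetch(url, headers, proxy=None):
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers, proxy=proxy) as resp:
                return resp.status, await resp.read()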
diff --git a/Robots/bkmk_rcurl.py b/Robots/bkmk_rcurl.py
index 4e31b7e2ec6debc63961675590f31e914d143813..d59f4b4cbb3b9cf7c83400468ea4d80ff8aa4bb1 100644
--- a/Robots/bkmk_rcurl.py
+++ b/Robots/bkmk_rcurl.py
@@ -13,23 +13,17 @@ __all__ = ['robot_curl']
 
 from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
 
-from m_lib.defenc import default_encoding
 import certifi
 import pycurl
 
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
 
 
 class robot_curl(robot_base):
     def version_str(self):
         return str(pycurl.version)
 
-    async def get(self, url, accept_charset=None, use_proxy=False):
-        if accept_charset:
-            headers = request_headers.copy()
-            headers['Accept-Charset'] = accept_charset
-        else:
-            headers = request_headers
+    async def get(self, url, headers, use_proxy=False):
         headers = ['%s: %s' % (k, v) for k, v in headers.items()]
 
         curl = pycurl.Curl()
@@ -63,7 +57,7 @@ class robot_curl(robot_base):
         try:
             url.encode('ascii')
         except UnicodeEncodeError:
-            url = encode_url(url, accept_charset)
+            url = encode_url(url)
         curl.setopt(pycurl.URL, url)
         try:
             curl.perform()
@@ -102,10 +96,7 @@ class robot_curl(robot_base):
         return ''  # We don't store welcome message yet
 
 
-def encode_url(url, encoding):
-    if not encoding:
-        encoding = default_encoding
-
+def encode_url(url, encoding='latin1'):
     split_results = urlsplit(url)
     protocol, netloc, path, query, tag = split_results
     user = split_results.username
@@ -116,21 +107,15 @@ def encode_url(url, encoding):
     if query:
         qlist = []
         for name, value in parse_qsl(query):
-            if isinstance(name, bytes):
-                name = name.decode(default_encoding)
-                value = value.decode(default_encoding)
-            name = name.encode(encoding)
-            value = value.encode(encoding)
+            if not isinstance(name, bytes):
+                name = name.encode(encoding)
+                value = value.encode(encoding)
             qlist.append((name, value))
 
     url = protocol + "://"
     if user:
-        if isinstance(user, bytes):
-            user = user.decode(default_encoding)
         url += quote(user.encode(encoding))
         if password:
-            if isinstance(password, bytes):
-                password = password.decode(default_encoding)
             url += ':' + quote(password.encode(encoding))
         url += '@'
     if host:
@@ -143,14 +128,10 @@ def encode_url(url, encoding):
         if protocol == "file":
             url += quote(path)
         else:
-            if isinstance(path, bytes):
-                path = path.decode(default_encoding)
             url += quote(path.encode(encoding))
     if query:
         url += '?' + urlencode(qlist)
     if tag:
-        if isinstance(tag, bytes):
-            tag = tag.decode(default_encoding)
         url += '#' + quote_plus(tag.encode(encoding))
 
     return url
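
Note the behaviour change in encode_url(): it now assumes str input (the
bytes branches are gone) and falls back to latin1 instead of m_lib's
default_encoding. A usage sketch with a made-up URL:

    from Robots.bkmk_rcurl import encode_url

    # Non-ASCII characters are percent-encoded as latin1 bytes by default:
    print(encode_url('https://example.com/päth?q=wört'))
    # -> https://example.com/p%E4th?q=w%F6rt
    #    (approximately; the host-encoding lines fall outside the hunks shown)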
diff --git a/Robots/bkmk_rmultiaio.py b/Robots/bkmk_rmultiaio.py
index 62da6807c59770bbd48b27f0764c9d36149f9a75..d4b3342d2c6ce579f9ada8452cbc018a0adea1f7 100644
--- a/Robots/bkmk_rmultiaio.py
+++ b/Robots/bkmk_rmultiaio.py
@@ -50,12 +50,11 @@ class robot_multiaio(multi_mixin, robot_aiohttp):
         current_href.set(bookmark.href)
         await self.check_bookmark_async(bookmark)
 
-    async def get_url(self, url, accept_charset=None):
+    async def get_url(self, url, headers):
         if url not in self.logs:
             self.logs[url] = []
         current_href.set(url)
-        return await super(robot_multiaio, self).get_url(
-            url, accept_charset=accept_charset)
+        return await super(robot_multiaio, self).get_url(url, headers)
 
     def wait(self):
         self.loop.run_until_complete(self.wait_async())
diff --git a/Robots/bkmk_rrequests.py b/Robots/bkmk_rrequests.py
index dd5a120b9ec1a33b7772f644aadda01b3dd04850..c9d5f68c002b87ae5cbc9c31faf3eca479671972 100644
--- a/Robots/bkmk_rrequests.py
+++ b/Robots/bkmk_rrequests.py
@@ -21,14 +21,14 @@ from requests.packages.urllib3.util.ssl_ import create_urllib3_context
 import requests
 import urllib3
 
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
 
 
 class robot_requests(robot_base):
     def version_str(self):
         return 'python-requests urllib3/%s' % urllib3.__version__
 
-    async def get(self, url, accept_charset=None, use_proxy=False):
+    async def get(self, url, headers, use_proxy=False):
         if url.startswith('ftp://'):
             error, welcome, body = _get_ftp(url, self.timeout)
             if error is not None:
@@ -36,12 +36,6 @@ class robot_requests(robot_base):
             self.welcome = welcome
             return None, None, None, None, body
 
-        if accept_charset:
-            headers = request_headers.copy()
-            headers['Accept-Charset'] = accept_charset
-        else:
-            headers = request_headers
-
         if use_proxy:
             proxies = {'http': self.proxy, 'https': self.proxy}
         else:
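
robot_requests follows the same pattern: the prepared dict is forwarded
verbatim to requests, which takes a headers mapping as a keyword argument.
A minimal sketch (the timeout value is illustrative):

    import requests

    def fetch(url, headers, proxies=None):
        resp = requests.get(url, headers=headers, proxies=proxies, timeout=60)
        return resp.status_code, resp.content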