git.phdru.name Git - bookmarks_db.git/commitdiff
Feat(Robots): Robot based on aiohttp (5.5.0)
author Oleg Broytman <phd@phdru.name>
Mon, 5 Aug 2024 12:00:55 +0000 (15:00 +0300)
committer Oleg Broytman <phd@phdru.name>
Tue, 6 Aug 2024 07:52:04 +0000 (10:52 +0300)
Robots/bkmk_raiohttp.py [new file with mode: 0644]
Robots/bkmk_robot_base.py
bkmk_db-venv
doc/ANNOUNCE
doc/ChangeLog
doc/TODO
robots.py
setup.py

diff --git a/Robots/bkmk_raiohttp.py b/Robots/bkmk_raiohttp.py
new file mode 100644 (file)
index 0000000..ed4dac6
--- /dev/null
+++ b/Robots/bkmk_raiohttp.py
@@ -0,0 +1,62 @@
+"""Robot based on aiohttp
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_aiohttp']
+
+
+import asyncio
+import aiohttp
+import aiohttp.client_exceptions
+from Robots.bkmk_robot_base import robot_base, request_headers
+
+
+class robot_aiohttp(robot_base):
+    def version_str(self):
+        return 'aiohttp/%s' % aiohttp.__version__
+
+    def get(self, bookmark, url, accept_charset=False, use_proxy=False):
+        if accept_charset and bookmark.charset:
+            headers = request_headers.copy()
+            headers['Accept-Charset'] = bookmark.charset
+        else:
+            headers = request_headers
+
+        if use_proxy:
+            proxy = self.proxy
+        else:
+            proxy = None
+
+        error, status, resp_headers, body = asyncio.run(get(
+            url, headers=headers, proxy=proxy,
+            connect_timeout=self.connect_timeout, timeout=self.timeout,
+        ))
+        if error is not None or (status and status >= 400):
+            if error is None:
+                error = 'Error %d' % status
+            else:
+                error = str(error)
+                if status:
+                    error = 'Error %d %s' % (status, error)
+            return error, status, None, None, None
+        if status and status >= 300:
+            return None, status, resp_headers['Location'], None, None
+        return None, status, None, resp_headers, body
+
+
+async def get(url, headers={}, proxy=None, connect_timeout=30, timeout=60):
+    timeout = aiohttp.ClientTimeout(connect=connect_timeout, total=timeout)
+    try:
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.get(
+                    url, headers=headers, proxy=proxy,
+                    allow_redirects=False) as resp:
+                return None, resp.status, resp.headers, await resp.read()
+    except (asyncio.TimeoutError, aiohttp.client_exceptions.ClientError) as e:
+        return e, None, None, None
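
For reference, the module-level get() coroutine above can be exercised on its
own. A minimal sketch, with a placeholder URL and timeouts; inside the robot it
is driven synchronously through asyncio.run(), as robot_aiohttp.get() shows:

    import asyncio
    from Robots.bkmk_raiohttp import get

    async def main():
        # get() never follows redirects (allow_redirects=False),
        # so 3xx responses come back with their Location header intact.
        error, status, headers, body = await get(
            'https://example.com/', connect_timeout=10, timeout=30)
        if error is not None:
            print('Network error:', error)
        elif 300 <= status < 400:
            print('Redirects to:', headers.get('Location'))
        else:
            print('HTTP %d, %d bytes' % (status, len(body)))

    asyncio.run(main())
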
diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py
index a7429411b8e46eb81518ac4f1ade76325a42b6f9..dffffdb09645361c80ff5a0adc666b3dd8b3fdeb 100644 (file)
--- a/Robots/bkmk_robot_base.py
+++ b/Robots/bkmk_robot_base.py
@@ -105,7 +105,7 @@ class robot_base(Robot):
             error, http_status_code, redirect_to, headers, content = \
                 self.smart_get(bookmark, bookmark.href, True)
 
-            if error:
+            if error is not None:
                 bookmark.error = error
                 return 1
 
@@ -169,7 +169,8 @@ class robot_base(Robot):
                             is_html = True
                             break
                     content_stripped = content.strip()
-                    if content_stripped and charset:
+                    if content_stripped and charset \
+                            and isinstance(content_stripped, bytes):
                         try:
                             content_stripped = content_stripped.decode(
                                 charset, 'replace')
@@ -371,13 +372,15 @@ class robot_base(Robot):
                                  use_proxy=True)
                     if error is None:
                         self.proxy_ok.add(url_host)
-        if error is not None:
+        if (error is not None) or (
+            http_status_code and (http_status_code >= 400)
+        ):
             if use_proxy:
                 self.log('   Proxy error    : %s' % error)
                 if url_host not in self.proxy_ok:
                     self.proxy_error.add(url_host)
             return error, http_status_code, None, None, None
-        if http_status_code:
+        if http_status_code and (http_status_code >= 300):
             return None, http_status_code, redirect_to, None, None
         return None, None, None, headers, content
 
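Taken together, the hunks above tighten smart_get()'s three-way contract:
error is set on a network failure or an HTTP status >= 400, a bare status now
means a 3xx redirect (with redirect_to), and on success both are None and only
headers and content are filled. A minimal caller sketch; robot and bookmark
stand for a robot_base instance and a bookmark object as in the first hunk,
and handle_redirect/handle_content are placeholders:

    error, status, redirect_to, headers, content = robot.smart_get(
        bookmark, bookmark.href, True)
    if error is not None:    # network failure or HTTP status >= 400
        bookmark.error = error
    elif status:             # 3xx: redirect_to is the new location
        handle_redirect(status, redirect_to)
    else:                    # success: only headers and content are set
        handle_content(headers, content)
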
diff --git a/bkmk_db-venv b/bkmk_db-venv
index faed80bcdc1ebf038a19a51a09b0b4569355fe85..62d0d98e5916bc65a57217cc557f6bc78dd61c75 100644 (file)
--- a/bkmk_db-venv
+++ b/bkmk_db-venv
@@ -9,6 +9,6 @@ if [ -z "$VIRTUAL_ENV" ]; then
          . bkmk_db-venv/bin/activate &&
          pip install --compile --upgrade beautifulsoup4 lxml m_lib.full \
          requests requests-ftp \
-         certifi pycurl
+         pycurl certifi aiohttp
     }
 fi
diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE
index aec114b8db3e63feb1f683f6cf9f886837fc43f2..3fb4356e567c76840212d8405dc23c981a499f5a 100644 (file)
--- a/doc/ANNOUNCE
+++ b/doc/ANNOUNCE
@@ -7,6 +7,12 @@ bookmarks.html.
 
 WHAT'S NEW
 
+Version 5.5.0 (2024-08-06)
+
+   Robot based on aiohttp.
+
+   Default list of robots is now aiohttp,curl,requests,forking.
+
 Version 5.4.1 (2024-08-04)
 
    Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
diff --git a/doc/ChangeLog b/doc/ChangeLog
index da565471f2bf940d0d04e9a19bf15a5cbb4a2532..cb8415da7c1a6bae5c869b5a86b5e7871d7b0fc6 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+Version 5.5.0 (2024-08-06)
+
+   Robot based on aiohttp.
+
+   Default list of robots is now aiohttp,curl,requests,forking.
+
 Version 5.4.1 (2024-08-04)
 
    Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
diff --git a/doc/TODO b/doc/TODO
index 23564fc7977dfecd2deffc7f7522286a3c265ecf..d2e6573ebaaf072f1b3356080b8d4f412446942f 100644 (file)
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,4 +1,4 @@
-Robot based on aiohttp.
+aioftp.
 
 Robot(s) that test many URLs in parallel.
 
diff --git a/robots.py b/robots.py
index 575f9bdf8c2cbe9319d4aae50350e1bb8c4495a8..06ce1fecac127b96fff6fd6fb4c3654df4da9973 100644 (file)
--- a/robots.py
+++ b/robots.py
@@ -16,7 +16,7 @@ from os import environ
 from bkmk_objects import parse_params, set_params
 
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "curl,requests,forking"))
+    environ.get("BKMK_ROBOT", "aiohttp,curl,requests,forking"))
 
 
 def import_robot(robot_name):
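
The robot list is read from the BKMK_ROBOT environment variable at import
time, so the new default can be overridden without touching the code. A
sketch, assuming import_robot() returns the robot class for a given name (its
body is not shown in this hunk):

    import os
    os.environ['BKMK_ROBOT'] = 'aiohttp'  # use only the new aiohttp robot

    from robots import import_robot  # parse_params() runs at import time
    robot_class = import_robot('aiohttp')
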
diff --git a/setup.py b/setup.py
index 27ef38b1d9690f8fdde8cb4c55c627733257dd9a..0a570ad02cea5184ca1d1c3b2a57850f6fff3cb7 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup
 
 setup(
     name='bookmarks_db',
-    version='5.4.1',
+    version='5.5.0',
     description='Bookmarks database and Internet robot',
     long_description=open('README', 'r').read(),
     long_description_content_type="text/plain",
@@ -41,5 +41,6 @@ setup(
         'html': ['beautifulsoup4', 'lxml'],
         'requests': ['requests', 'requests-ftp'],
         'curl': ['pycurl', 'certifi'],
+        'aiohttp:python_version>="3.4"': ['aiohttp'],
     },
 )
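
With the extra declared this way, pip install 'bookmarks_db[aiohttp]' pulls
in aiohttp on interpreters matching the python_version>="3.4" environment
marker; on older Pythons the extra resolves to nothing and the other robots
remain available.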