git.phdru.name Git - bookmarks_db.git/commitdiff
Feat: Add robot based on httpx
author Oleg Broytman <phd@phdru.name>
Tue, 25 Feb 2025 18:16:39 +0000 (21:16 +0300)
committer Oleg Broytman <phd@phdru.name>
Tue, 25 Feb 2025 18:16:39 +0000 (21:16 +0300)
Robots/bkmk_rhttpx.py [new file with mode: 0644]
Robots/bkmk_rmultihttpx.py [new file with mode: 0644]
bkmk_db-venv
doc/TODO
robots.py
setup.py

diff --git a/Robots/bkmk_rhttpx.py b/Robots/bkmk_rhttpx.py
new file mode 100644 (file)
index 0000000..b1451c6
--- /dev/null
+++ b/Robots/bkmk_rhttpx.py
@@ -0,0 +1,53 @@
+"""Robot based on httpx
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_httpx']
+
+
+import httpx
+import socksio
+
+from Robots.base import robot_base
+from Robots.util import get_ftp
+
+
+class robot_httpx(robot_base):
+    def version_str(self):
+        return 'python-httpx/%s' % httpx.__version__
+
+    async def get(self, url, req_headers, use_proxy=False):
+        if url.startswith('ftp://'):
+            error, welcome, body = get_ftp(url, self.timeout)
+            if error is not None:
+                return error, None, None, None
+            self.welcome = welcome
+            return None, None, None, body
+
+        if use_proxy:
+            proxy = self.proxy
+        else:
+            proxy = None
+
+        error = r = None
+        try:
+            r = httpx.get(url, headers=req_headers,
+                          timeout=httpx.Timeout(self.timeout),
+                          follow_redirects=False, proxy=proxy,
+                          verify=False)
+        except (httpx.RequestError, socksio.ProtocolError) as e:
+            error = str(e)
+            return error, None, None, None
+
+        return None, r.status_code, r.headers, r.content
+
+    def get_ftp_welcome(self):
+        welcome = self.welcome
+        self.welcome = ''
+        return welcome
diff --git a/Robots/bkmk_rmultihttpx.py b/Robots/bkmk_rmultihttpx.py
new file mode 100644 (file)
index 0000000..6915c2e
--- /dev/null
+++ b/Robots/bkmk_rmultihttpx.py
@@ -0,0 +1,19 @@
+"""Robot based on httpx and concurrent.futures,
+processes multiple URLs in parallel (multiprocess).
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_multihttpx']
+
+
+from Robots.concurrent_futures import cf_multiprocess
+
+
+class robot_multihttpx(cf_multiprocess):
+    robot_name = 'httpx'
index 6961f5d3628a62d3c5537089fc6e50631c535472..ec5e1377bde50bda0199474ca451615bd2c055c8 100644 (file)
--- a/bkmk_db-venv
+++ b/bkmk_db-venv
@@ -9,7 +9,7 @@ if [ -z "$VIRTUAL_ENV" ]; then
          . bkmk_db-venv/bin/activate &&
          pip install --compile --upgrade setuptools \
          beautifulsoup4 lxml m_lib.full \
-         "requests[socks]" \
+         "requests[socks]" "httpx[socks]" \
          aiohttp aiohttp-socks "aioftp[socks]"
     }
 fi
index b6bfa185bc666c42fb095c73a34570524ba3545f..ee6d011dbb70ca86cbef2abe574defc0a8567b5d 100644 (file)
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,5 +1,3 @@
-https://pypi.org/project/httpx/
-
 https://pypi.org/project/curl-cffi/
 
 https://pypi.org/project/fake-useragent/
index 2b804043bfdedf27409da7dee341b702ceb736bd..c84a99231a1475fb8107251c97087911028e57ea 100644 (file)
--- a/robots.py
+++ b/robots.py
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2024 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2025 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['import_robot', 'robot']
@@ -15,7 +15,7 @@ from os import environ
 from bkmk_objects import parse_params, set_params
 
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "multirequests,aio"))
+    environ.get("BKMK_ROBOT", "multirequests,multihttpx,aio"))
 
 
 def import_robot(robot_name):
index dc949ccd0473b0fb8d62658fda24de7468326696..237f0e7e6de7491c95abed4ae7840bb814139f00 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,7 @@ setup(
     extras_require={
         'html': ['beautifulsoup4', 'lxml'],
         'requests': ['requests[socks]'],
+        'httpx': ['httpx[socks]'],
         'aiohttp': ['aiohttp>=3', 'aiohttp-socks', 'aioftp[socks]'],
     },
 )