From bc2cb38b5a82feaa6ba1d6a5cf4dccaf673b3e27 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 25 Feb 2025 21:16:39 +0300 Subject: [PATCH] Feat: Add robot based on httpx --- Robots/bkmk_rhttpx.py | 53 ++++++++++++++++++++++++++++++++++++++ Robots/bkmk_rmultihttpx.py | 19 ++++++++++++++ bkmk_db-venv | 2 +- doc/TODO | 2 -- robots.py | 4 +-- setup.py | 1 + 6 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 Robots/bkmk_rhttpx.py create mode 100644 Robots/bkmk_rmultihttpx.py diff --git a/Robots/bkmk_rhttpx.py b/Robots/bkmk_rhttpx.py new file mode 100644 index 0000000..b1451c6 --- /dev/null +++ b/Robots/bkmk_rhttpx.py @@ -0,0 +1,53 @@ +"""Robot based on httpx + +This file is a part of Bookmarks database and Internet robot. + +""" + +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2025 PhiloSoft Design" +__license__ = "GNU GPL" + +__all__ = ['robot_httpx'] + + +import httpx +import socksio + +from Robots.base import robot_base +from Robots.util import get_ftp + + +class robot_httpx(robot_base): + def version_str(self): + return 'python-httpx/%s' % httpx.__version__ + + async def get(self, url, req_headers, use_proxy=False): + if url.startswith('ftp://'): + error, welcome, body = get_ftp(url, self.timeout) + if error is not None: + return error, None, None, None + self.welcome = welcome + return None, None, None, body + + if use_proxy: + proxy = self.proxy + else: + proxy = None + + error = r = None + try: + r = httpx.get(url, headers=req_headers, + timeout=httpx.Timeout(self.timeout), + follow_redirects=False, proxy=proxy, + verify=False) + except (httpx.RequestError, socksio.ProtocolError) as e: + error = str(e) + return error, None, None, None + + return None, r.status_code, r.headers, r.content + + def get_ftp_welcome(self): + welcome = self.welcome + self.welcome = '' + return welcome diff --git a/Robots/bkmk_rmultihttpx.py b/Robots/bkmk_rmultihttpx.py new file mode 100644 index 0000000..6915c2e --- /dev/null +++ b/Robots/bkmk_rmultihttpx.py @@ -0,0 +1,19 @@ +"""Robot based on httpx and concurrent.futures, +processes multiple URLs in parallel (multiprocess). + +This file is a part of Bookmarks database and Internet robot. + +""" + +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2025 PhiloSoft Design" +__license__ = "GNU GPL" + +__all__ = ['robot_multihttpx'] + + +from Robots.concurrent_futures import cf_multiprocess + + +class robot_multihttpx(cf_multiprocess): + robot_name = 'httpx' diff --git a/bkmk_db-venv b/bkmk_db-venv index 6961f5d..ec5e137 100644 --- a/bkmk_db-venv +++ b/bkmk_db-venv @@ -9,7 +9,7 @@ if [ -z "$VIRTUAL_ENV" ]; then . bkmk_db-venv/bin/activate && pip install --compile --upgrade setuptools \ beautifulsoup4 lxml m_lib.full \ - "requests[socks]" \ + "requests[socks]" "httpx[socks]" \ aiohttp aiohttp-socks "aioftp[socks]" } fi diff --git a/doc/TODO b/doc/TODO index b6bfa18..ee6d011 100644 --- a/doc/TODO +++ b/doc/TODO @@ -1,5 +1,3 @@ -https://pypi.org/project/httpx/ - https://pypi.org/project/curl-cffi/ https://pypi.org/project/fake-useragent/ diff --git a/robots.py b/robots.py index 2b80404..c84a992 100644 --- a/robots.py +++ b/robots.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1997-2024 PhiloSoft Design" +__copyright__ = "Copyright (C) 1997-2025 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['import_robot', 'robot'] @@ -15,7 +15,7 @@ from os import environ from bkmk_objects import parse_params, set_params robot_names, robot_params = parse_params( - environ.get("BKMK_ROBOT", "multirequests,aio")) + environ.get("BKMK_ROBOT", "multirequests,multihttpx,aio")) def import_robot(robot_name): diff --git a/setup.py b/setup.py index dc949cc..237f0e7 100755 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ setup( extras_require={ 'html': ['beautifulsoup4', 'lxml'], 'requests': ['requests[socks]'], + 'httpx': ['httpx[socks]'], 'aiohttp': ['aiohttp>=3', 'aiohttp-socks', 'aioftp[socks]'], }, ) -- 2.39.5