From: Oleg Broytman
Date: Wed, 5 Mar 2025 15:27:23 +0000 (+0300)
Subject: Feat(Robots): Robots based on `curl-cffi`
X-Git-Tag: 6.4.0~9
X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=24480f2e12cb95dda92709efea953f1f5c7b1d22;p=bookmarks_db.git

Feat(Robots): Robots based on `curl-cffi`
---

diff --git a/Robots/bkmk_rcurlcffi.py b/Robots/bkmk_rcurlcffi.py
new file mode 100644
index 0000000..4d863a5
--- /dev/null
+++ b/Robots/bkmk_rcurlcffi.py
@@ -0,0 +1,53 @@
+"""Robot based on curl-cffi
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman "
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_curlcffi']
+
+
+from curl_cffi import requests
+import curl_cffi
+
+from Robots.base import robot_base
+from Robots.util import get_ftp
+
+
+class robot_curlcffi(robot_base):
+    def version_str(self):
+        return 'curl-cffi/%s' % curl_cffi.__version__
+
+    async def get(self, url, req_headers, use_proxy=False):
+        if url.startswith('ftp://'):
+            error, welcome, body = get_ftp(url, self.timeout)
+            if error is not None:
+                return error, None, None, None
+            self.welcome = welcome
+            return None, None, None, body
+
+        if use_proxy:
+            proxies = {'http': self.proxy, 'https': self.proxy}
+        else:
+            proxies = None
+
+        error = r = None
+        try:
+            r = requests.get(url, headers=req_headers,
+                             timeout=self.timeout,
+                             allow_redirects=False, proxies=proxies,
+                             verify=False, impersonate='firefox133')
+        except curl_cffi.CurlError as e:
+            error = str(e)
+            return error, None, None, None
+
+        return None, r.status_code, r.headers, r.content
+
+    def get_ftp_welcome(self):
+        welcome = self.welcome
+        self.welcome = ''
+        return welcome
diff --git a/Robots/bkmk_rmulticurlcffi.py b/Robots/bkmk_rmulticurlcffi.py
new file mode 100644
index 0000000..d4bde6c
--- /dev/null
+++ b/Robots/bkmk_rmulticurlcffi.py
@@ -0,0 +1,19 @@
+"""Robot based on curl-cffi and concurrent.futures,
+processes multiple URLs in parallel (multiprocess).
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman "
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_multicurlcffi']
+
+
+from Robots.concurrent_futures import cf_multiprocess
+
+
+class robot_multicurlcffi(cf_multiprocess):
+    robot_name = 'curlcffi'
diff --git a/bkmk_db-venv b/bkmk_db-venv
index ec5e137..e02eb86 100644
--- a/bkmk_db-venv
+++ b/bkmk_db-venv
@@ -9,7 +9,7 @@ if [ -z "$VIRTUAL_ENV" ]; then
         . bkmk_db-venv/bin/activate &&
         pip install --compile --upgrade setuptools \
         beautifulsoup4 lxml m_lib.full \
-        "requests[socks]" "httpx[socks]" \
+        "requests[socks]" "httpx[socks]" pycurl curl-cffi \
         aiohttp aiohttp-socks "aioftp[socks]"
     }
     fi
diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE
index af83c03..612401b 100644
--- a/doc/ANNOUNCE
+++ b/doc/ANNOUNCE
@@ -7,6 +7,10 @@ bookmarks.html.
 
 WHAT'S NEW
 
+Version 6.4.0 (2025-??-??)
+
+   Robots based on curl-cffi.
+
 Version 6.3.0 (2025-03-02)
 
    Robots based on pycurl.
diff --git a/doc/ChangeLog b/doc/ChangeLog
index bb54f4a..176f6f0 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+Version 6.4.0 (2025-??-??)
+
+   Robots based on curl-cffi.
+
 Version 6.3.0 (2025-03-02)
 
    Robots based on pycurl.
diff --git a/robots.py b/robots.py
index c84a992..ea293c6 100644
--- a/robots.py
+++ b/robots.py
@@ -15,7 +15,8 @@ from os import environ
 from bkmk_objects import parse_params, set_params
 
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "multirequests,multihttpx,aio"))
+    environ.get("BKMK_ROBOT",
+                "multicurlcffi,multirequests,multihttpx,curlcffi,aio"))
 
 
 def import_robot(robot_name):
diff --git a/setup.py b/setup.py
index c8d9c0c..e6c2503 100755
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,7 @@ setup(
         'requests': ['requests[socks]'],
         'httpx': ['httpx[socks]'],
         'curl': 'pycurl',
+        'curlcffi': 'curl-cffi',
         'aiohttp': ['aiohttp>=3', 'aiohttp-socks', 'aioftp[socks]'],
     },
 )