git.phdru.name Git - bookmarks_db.git/commitdiff
Feat(Robots): Robots based on `curl-cffi`
author Oleg Broytman <phd@phdru.name>
Wed, 5 Mar 2025 15:27:23 +0000 (18:27 +0300)
committer Oleg Broytman <phd@phdru.name>
Wed, 5 Mar 2025 15:27:23 +0000 (18:27 +0300)
Robots/bkmk_rcurlcffi.py [new file with mode: 0644]
Robots/bkmk_rmulticurlcffi.py [new file with mode: 0644]
bkmk_db-venv
doc/ANNOUNCE
doc/ChangeLog
robots.py
setup.py

diff --git a/Robots/bkmk_rcurlcffi.py b/Robots/bkmk_rcurlcffi.py
new file mode 100644 (file)
index 0000000..4d863a5
--- /dev/null
@@ -0,0 +1,53 @@
+"""Robot based on curl-cffi
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_curlcffi']
+
+
+from curl_cffi import requests
+import curl_cffi
+
+from Robots.base import robot_base
+from Robots.util import get_ftp
+
+
+class robot_curlcffi(robot_base):
+    def version_str(self):
+        return 'curl-cffi/%s' % curl_cffi.__version__
+
+    async def get(self, url, req_headers, use_proxy=False):
+        if url.startswith('ftp://'):
+            error, welcome, body = get_ftp(url, self.timeout)
+            if error is not None:
+                return error, None, None, None
+            self.welcome = welcome
+            return None, None, None, body
+
+        if use_proxy:
+            proxies = {'http': self.proxy, 'https': self.proxy}
+        else:
+            proxies = None
+
+        error = r = None
+        try:
+            r = requests.get(url, headers=req_headers,
+                             timeout=self.timeout,
+                             allow_redirects=False, proxies=proxies,
+                             verify=False, impersonate='firefox133')
+        except curl_cffi.CurlError as e:
+            error = str(e)
+            return error, None, None, None
+
+        return None, r.status_code, r.headers, r.content
+
+    def get_ftp_welcome(self):
+        welcome = self.welcome
+        self.welcome = ''
+        return welcome
diff --git a/Robots/bkmk_rmulticurlcffi.py b/Robots/bkmk_rmulticurlcffi.py
new file mode 100644 (file)
index 0000000..d4bde6c
--- /dev/null
@@ -0,0 +1,19 @@
+"""Robot based on curl-cffi and concurrent.futures,
+processes multiple URLs in parallel (multiprocess).
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_multicurlcffi']
+
+
+from Robots.concurrent_futures import cf_multiprocess
+
+
+class robot_multicurlcffi(cf_multiprocess):
+    robot_name = 'curlcffi'
diff --git a/bkmk_db-venv b/bkmk_db-venv
index ec5e1377bde50bda0199474ca451615bd2c055c8..e02eb86d6c33d18a0b861b0726914278307372c9 100644 (file)
--- a/bkmk_db-venv
+++ b/bkmk_db-venv
@@ -9,7 +9,7 @@ if [ -z "$VIRTUAL_ENV" ]; then
          . bkmk_db-venv/bin/activate &&
          pip install --compile --upgrade setuptools \
          beautifulsoup4 lxml m_lib.full \
-         "requests[socks]" "httpx[socks]" \
+         "requests[socks]" "httpx[socks]" pycurl curl-cffi \
          aiohttp aiohttp-socks "aioftp[socks]"
     }
 fi
diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE
index af83c036136b66cc0a126037a4bec094a472b54e..612401b65b7aba27cf277fce73158cd649e32276 100644 (file)
--- a/doc/ANNOUNCE
+++ b/doc/ANNOUNCE
@@ -7,6 +7,10 @@ bookmarks.html.
 
 WHAT'S NEW
 
+Version 6.4.0 (2025-??-??)
+
+   Robots based on curl-cffi.
+
 Version 6.3.0 (2025-03-02)
 
    Robots based on pycurl.
diff --git a/doc/ChangeLog b/doc/ChangeLog
index bb54f4a8a8dcc579a12102f89856fea99a7bdb34..176f6f0d6e77213fe43f0fce52ebcaeb4746d465 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+Version 6.4.0 (2025-??-??)
+
+   Robots based on curl-cffi.
+
 Version 6.3.0 (2025-03-02)
 
    Robots based on pycurl.
index c84a99231a1475fb8107251c97087911028e57ea..ea293c671b3e09a264ad93adfbbdd989c004e812 100644 (file)
--- a/robots.py
+++ b/robots.py
@@ -15,7 +15,8 @@ from os import environ
 from bkmk_objects import parse_params, set_params
 
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "multirequests,multihttpx,aio"))
+    environ.get("BKMK_ROBOT",
+                "multicurlcffi,multirequests,multihttpx,curlcffi,aio"))
 
 
 def import_robot(robot_name):
index c8d9c0ce8fc661d739a1ae126ad5afe3a46954bd..e6c250361824a9156445ed20ef09ed953ce08e30 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,7 @@ setup(
         'requests': ['requests[socks]'],
         'httpx': ['httpx[socks]'],
         'curl': 'pycurl',
+        'curlcffi': 'curl-cffi',
         'aiohttp': ['aiohttp>=3', 'aiohttp-socks', 'aioftp[socks]'],
     },
 )