git.phdru.name Git - bookmarks_db.git/commitdiff
Feat(Robots): Simple robot based on `twisted.web`
author Oleg Broytman <phd@phdru.name>
Sat, 8 Mar 2025 15:28:38 +0000 (18:28 +0300)
committer Oleg Broytman <phd@phdru.name>
Sat, 8 Mar 2025 21:42:34 +0000 (00:42 +0300)
Robots/bkmk_rtwisted.py [new file with mode: 0644]
bkmk_db-venv-twisted [new file with mode: 0644]
doc/ANNOUNCE
doc/ChangeLog
doc/TODO

diff --git a/Robots/bkmk_rtwisted.py b/Robots/bkmk_rtwisted.py
new file mode 100644 (file)
index 0000000..287975c
--- /dev/null
@@ -0,0 +1,100 @@
+"""Simple robot based on twisted.web
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2025 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_twisted']
+
+import threading
+import time
+
+from twisted.internet import reactor, _sslverify
+from twisted.web.client import Agent, readBody
+from twisted.web.http_headers import Headers
+import twisted
+
+from Robots.base import robot_base
+from Robots.util import encode_url, get_ftp
+
+
+# SECURITY NOTE(review): this replaces a function in a private Twisted
+# module so that it returns None instead of the platform trust store;
+# presumably the intent is to turn off TLS certificate verification for
+# the HTTPS requests made by this robot -- confirm this is deliberate.
+_sslverify.platformTrust = lambda: None
+
+# Import-time side effect: the Twisted reactor is started in a background
+# thread as soon as this module is imported.  Signal handlers are not
+# installed because the reactor does not run in the main thread.
+# NOTE(review): the thread is not created with daemon=True, so it looks
+# like the process will not exit until robot_twisted.stop() has been
+# called -- verify against the callers.
+reactor_thread = threading.Thread(target=reactor.run,
+                                  kwargs={'installSignalHandlers': False})
+reactor_thread.start()
+
+
+class robot_twisted(robot_base):
+    """Robot that fetches URLs with ``twisted.web.client.Agent``.
+
+    The Twisted reactor runs in the module-level ``reactor_thread``.
+    Requests are handed over to that thread and `get` polls for the
+    outcome, which the ``cb*`` callbacks store on the instance as
+    ``self.response``, ``self.body`` and ``self.error``.
+    """
+
+    def version_str(self):
+        """Return a string identifying the library behind this robot.
+
+        NOTE(review): ``twisted.version`` is a version object rather than
+        a plain string, so the result embeds its ``str()`` form; confirm
+        that this is the wanted output (``twisted.__version__`` holds the
+        short version string).
+        """
+        return 'twisted/%s' % twisted.version
+
+    async def get(self, url, req_headers, use_proxy=False):
+        """Fetch *url* and return ``(error, code, headers, body)``.
+
+        On failure the first item is an error message and the other
+        three are ``None``.  For ``ftp://`` URLs only ``body`` is set and
+        the server welcome text is kept for `get_ftp_welcome`.  For any
+        other URL ``code`` is the response status, ``headers`` maps each
+        header name to the first value of that header and ``body`` is
+        the response body as read by ``readBody``.
+
+        *req_headers* is a mapping of header names to single values.
+        *use_proxy* is accepted but not used by this robot.
+
+        NOTE(review): the wait below uses a blocking ``time.sleep`` even
+        though this is a coroutine, so it blocks whatever drives it.
+        """
+        # Local import: only this method needs it.
+        from twisted.internet.defer import maybeDeferred
+
+        if url.startswith('ftp://'):
+            error, welcome, body = get_ftp(url, self.timeout)
+            if error is not None:
+                return error, None, None, None
+            self.welcome = welcome
+            return None, None, None, body
+
+        agent = Agent(reactor, connectTimeout=self.timeout)
+        url = encode_url(url).encode('ascii')
+        headers = Headers({k: [v] for k, v in req_headers.items()})
+        self.response = self.error = self.body = None
+
+        def start_request():
+            # maybeDeferred turns a synchronous exception raised by
+            # agent.request() into a failure that reaches cbError.
+            d = maybeDeferred(agent.request, b"GET", url, headers, None)
+            # The errback is chained *after* the callback so that a
+            # failure while reading the body is reported as an error
+            # instead of being lost and showing up as a timeout.
+            d.addCallback(self.cbResponse)
+            d.addErrback(self.cbError)
+
+        # Twisted APIs are not thread-safe and this method does not run
+        # in the reactor thread; callFromThread is the supported way to
+        # hand work to the reactor (it also wakes the reactor up).
+        reactor.callFromThread(start_request)
+
+        # Poll in 0.1 s steps for at most ``self.timeout`` seconds;
+        # int() keeps range() working when the timeout is a float.
+        for _ in range(int(self.timeout * 10)):
+            if self.error is not None or self.body is not None:
+                break
+            time.sleep(0.1)
+
+        if self.error is not None:
+            return self.error.getErrorMessage(), None, None, None
+        if self.body is None:
+            return 'Timeout', None, None, None
+
+        r = self.response
+        resp_headers = {k.decode('ascii'): decode_header(v[0])
+                        for k, v in r.headers.getAllRawHeaders()}
+        return None, r.code, resp_headers, self.body
+
+    def get_ftp_welcome(self):
+        """Return the stored FTP welcome text and reset it to ``''``."""
+        welcome = self.welcome
+        self.welcome = ''
+        return welcome
+
+    def stop(self):
+        """Stop the reactor and wait up to ``self.timeout`` for its thread."""
+        # reactor.stop() must run inside the reactor thread.
+        reactor.callFromThread(reactor.stop)
+        reactor_thread.join(self.timeout)
+
+    def cbResponse(self, response):
+        """Store the response and start reading its body.
+
+        The returned deferred fires once `cbBody` has stored the body.
+        """
+        self.response = response
+
+        d = readBody(response)
+        d.addCallback(self.cbBody)
+        return d
+
+    def cbError(self, error):
+        """Store the failure so that `get` can report its message."""
+        self.error = error
+
+    def cbBody(self, body):
+        """Store the response body; `get` treats this as completion."""
+        self.body = body
+
+
+def decode_header(header):
+    """Decode a raw header value (bytes) to ``str``.
+
+    Encodings are tried from the strictest to the most permissive:
+    ASCII, then UTF-8, then Latin-1.  Latin-1 accepts every byte
+    sequence, so it has to come last or the candidates after it could
+    never be reached.  An empty string is returned only if every
+    candidate fails.
+    """
+    for encoding in 'ascii', 'utf-8', 'latin1':
+        try:
+            return header.decode(encoding)
+        except UnicodeDecodeError:
+            continue
+    # Reached only after *all* encodings have been tried.
+    return ''
diff --git a/bkmk_db-venv-twisted b/bkmk_db-venv-twisted
new file mode 100644 (file)
index 0000000..343d424
--- /dev/null
@@ -0,0 +1,8 @@
+# This is not a shell script; it shouldn't be executed,
+# it should be sourced into the current shell.
+#
+# If no virtual environment is active ($VIRTUAL_ENV is empty), source
+# the `bkmk_db-venv` file (presumably it sets up and activates the base
+# environment -- confirm) and, only if that succeeds, install or upgrade
+# Twisted together with its TLS extras.
+
+if [ -z "$VIRTUAL_ENV" ]; then
+    . bkmk_db-venv &&
+    pip install --compile --upgrade \
+    "twisted[tls]"
+fi
index 6a1c3163352335124d07b0d97885358767b56d4d..e3cce4697e22f00dbc015173a76766e4cd880f19 100644 (file)
@@ -7,6 +7,10 @@ bookmarks.html.
 
 WHAT'S NEW
 
+Version 6.5.0 (2025-??-??)
+
+   Robots based on Twisted.
+
 Version 6.4.0 (2025-03-07)
 
    Robots based on curl-cffi.
index c50d0c78736654b1847e847bdcf50c51d1345a12..0adbcffb540f054590a1d35b39fd1cd8e2ec391f 100644 (file)
@@ -1,3 +1,7 @@
+Version 6.5.0 (2025-??-??)
+
+   Robots based on Twisted.
+
 Version 6.4.0 (2025-03-07)
 
    Robots based on curl-cffi.
index 385b0bacff9ac5be425c014f5b5c8dc556607cde..046a9d5cb6856cf6acbf9e05454684326fe76e96 100644 (file)
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,5 +1,3 @@
-Twisted.
-
 Selenium or PlayWright.
 
 A program to publish bookmarks with icons.