--- /dev/null
+"""Robot based on twisted and concurrent.futures,
+processes multiple URLs in parallel (multithreaded).
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_twisted']
+
+
+from time import sleep
+
+from twisted import __version__
+from twisted.internet import reactor
+from twisted.web.client import Agent, readBody
+from twisted.web.http_headers import Headers
+
+from Robots.base import encode_url
+from Robots.concurrent_futures import cf_multithread
+
+# Monkey-patch twisted's private _sslverify module so that
+# platformTrust() returns None: this disables use of the platform's
+# trust store and thereby relaxes TLS certificate verification.
+# NOTE(review): deliberate for a link-checking robot that must fetch
+# sites with broken/self-signed certs, but it is a private API and a
+# security trade-off — confirm this is still needed on new Twisted.
+from twisted.internet import _sslverify
+_sslverify.platformTrust = lambda: None
+
+
+class robot_twisted(cf_multithread):
+    """Multithreaded robot driven by the twisted reactor.
+
+    The reactor runs in one worker thread of the inherited
+    concurrent.futures executor; main_thread() polls self.queue for
+    (url, headers, use_proxy, reply_queue) requests, issues them via
+    twisted.web.client.Agent, and pushes a
+    (error, status, headers, body) 4-tuple back on each request's
+    private reply queue.  Inherits queue/executor/timeout attributes
+    from cf_multithread.
+    """
+
+    def __init__(self, *args, **kw):
+        cf_multithread.__init__(self, *args, **kw)
+        # Start the reactor in an executor worker thread.  Signal
+        # handlers can only be installed from the main thread, hence
+        # installSignalHandlers=False.
+        self.executor.submit(reactor.run, installSignalHandlers=False)
+
+    def version_str(self):
+        # Deliberately starts the super() chain *above* cf_multithread
+        # (skipping its version_str), then appends the Twisted version.
+        # NOTE(review): presumably cf_multithread's own version_str
+        # reports something irrelevant here — confirm before changing.
+        return super(cf_multithread, self).version_str() \
+            + '; Twisted ' + __version__
+
+    def cbRequest(self, response, queue, timeoutCall):
+        # Success callback for the Agent request: the response headers
+        # have arrived, so the per-request timeout is no longer needed.
+        if timeoutCall.active():
+            timeoutCall.cancel()
+        # Read the body asynchronously; chaining the Deferred keeps
+        # errors flowing into the outer errback (cbError).
+        d = readBody(response)
+        d.addCallback(self.cbBody, response, queue)
+        return d
+
+    def cbBody(self, body, response, queue):
+        # Deliver the final result tuple to the requester:
+        # (error=None, HTTP status code, headers dict, raw body bytes).
+        # Header names are Title-Cased; only the first value of each
+        # multi-valued header is kept, decoded as ASCII.
+        queue.put_nowait(
+            (None, response.code,
+             {k.decode('ascii').title(): v[0].decode('ascii')
+              for k, v in response.headers.getAllRawHeaders()},
+             body)
+        )
+
+    def cbError(self, failure, queue, timeoutCall):
+        # Errback: cancel the pending timeout (if still armed) and
+        # report the failure as an error string in the result tuple.
+        if timeoutCall.active():
+            timeoutCall.cancel()
+        queue.put_nowait(('Error: %s' % failure,
+                          None, None, None))
+
+    def cancelTimeout(self, passthrough, timeoutCall):
+        # Transparent both-callback: cancel the timeout and pass the
+        # result/failure through unchanged to the next callback.
+        if timeoutCall.active():
+            timeoutCall.cancel()
+        return passthrough
+
+    def main_thread(self):
+        """Main loop: create twisted agent and HTTP queries"""
+
+        agent = Agent(reactor, connectTimeout=self.timeout)
+
+        # Busy-poll self.queue every 0.1s (see sleep() at the bottom of
+        # the loop); a None item is the sentinel to stop the reactor.
+        while True:
+            if self.queue.empty():
+                pass
+            else:
+                request = self.queue.get_nowait()
+                if request is None:  # Signal to stop
+                    reactor.stop()
+                    return
+                url, req_headers, use_proxy, queue = request
+
+                # Agent requires an ASCII (percent-encoded/IDNA) URL;
+                # encode_url() handles non-ASCII input.
+                try:
+                    url.encode('ascii')
+                except UnicodeEncodeError:
+                    url = encode_url(url)
+                # twisted Headers wants a list of values per name.
+                req_headers = {k: [v] for k, v in req_headers.items()}
+                try:
+                    d = agent.request(b'GET', url.encode('ascii'),
+                                      Headers(req_headers))
+                except Exception as e:
+                    # Report request-construction failures to the
+                    # caller instead of killing the loop.
+                    queue.put_nowait(('Error: %s' % e,
+                                      None, None, None))
+                    continue
+
+                # Setup timeout watch: cancelling the Deferred aborts
+                # the request; FTP gets its own (longer) timeout.
+                if url.startswith('ftp://'):
+                    timeout = self.ftp_timeout
+                else:
+                    timeout = self.timeout
+                timeoutCall = reactor.callLater(timeout, d.cancel)
+                # Disarm the timeout on either outcome before the
+                # result-handling callbacks run.
+                d.addBoth(self.cancelTimeout, timeoutCall)
+
+                d.addCallback(self.cbRequest, queue, timeoutCall)
+                d.addErrback(self.cbError, queue, timeoutCall)
+
+            sleep(0.1)
+
+    def get_ftp_welcome(self):
+        return ''  # We don't store welcome message yet
WHAT'S NEW
-Version 6.1.0 (2024-09-08)
+Version 6.2.0 (2024-??-??)
- Combined aiohttp with multiaio; the combined robot is named just aio.
+ Robot based on twisted and concurrent.futures,
+ processes multiple URLs in parallel (multithreaded).
- Robot based on curl_multi, processes multiple URLs in parallel
- using concurrent.futures (multithreaded). Doesn't work good --
- slow and a number of problems; need more work.
-
- Combined curl with curlmulti; the combined robot is named just curl.
-
- Default list of robots is now multirequests,aio.
-
- Make bkmk_rmultirequests always multiprocess.
-
-Version 6.0.0 (2024-08-19)
-
- Robot based on aiohttp, processes multiple URLs in parallel.
-
- Default list of robots is now multirequests,multiaio,curl.
-
- Make all robots async.
- Split check_bookmark() into sync and async variants.
-
- Renamed max_workers to max_urls.
+ Default list of robots is now multirequests,aio,twisted.
WHERE TO GET