__all__ = ['robot_twisted']
+from urllib.parse import urlsplit
from time import sleep
from twisted import __version__
from twisted.internet import reactor
-from twisted.web.client import Agent, readBody
+from twisted.internet.endpoints import TCP4ClientEndpoint
+from twisted.web.client import Agent, ProxyAgent, readBody
from twisted.web.http_headers import Headers
from Robots.base import encode_url
def main_thread(self):
"""Main loop: create twisted agent and HTTP queries"""
- agent = Agent(reactor, connectTimeout=self.timeout)
+ direct_agent = Agent(reactor, connectTimeout=self.timeout)
+
+ if self.proxy and self.proxy.startswith('http'):
+ proxy = urlsplit(self.proxy)
+ endpoint = TCP4ClientEndpoint(
+ reactor, proxy.hostname, proxy.port, timeout=self.timeout)
+ proxy_agent = ProxyAgent(endpoint)
while True:
if self.queue.empty():
except UnicodeEncodeError:
url = encode_url(url)
req_headers = {k: [v] for k, v in req_headers.items()}
+ if use_proxy:
+ agent = proxy_agent
+ else:
+ agent = direct_agent
try:
d = agent.request(b'GET', url.encode('ascii'),
Headers(req_headers))
Robot based on twisted and concurrent.futures,
processes multiple URLs in parallel (multithreaded).
+ Doesn't properly support proxies; has problems with HTTP proxies
+ and doesn't support SOCKS5 proxies at all.
+ Doesn't query FTP; requires more work.
- Default list of robots is now multirequests,aio,twisted.
+ Default list of robots is still multirequests,aio.
WHERE TO GET
Robot based on twisted and concurrent.futures,
processes multiple URLs in parallel (multithreaded).
+ Doesn't properly support proxies; has problems with HTTP proxies
+ and doesn't support SOCKS5 proxies at all.
+ Doesn't query FTP; requires more work.
- Default list of robots is now multirequests,aio,twisted.
+ Default list of robots is still multirequests,aio.
Version 6.1.0 (2024-09-08)
from bkmk_objects import parse_params, set_params
robot_names, robot_params = parse_params(
- environ.get("BKMK_ROBOT", "multirequests,aio,twisted"))
+ environ.get("BKMK_ROBOT", "multirequests,aio"))
def import_robot(robot_name):