--- /dev/null
+"""Robot based on aiohttp
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2024 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_aiohttp']
+
+
+import asyncio
+import aiohttp
+import aiohttp.client_exceptions
+from Robots.bkmk_robot_base import robot_base, request_headers
+
+
+class robot_aiohttp(robot_base):
+    def version_str(self):
+        return 'aiohttp/%s' % aiohttp.__version__
+
+    def get(self, bookmark, url, accept_charset=False, use_proxy=False):
+        """Fetch url; return (error, status, redirect_to, headers, body)"""
+        if accept_charset and bookmark.charset:
+            # Copy the shared headers before adding a per-bookmark charset.
+            headers = request_headers.copy()
+            headers['Accept-Charset'] = bookmark.charset
+        else:
+            headers = request_headers
+
+        if use_proxy:
+            proxy = self.proxy
+        else:
+            proxy = None
+
+        # Run the async helper to completion in a fresh event loop.
+        error, status, resp_headers, body = asyncio.run(get(
+            url, headers=headers, proxy=proxy,
+            connect_timeout=self.connect_timeout, timeout=self.timeout,
+        ))
+        if error is not None or (status and status >= 400):
+            if error is None:
+                error = 'Error %d' % status
+            else:
+                error = str(error)
+                if status:
+                    error = 'Error %d %s' % (status, error)
+            return error, status, None, None, None
+        if status and status >= 300:
+            # Redirect: hand the target URL back to the caller.
+            return None, status, resp_headers.get('Location'), None, None
+        return None, status, None, resp_headers, body
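+
+# Note: a robot class is picked at run time via the BKMK_ROBOT environment
+# variable (see the Robot.py hunk below); 'aiohttp' selects this class.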
+
+
+async def get(url, headers=None, proxy=None, connect_timeout=30, timeout=60):
+    """GET url without following redirects;
+    return (error, status, headers, body)."""
+    if headers is None:  # avoid a mutable default argument
+        headers = {}
+    client_timeout = aiohttp.ClientTimeout(
+        connect=connect_timeout, total=timeout)
+    try:
+        async with aiohttp.ClientSession(timeout=client_timeout) as session:
+            async with session.get(
+                    url, headers=headers, proxy=proxy,
+                    allow_redirects=False) as resp:
+                return None, resp.status, resp.headers, await resp.read()
+    except (asyncio.TimeoutError, aiohttp.client_exceptions.ClientError) as e:
+        return e, None, None, None
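+
+
+if __name__ == '__main__':
+    # Minimal manual smoke test of the helper coroutine; a sketch only,
+    # not part of the robot API. The default URL is an arbitrary example.
+    import sys
+    test_url = sys.argv[1] if len(sys.argv) > 1 else 'https://example.com/'
+    error, status, headers, body = asyncio.run(get(test_url))
+    print('error :', error)
+    print('status:', status)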
             error, http_status_code, redirect_to, headers, content = \
                 self.smart_get(bookmark, bookmark.href, True)
-            if error:
+            if error is not None:
                 bookmark.error = error
                 return 1
                     is_html = True
                     break
             content_stripped = content.strip()
-            if content_stripped and charset:
+            if content_stripped and charset \
+                    and isinstance(content_stripped, bytes):
                 try:
                     content_stripped = content_stripped.decode(
                         charset, 'replace')
                              use_proxy=True)
                 if error is None:
                     self.proxy_ok.add(url_host)
-        if error is not None:
+        if (error is not None) or (
+            http_status_code and (http_status_code >= 400)
+        ):
             if use_proxy:
                 self.log('   Proxy error    : %s' % error)
                 if url_host not in self.proxy_ok:
                     self.proxy_error.add(url_host)
             return error, http_status_code, None, None, None
-        if http_status_code:
+        if http_status_code and (http_status_code >= 300):
             return None, http_status_code, redirect_to, None, None
         return None, None, None, headers, content
     . bkmk_db-venv/bin/activate &&
     pip install --compile --upgrade beautifulsoup4 lxml m_lib.full \
         requests requests-ftp \
-        certifi pycurl
+        pycurl certifi aiohttp
}
fi
WHAT'S NEW
+Version 5.5.0 (2024-08-06)
+
+   Robot based on aiohttp.
+
+   Default list of robots is now aiohttp,curl,requests,forking.
+
 Version 5.4.1 (2024-08-04)
 
    Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
+Version 5.5.0 (2024-08-06)
+
+   Robot based on aiohttp.
+
+   Default list of robots is now aiohttp,curl,requests,forking.
+
 Version 5.4.1 (2024-08-04)
 
    Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself
-Robot based on aiohttp.
+Robot based on aioftp.
Robot(s) that test many URLs in parallel.
from bkmk_objects import parse_params, set_params
 robot_names, robot_params = parse_params(
-    environ.get("BKMK_ROBOT", "curl,requests,forking"))
+    environ.get("BKMK_ROBOT", "aiohttp,curl,requests,forking"))
def import_robot(robot_name):
 setup(
     name='bookmarks_db',
-    version='5.4.1',
+    version='5.5.0',
     description='Bookmarks database and Internet robot',
     long_description=open('README', 'r').read(),
     long_description_content_type="text/plain",
         'html': ['beautifulsoup4', 'lxml'],
         'requests': ['requests', 'requests-ftp'],
         'curl': ['pycurl', 'certifi'],
+        'aiohttp:python_version>="3.7"': ['aiohttp'],
     },
 )