self.start = int(time.time())
bookmark.icon = None
+ if bookmark.charset:
+ headers = request_headers.copy()
+ headers['Accept-Charset'] = bookmark.charset
+ else:
+ headers = request_headers
+
error, http_status_code, redirect_to, headers, content = \
- await self.get_url(bookmark.href, bookmark.charset)
+ await self.get_url(bookmark.href, headers)
if error is not None:
bookmark.error = error
icon_error, \
icon_status_code, icon_redirect_to, \
icon_headers, icon_data = \
- await self.get_url(_icon_url)
+ await self.get_url(
+ _icon_url, request_headers)
if icon_error:
raise IOError("No icon: " + icon_error)
break
finally:
self.finish_check_url(bookmark)
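# A minimal sketch of the new convention, for reference: per-bookmark
# headers are built once at the call site from the shared
# request_headers dict, and every get() implementation now receives the
# finished mapping.  The helper name make_headers is illustrative, not
# part of the patch:
def make_headers(base, charset=None):
    if charset:
        headers = base.copy()  # never mutate the shared base dict
        headers['Accept-Charset'] = charset
        return headers
    return base
# Note that the tuple unpacking above rebinds the local name headers to
# the *response* headers, which is why the icon fetch passes
# request_headers again instead of reusing the local.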
- async def get_url(self, url, accept_charset=None):
+ async def get_url(self, url, headers):
split_results = urlsplit(url)
url_proto = split_results.scheme
url_host = split_results.hostname
if use_proxy and url_host in self.proxy_ok:
self.log(' Immediately trying with the proxy')
error, http_status_code, redirect_to, headers, content = \
- await self.get(url, accept_charset=accept_charset,
- use_proxy=True)
+ await self.get(url, headers, use_proxy=True)
else:
error, http_status_code, redirect_to, headers, content = \
- await self.get(url, accept_charset=accept_charset)
+ await self.get(url, headers)
if error is not None and (
not url_host.startswith('localhost') and
        not url_host.startswith('127.')
):
if use_proxy and http_status_code != 404:
self.log(' Retrying with the proxy...')
error, http_status_code, redirect_to, headers, content = \
- await self.get(url, accept_charset=accept_charset,
- use_proxy=True)
+ await self.get(url, headers, use_proxy=True)
if error is None:
self.proxy_ok.add(url_host)
if (error is not None) or (
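# Condensed sketch of the proxy fallback that get_url implements: a
# host that answered only through the proxy is cached in self.proxy_ok
# so later requests skip the direct attempt.  The real method above
# additionally exempts localhost/127.* hosts and 404 responses; names
# here follow the surrounding code, using the urlsplit imported above:
async def get_with_fallback(self, url, headers):
    host = urlsplit(url).hostname
    if host in self.proxy_ok:
        return await self.get(url, headers, use_proxy=True)
    result = await self.get(url, headers)
    if result[0] is not None:  # got an error: retry through the proxy
        result = await self.get(url, headers, use_proxy=True)
        if result[0] is None:
            self.proxy_ok.add(host)
    return result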
import aiohttp
import aiohttp.client_exceptions
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
class robot_aiohttp(robot_base):
def version_str(self):
return 'aiohttp/%s' % aiohttp.__version__
- async def get(self, url, accept_charset=None, use_proxy=False):
+ async def get(self, url, headers, use_proxy=False):
if url.startswith('ftp://'):
    error, body = await _get_ftp(
        url, timeout=self.ftp_timeout,
    )
    if error is not None:
        return error, None, None, None, None
    return None, None, None, None, body
- if accept_charset:
- headers = request_headers.copy()
- headers['Accept-Charset'] = accept_charset
- else:
- headers = request_headers
-
if use_proxy:
proxy = self.proxy
else:
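# For comparison, a self-contained aiohttp fetch with the same
# signature shape; a sketch only, omitting the redirect and error
# handling the real method performs:
import aiohttp

async def fetch(url, headers, proxy=None, timeout=30):
    async with aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=timeout)) as session:
        async with session.get(url, headers=headers, proxy=proxy,
                               allow_redirects=False) as resp:
            return resp.status, dict(resp.headers), await resp.read()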
from urllib.parse import urlsplit, parse_qsl, quote, quote_plus, urlencode
-from m_lib.defenc import default_encoding
import certifi
import pycurl
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
class robot_curl(robot_base):
def version_str(self):
return str(pycurl.version)
- async def get(self, url, accept_charset=None, use_proxy=False):
- if accept_charset:
- headers = request_headers.copy()
- headers['Accept-Charset'] = accept_charset
- else:
- headers = request_headers
+ async def get(self, url, headers, use_proxy=False):
headers = ['%s: %s' % (k, v) for k, v in headers.items()]
curl = pycurl.Curl()
try:
url.encode('ascii')
except UnicodeEncodeError:
- url = encode_url(url, accept_charset)
+ url = encode_url(url)
curl.setopt(pycurl.URL, url)
try:
curl.perform()
return '' # We don't store welcome message yet
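# pycurl expects headers as a list of 'Name: value' strings, hence the
# dict flattening in get() above.  A minimal standalone GET for
# comparison (a sketch; the real method sets many more options):
import pycurl
from io import BytesIO

def curl_fetch(url, headers):
    buf = BytesIO()
    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.HTTPHEADER,
                ['%s: %s' % (k, v) for k, v in headers.items()])
    curl.setopt(pycurl.WRITEDATA, buf)  # collect the body in memory
    curl.perform()
    status = curl.getinfo(pycurl.RESPONSE_CODE)
    curl.close()
    return status, buf.getvalue()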
-def encode_url(url, encoding):
- if not encoding:
- encoding = default_encoding
-
+def encode_url(url, encoding='latin1'):
split_results = urlsplit(url)
protocol, netloc, path, query, tag = split_results
user = split_results.username
if query:
qlist = []
for name, value in parse_qsl(query):
- if isinstance(name, bytes):
- name = name.decode(default_encoding)
- value = value.decode(default_encoding)
- name = name.encode(encoding)
- value = value.encode(encoding)
+ if not isinstance(name, bytes):
+ name = name.encode(encoding)
+ value = value.encode(encoding)
qlist.append((name, value))
url = protocol + "://"
if user:
- if isinstance(user, bytes):
- user = user.decode(default_encoding)
url += quote(user.encode(encoding))
if password:
- if isinstance(password, bytes):
- password = password.decode(default_encoding)
url += ':' + quote(password.encode(encoding))
url += '@'
if host:
if protocol == "file":
url += quote(path)
else:
- if isinstance(path, bytes):
- path = path.decode(default_encoding)
url += quote(path.encode(encoding))
if query:
url += '?' + urlencode(qlist)
if tag:
- if isinstance(tag, bytes):
- tag = tag.decode(default_encoding)
url += '#' + quote_plus(tag.encode(encoding))
return url
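# Usage sketch, assuming the elided host handling appends the plain
# hostname.  With the latin1 default, every code point below U+0100
# maps to a single byte before percent-encoding:
#
#     encode_url('http://example.com/caf\u00e9?q=na\u00efve')
#     -> 'http://example.com/caf%E9?q=na%EFve'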
current_href.set(bookmark.href)
await self.check_bookmark_async(bookmark)
- async def get_url(self, url, accept_charset=None):
+ async def get_url(self, url, headers):
if url not in self.logs:
self.logs[url] = []
current_href.set(url)
- return await super(robot_multiaio, self).get_url(
- url, accept_charset=accept_charset)
+ return await super(robot_multiaio, self).get_url(url, headers)
def wait(self):
self.loop.run_until_complete(self.wait_async())
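# current_href is a contextvars.ContextVar, so each asyncio task sees
# the URL it set itself, and log lines can be routed into per-URL
# buffers.  A sketch of that routing; the actual log method is not
# shown in this hunk:
import contextvars

current_href = contextvars.ContextVar('current_href', default='')

def log(self, s):
    # concurrent tasks each read back their own current_href value
    self.logs[current_href.get()].append(s)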
import requests
import urllib3
-from Robots.base import robot_base, request_headers
+from Robots.base import robot_base
class robot_requests(robot_base):
def version_str(self):
return 'python-requests urllib3/%s' % urllib3.__version__
- async def get(self, url, accept_charset=None, use_proxy=False):
+ async def get(self, url, headers, use_proxy=False):
if url.startswith('ftp://'):
error, welcome, body = _get_ftp(url, self.timeout)
if error is not None:
    return error, None, None, None, None
self.welcome = welcome
return None, None, None, None, body
- if accept_charset:
- headers = request_headers.copy()
- headers['Accept-Charset'] = accept_charset
- else:
- headers = request_headers
-
if use_proxy:
proxies = {'http': self.proxy, 'https': self.proxy}
else:
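# The requests counterpart in standalone form, reusing the same
# proxies mapping built above (a sketch; the real method also handles
# redirects and turns exceptions into the error slot of the tuple):
import requests

def requests_fetch(url, headers, proxies=None, timeout=30):
    r = requests.get(url, headers=headers, proxies=proxies,
                     timeout=timeout, allow_redirects=False)
    return r.status_code, r.headers, r.content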