"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_base', 'get_error']
from base64 import b64encode
+from urllib.parse import urlsplit, urljoin
import sys
import socket
import time
-try:
- from urllib.parse import splittype, splithost, splittag, urljoin
-except ImportError:
- from urllib import splittype, splithost, splittag
- from urlparse import urljoin
from m_lib.md5wrapper import md5wrapper
from m_lib.net.www.util import parse_time
self.start = int(time.time())
bookmark.icon = None
- url_type, url_rest = splittype(bookmark.href)
- url_host, url_path = splithost(url_rest)
- url_path, url_tag = splittag(url_path) # noqa: E221
- # multiple spaces before operator
+ split_results = urlsplit(bookmark.href)
+ url_type, netloc, url_path, query, url_tag = split_results
+ url_host = split_results.hostname
url = "%s://%s%s" % (url_type, url_host, url_path)
error, redirect_code, redirect_to, headers, content = \
break
content_stripped = content.strip()
if content_stripped and charset:
- content_stripped = content_stripped.decode(
- charset, 'replace')
+ try:
+ content_stripped = content_stripped.decode(
+ charset, 'replace')
+ except LookupError:
+ charset = None
+ self.log(" unknown charset "
+ "in Content-Type header")
if content_stripped and is_html:
parser = parse_html(
content_stripped, charset, self.log)
bookmark.icon
)
else:
- self.log(" no icon :"
+ self.log(" no icon : "
"bad content type '%s'"
% content_type
)