From 27d04395e10919fe08fb52b85ceaaa00543596de Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Wed, 6 Mar 2024 18:43:48 +0300 Subject: [PATCH] Fix(Robot): Stop splitting and un-splitting URLs Pass `bookmark.href` as is. --- Robots/bkmk_robot_base.py | 18 +++++------------- bkmk_objects.py | 25 +------------------------ doc/ANNOUNCE | 8 ++++++++ doc/ChangeLog | 6 ++++-- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 3fd567c..a03d5c1 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -12,7 +12,7 @@ __all__ = ['robot_base', 'get_error'] from base64 import b64encode -from urllib.parse import urlsplit, urljoin +from urllib.parse import urljoin import sys import socket import time @@ -78,16 +78,8 @@ class robot_base(Robot): self.start = int(time.time()) bookmark.icon = None - split_results = urlsplit(bookmark.href) - url_type, netloc, url_path, query, url_tag = split_results - url_host = split_results.hostname - - if query: - url = "%s://%s%s?%s" % (url_type, url_host, url_path, query) - else: - url = "%s://%s%s" % (url_type, url_host, url_path) error, redirect_code, redirect_to, headers, content = \ - self.get(bookmark, url, True) + self.get(bookmark, bookmark.href, True) if error: bookmark.error = error @@ -175,8 +167,7 @@ class robot_base(Robot): icon = None if not icon: icon = "/favicon.ico" - icon_url = urljoin( - "%s://%s%s" % (url_type, url_host, url_path), icon) + icon_url = urljoin(bookmark.href, icon) self.log(" looking for icon at: %s" % icon_url) if icon_url in icons: if icons[icon_url]: @@ -280,7 +271,8 @@ class robot_base(Robot): self.log(" no header: %s" % key) md5 = md5wrapper() - if url_type == "ftp": # Pass welcome message through MD5 + if bookmark.href.startswith("ftp://"): + # Pass welcome message through MD5 ftp_welcome = self.get_ftp_welcome() if not isinstance(ftp_welcome, bytes): ftp_welcome = ftp_welcome.encode(charset or 'utf-8') diff --git a/bkmk_objects.py b/bkmk_objects.py index d672bcf..bf5904b 100644 --- a/bkmk_objects.py +++ b/bkmk_objects.py @@ -14,7 +14,7 @@ __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot', ] -from urllib.parse import urlsplit, quote, unquote +from urllib.parse import unquote import os BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA") @@ -58,29 +58,6 @@ class Bookmark(object): def __init__(self, href, add_date, last_visit=None, last_modified=None, keyword=None, comment='', icon_href=None, icon=None, charset=None, parser_charset=None): - split_results = urlsplit(href) - protocol, netloc, path, query, tag = split_results - user = split_results.username - password = split_results.password - host = split_results.hostname - port = split_results.port - - if protocol == 'place': - href = protocol + ":" - else: - href = protocol + "://" - if user: - href += quote(user) - if password: - href += ':' + quote(password) - href += '@' - if host: - href += host.encode('idna').decode('ascii') - if port: - href += ':%d' % port - if path: - href += path - self.href = href self.add_date = add_date self.last_visit = last_visit diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE index afdd1a7..75cf230 100644 --- a/doc/ANNOUNCE +++ b/doc/ANNOUNCE @@ -6,6 +6,14 @@ WHAT IS IT bookmarks.html. WHAT'S NEW +Version 5.3.1 (2024-03-??) + + Renamed check_urls.py to check_urls_db.py. + + Renamed check_url.py to check_urls.py. + + Stop splitting and un-splitting URLs. Pass bookmark.href as is. + Version 5.3.0 (2024-03-06) Added get_url.py: a script to get one file from an URL. diff --git a/doc/ChangeLog b/doc/ChangeLog index 19d2d5d..2b4f65d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,8 +1,10 @@ Version 5.3.1 (2024-03-??) - Renamed check_urls.py to check_urls_db.py + Renamed check_urls.py to check_urls_db.py. - Renamed check_url.py to check_urls.py + Renamed check_url.py to check_urls.py. + + Stop splitting and un-splitting URLs. Pass bookmark.href as is. Version 5.3.0 (2024-03-06) -- 2.39.2