git.phdru.name Git - bookmarks_db.git/commitdiff
Fix(Robot): Stop splitting and un-splitting URLs 5.3.1
author Oleg Broytman <phd@phdru.name>
Wed, 6 Mar 2024 15:43:48 +0000 (18:43 +0300)
committer Oleg Broytman <phd@phdru.name>
Thu, 7 Mar 2024 13:34:29 +0000 (16:34 +0300)
Pass `bookmark.href` as is.

Robots/bkmk_robot_base.py
bkmk_objects.py
doc/ANNOUNCE
doc/ChangeLog

index 3fd567cda2f0c232017d910e4a66526efcec9d75..a03d5c1b16ec430ab17ac8f899655ae9cf9808bb 100644 (file)
@@ -12,7 +12,7 @@ __all__ = ['robot_base', 'get_error']
 
 
 from base64 import b64encode
-from urllib.parse import urlsplit, urljoin
+from urllib.parse import urljoin
 import sys
 import socket
 import time
@@ -78,16 +78,8 @@ class robot_base(Robot):
             self.start = int(time.time())
             bookmark.icon = None
 
-            split_results = urlsplit(bookmark.href)
-            url_type, netloc, url_path, query, url_tag = split_results
-            url_host = split_results.hostname
-
-            if query:
-                url = "%s://%s%s?%s" % (url_type, url_host, url_path, query)
-            else:
-                url = "%s://%s%s" % (url_type, url_host, url_path)
             error, redirect_code, redirect_to, headers, content = \
-                self.get(bookmark, url, True)
+                self.get(bookmark, bookmark.href, True)
 
             if error:
                 bookmark.error = error
@@ -175,8 +167,7 @@ class robot_base(Robot):
                             icon = None
                         if not icon:
                             icon = "/favicon.ico"
-                        icon_url = urljoin(
-                            "%s://%s%s" % (url_type, url_host, url_path), icon)
+                        icon_url = urljoin(bookmark.href, icon)
                         self.log("   looking for icon at: %s" % icon_url)
                         if icon_url in icons:
                             if icons[icon_url]:
@@ -280,7 +271,8 @@ class robot_base(Robot):
                     self.log("   no header: %s" % key)
 
             md5 = md5wrapper()
-            if url_type == "ftp":  # Pass welcome message through MD5
+            if bookmark.href.startswith("ftp://"):
+                # Pass welcome message through MD5
                 ftp_welcome = self.get_ftp_welcome()
                 if not isinstance(ftp_welcome, bytes):
                     ftp_welcome = ftp_welcome.encode(charset or 'utf-8')
index d672bcfc7f2850e0e7c98d5fec7adf7148d50b16..bf5904b3287d2872dd6e871398751bc3118c6700 100644 (file)
@@ -14,7 +14,7 @@ __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
            ]
 
 
-from urllib.parse import urlsplit, quote, unquote
+from urllib.parse import unquote
 import os
 
 BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
@@ -58,29 +58,6 @@ class Bookmark(object):
     def __init__(self, href, add_date, last_visit=None, last_modified=None,
                  keyword=None, comment='', icon_href=None, icon=None,
                  charset=None, parser_charset=None):
-        split_results = urlsplit(href)
-        protocol, netloc, path, query, tag = split_results
-        user = split_results.username
-        password = split_results.password
-        host = split_results.hostname
-        port = split_results.port
-
-        if protocol == 'place':
-            href = protocol + ":"
-        else:
-            href = protocol + "://"
-        if user:
-            href += quote(user)
-            if password:
-                href += ':' + quote(password)
-            href += '@'
-        if host:
-            href += host.encode('idna').decode('ascii')
-            if port:
-                href += ':%d' % port
-        if path:
-            href += path
-
         self.href = href
         self.add_date = add_date
         self.last_visit = last_visit
index afdd1a7a469387e42aa12610b8547cab0b418bfe..75cf230571467f036d7e91cb415227113faffb8e 100644 (file)
@@ -6,6 +6,14 @@ WHAT IS IT
 bookmarks.html.
 
 WHAT'S NEW
+Version 5.3.1 (2024-03-??)
+
+   Renamed check_urls.py to check_urls_db.py.
+
+   Renamed check_url.py to check_urls.py.
+
+   Stop splitting and un-splitting URLs. Pass bookmark.href as is.
+
 Version 5.3.0 (2024-03-06)
 
    Added get_url.py: a script to get one file from an URL.
index 19d2d5dcd7d6e7cc43e86793bf1d4d0f7ba6fe05..2b4f65d810bf9bb0ce615658680180bb8eebc677 100644 (file)
@@ -1,8 +1,10 @@
 Version 5.3.1 (2024-03-??)
 
-   Renamed check_urls.py to check_urls_db.py
+   Renamed check_urls.py to check_urls_db.py.
 
-   Renamed check_url.py to check_urls.py
+   Renamed check_url.py to check_urls.py.
+
+   Stop splitting and un-splitting URLs. Pass bookmark.href as is.
 
 Version 5.3.0 (2024-03-06)