Feat: Dropped support for Python 2

author Oleg Broytman <phd@phdru.name>
Thu, 8 Aug 2024 04:45:58 +0000 (07:45 +0300)
committer Oleg Broytman <phd@phdru.name>
Thu, 15 Aug 2024 17:43:09 +0000 (20:43 +0300)

diff --git a/Storage/bkmk_stjson.py b/Storage/bkmk_stjson.py

index 1d32fdc8af03de294ecc1eab4bad1f1a78ffaf10..9daf1bb58915376aa21428737e2fd51f6adf9b1a 100644 (file)
--- a/Storage/bkmk_stjson.py
+++ b/Storage/bkmk_stjson.py
@@ -5,17 +5,13 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['storage_json']
  
  
-try:
-    import json
-except ImportError:
-    import simplejson as json
-
+import json
  from bkmk_objects import Folder, Bookmark, Ruler, Walker
  
  
diff --git a/Writers/bkmk_wflad.py b/Writers/bkmk_wflad.py

index 3b33de6bbebe5ce46a61ed0a679676894925f1e5..e273d340a69271f9901851559e7c0ed2f3375521 100644 (file)
--- a/Writers/bkmk_wflad.py
+++ b/Writers/bkmk_wflad.py
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['writer_flad']
@@ -12,7 +12,6 @@ __all__ = ['writer_flad']
  
  import time
  from bkmk_objects import Writer, bkmk_attrs
-from compat import unicode
  
  
  def strftime(s):
@@ -59,8 +58,6 @@ Comment: %s""" % (
          for attr, title in bkmk_attrs.items():
              if hasattr(b, attr):
                  value = getattr(b, attr)
-                #if isinstance(value, unicode):
-                #    value = value.encode('utf-8')
                  self.outfile.write("\n%s: %s" % (title, value))
  
          if hasattr(b, "last_tested"):
diff --git a/Writers/bkmk_whtml.py b/Writers/bkmk_whtml.py

index 05d9847ad148d3f5e3458bfb0022e6647bf7b15b..a9a02b51fadd451de0f50b96344ae61cbc5f8491 100644 (file)
--- a/Writers/bkmk_whtml.py
+++ b/Writers/bkmk_whtml.py
@@ -4,14 +4,13 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['writer_html']
  
  
  from bkmk_objects import Writer, BKMK_FORMAT, quote_title
-from compat import unicode
  
  
  def dump_comment(comment):
@@ -62,8 +61,6 @@ class writer_html(Writer):
              if b.keyword: self.outfile.write(' SHORTCUTURL="%s"' % b.keyword)
              if b.icon_href:
                  value = b.icon_href
-                #if isinstance(value, unicode):
-                #    value = value.encode('utf-8')
                  self.outfile.write(' ICON_URI="%s"' % value)
              if b.icon: self.outfile.write(' ICON="%s"' % b.icon)
              if b.charset: self.outfile.write(' LAST_CHARSET="%s"' % b.charset)
diff --git a/bkmk_parser.py b/bkmk_parser.py

index 997728f3b9ceba4072be9c7c872d02fda8ca763b..24dc7c6acc4868b695e6ebf7265a48917a7a177c 100644 (file)
--- a/bkmk_parser.py
+++ b/bkmk_parser.py
@@ -47,9 +47,6 @@ class BkmkParser(HTMLParser):
  
      def handle_data(self, data):
          if data:
-            #if self.charset and default_encoding:
-            #    data = data.decode(self.charset, "replace").\
-            #        encode(default_encoding, "xmlcharrefreplace")
              self.accumulator += data
  
      # Mozilla - get charset
diff --git a/compat.py b/compat.py

deleted file mode 100644 (file)

index b197445..0000000
--- a/compat.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import sys
-
-# Compatability definitions (inspired by six)
-PY2 = sys.version_info[0] < 3
-if PY2:
-    # disable flake8 checks on python 3
-    string_type = basestring  # noqa
-    unicode = unicode  # noqa
-    unichr = unichr  # noqa
-else:
-    string_type = str
-    unicode = str
-    unichr = chr
diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE

index 5948b9cb22a8b254ba44f30f8dc1ed9c0052b275..7161011499603f46d8ebe6989f8294efcfb2f18a 100644 (file)
--- a/doc/ANNOUNCE
+++ b/doc/ANNOUNCE
@@ -11,6 +11,8 @@ Version 5.6.0 (2024-??-??)
  
     Removed urllib-based robots.
  
+   Dropped support for Python 2.
+
     Default list of robots is now curl,requests,aiohttp.
  
  
diff --git a/doc/ChangeLog b/doc/ChangeLog

index e4a50130a444479077fd919bb50887104c8949d9..38ecb852bd4f9a59423639e8002b7f112d265103 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -2,6 +2,8 @@ Version 5.6.0 (2024-??-??)
  
     Removed urllib-based robots.
  
+   Dropped support for Python 2.
+
     Default list of robots is now curl,requests,aiohttp.
  
  Version 5.5.1 (2024-08-??)
diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py

index 922c745346ecfa8f8f9c2988ef94f85345dc167a..d8168bd678a270688d979fb2680af8729cd56836 100644 (file)
--- a/parse_html/bkmk_parse_html.py
+++ b/parse_html/bkmk_parse_html.py
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['parse_html', 'parse_filename', 'universal_charset']
@@ -14,12 +14,7 @@ __all__ = ['parse_html', 'parse_filename', 'universal_charset']
  import codecs
  import os
  import re
-try:
-    from html.entities import name2codepoint
-except ImportError:
-    from htmlentitydefs import name2codepoint
-
-from compat import unicode, unichr
+from html.entities import name2codepoint
  
  DEFAULT_CHARSET = "cp1251"  # Stupid default for Russian Cyrillic
  parsers = []
@@ -66,7 +61,7 @@ def recode_entities(title, charset):
                entity_re.match(part):
              _part = name2codepoint.get(part[1:-1], None)
              if _part is not None:
-                part = unichr(_part)
+                part = chr(_part)
          output.append(part)
      title = ''.join(output)
  
@@ -74,7 +69,7 @@ def recode_entities(title, charset):
      for part in num_entity_re.split(title):
          if num_entity_re.match(part):
              try:
-                part = unichr(int(part[2:-1]))
+                part = chr(int(part[2:-1]))
              except UnicodeEncodeError:
                  pass  # Leave the entity as is
          output.append(part)
@@ -134,24 +129,6 @@ def parse_html(html_text, charset=None, log=None):
          p, parser = _parsers[0]
      if log: log("   Using %s" % p.__module__)
  
-    #title = parser.title
-    #if isinstance(title, unicode):
-    #    if parser.charset:
-    #        parser.title = title.encode(parser.charset)
-    #    else:
-    #        try:
-    #            parser.title = title.encode('ascii')
-    #        except UnicodeEncodeError:
-    #            try:
-    #                parser.title = title.encode(DEFAULT_CHARSET)
-    #            except UnicodeEncodeError:
-    #                parser.title = title.encode(universal_charset)
-    #                parser.charset = universal_charset
-    #            else:
-    #                parser.charset = DEFAULT_CHARSET
-    #        else:
-    #            parser.charset = 'ascii'
-
      converted_title = title = parser.title
      if title and isinstance(title, bytes) and (not parser.charset):
          try:
@@ -170,23 +147,7 @@ def parse_html(html_text, charset=None, log=None):
                  if log: log("   META charset   : %s" % parser.charset)
              elif (not charset) or (charset != parser.charset):
                  if log: log("   guessed charset: %s" % parser.charset)
-            # if log: log("   current charset: %s" % universal_charset)
              if log: log("   title          : %s" % title)
-            #if parser.charset != universal_charset:
-            #    try:
-            #        converted_title = title.decode(parser.charset).\
-            #            encode(universal_charset)
-            #    except UnicodeError:
-            #        if log:
-            #            log("   incorrect conversion from %s,"
-            #                "converting from %s"
-            #                % (parser.charset, DEFAULT_CHARSET))
-            #        converted_title = \
-            #            title.decode(DEFAULT_CHARSET, "replace").\
-            #            encode(universal_charset, "replace")
-            #        parser.charset = DEFAULT_CHARSET
-            #if log and (converted_title != title):
-            #    log("   converted title: %s" % converted_title)
          except LookupError:
              if log: log("   unknown charset: '%s'" % parser.charset)
      else:
@@ -200,13 +161,6 @@ def parse_html(html_text, charset=None, log=None):
              log("   final title    : %s" % final_title)
          parser.title = final_title
  
-    #icon = parser.icon
-    #if isinstance(icon, unicode):
-    #    try:
-    #        parser.icon = icon.encode('ascii')
-    #    except UnicodeEncodeError:
-    #        if parser.charset:
-    #            parser.icon = icon.encode(parser.charset)
      return parser
  
  
diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py

deleted file mode 100644 (file)

index 0aad3dd..0000000
--- a/parse_html/bkmk_ph_beautifulsoup.py
+++ /dev/null
@@ -1,166 +0,0 @@
-"""HTML Parser using BeautifulSoup
-
-This file is a part of Bookmarks database and Internet robot.
-
-"""
-
-__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2007-2023 PhiloSoft Design"
-__license__ = "GNU GPL"
-
-__all__ = ['parse_html']
-
-
-import re
-from sgmllib import SGMLParser, SGMLParseError
-from BeautifulSoup import BeautifulSoup, CData
-
-from .bkmk_ph_util import HTMLParser
-from compat import string_type
-
-DEFAULT_CHARSET = "cp1251"  # Stupid default for Russian Cyrillic
-
-# http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
-
-
-class BadDeclParser(BeautifulSoup):
-    def parse_declaration(self, i):
-        """Treat a bogus SGML declaration as raw data. Treat a CDATA
-        declaration as a CData object."""
-        j = None
-        if self.rawdata[i:i+9] == '<![CDATA[':
-            k = self.rawdata.find(']]>', i)
-            if k == -1:
-                k = len(self.rawdata)
-            data = self.rawdata[i+9:k]
-            j = k+3
-            self._toStringSubclass(data, CData)
-        else:
-            try:
-                j = SGMLParser.parse_declaration(self, i)
-            except SGMLParseError:
-                # Could not parse the DOCTYPE declaration
-                # Try to just skip the actual declaration
-                match = re.search(
-                    r'<!DOCTYPE([^>]*?)>', self.rawdata[i:],
-                    re.MULTILINE|re.IGNORECASE)  # noqa: E227
-                #           missing whitespace around bitwise or shift operator
-                if match:
-                    toHandle = self.rawdata[i:match.end()]
-                else:
-                    toHandle = self.rawdata[i:]
-                self.handle_data(toHandle)
-                j = i + len(toHandle)
-        return j
-
-
-def _parse_html(html_text, charset):
-    try:
-        return BadDeclParser(html_text, fromEncoding=charset)
-    except TypeError:
-        return None
-
-
-def parse_html(html_text, charset=None, log=None):
-    if not html_text:
-        return None
-    root = _parse_html(html_text, charset)
-    if root is None:
-        return None
-
-    _charset = root.originalEncoding
-    if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"):
-        # Replace with default and re-parse
-        _charset = DEFAULT_CHARSET
-        root = _parse_html(html_text, _charset)
-        if root is None:
-            return None
-
-    html = root.html
-    if html is None:
-        html = root
-
-    head = html.head
-    if head is None:
-        head = html  # Some sites put TITLE in HTML without HEAD
-
-    title = head.title
-    if (title is None) and (html is not head):
-        # Some sites put TITLE in HTML outside of HEAD
-        title = html.title
-
-    if title is None:
-        # Lookup TITLE in the root
-        title = root.title
-
-    if title is not None:
-        if title.string:
-            title = title.string
-        else:
-            parts = []
-            for part in title:
-                if not isinstance(part, string_type):
-                    part = part.decode()
-                parts.append(part.strip())
-            title = ''.join(parts)
-
-    meta = head.find(_find_contenttype, recursive=False)
-    if meta:
-        try:
-            meta_content = meta.get("content")
-            if meta_content:
-                __charset = meta_content.lower().split('charset=')[1].\
-                    split(';')[0]
-            else:
-                __charset = False
-        except IndexError:  # No charset in the META Content-Type
-            meta_charset = False
-        else:
-            meta_charset = _charset == __charset
-    else:
-        meta_charset = False
-
-    if not meta_charset:
-        meta = head.find(_find_charset, recursive=False)
-        if meta:
-            meta_content = meta.get("charset")
-            if meta_content:
-                meta_charset = _charset = meta_content.lower()
-
-    #if title and (_charset or meta_charset):
-    #    title = title.encode(_charset or meta_charset)
-
-    meta = head.find(_find_refresh, recursive=False)
-    if meta:
-        refresh = meta.get("content")
-    else:
-        refresh = None
-
-    meta = head.find(_find_icon, recursive=False)
-    if meta:
-        icon = meta.get("href")
-    else:
-        icon = None
-
-    if (title is None) and (refresh is None) and (icon is None):
-        return None
-    return HTMLParser(_charset, meta_charset, title, refresh, icon)
-
-
-def _find_contenttype(Tag):
-    return (Tag.name == "meta") and \
-       (Tag.get("http-equiv", '').lower() == "content-type")
-
-
-def _find_charset(Tag):
-    return (Tag.name == "meta") and Tag.get("charset", '')
-
-
-def _find_refresh(Tag):
-    return (Tag.name == "meta") and \
-       (Tag.get("http-equiv", '').lower() == "refresh")
-
-
-def _find_icon(Tag):
-    return (Tag.name == "link") and \
-       (Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py

index 060f0786d473c17de36ad073c05632c06f30c35d..148a6f7166a03b7f8e990ca53b04906e749412bf 100644 (file)
--- a/parse_html/bkmk_ph_beautifulsoup4.py
+++ b/parse_html/bkmk_ph_beautifulsoup4.py
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2017-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2017-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['parse_html']
@@ -16,7 +16,6 @@ import warnings
  from bs4 import BeautifulSoup
  
  from .bkmk_ph_util import HTMLParser
-from compat import string_type
  
  warnings.filterwarnings(
      'ignore', 'No parser was explicitly specified')
@@ -69,8 +68,6 @@ def parse_html(html_text, charset=None, log=None):
          else:
              parts = []
              for part in title:
-                #if not isinstance(part, string_type):
-                #    part = part.decode()
                  if part.strip:
                      parts.append(part.strip())
                  else:
@@ -100,13 +97,6 @@ def parse_html(html_text, charset=None, log=None):
              if meta_content:
                  meta_charset = _charset = meta_content.lower()
  
-    #if title and (_charset or meta_charset):
-    #    try:
-    #        title = title.encode(_charset or meta_charset)
-    #    except LookupError:
-    #        title = title.encode(universal_charset)
-    #        _charset = universal_charset
-
      meta = head.find(_find_refresh, recursive=False)
      if meta:
          refresh = meta.get("content")
diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py

index d11a2ff9fbeab4b5e5ec8daa1c2a7b4205ac63e4..b3da8e704260f24a17ef11d4d3a714fb103b7dc7 100644 (file)
--- a/parse_html/bkmk_ph_htmlparser.py
+++ b/parse_html/bkmk_ph_htmlparser.py
@@ -5,19 +5,16 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['parse_html']
  
  
-try:
-    from HTMLParser import HTMLParseError
-except ImportError:
-    class HTMLParseError(Exception): pass
  from m_lib.net.www.html import HTMLParser as _HTMLParser
  
  
+class HTMLParseError(Exception): pass
  class HTMLHeadDone(Exception): pass
  
  
diff --git a/parse_html/bkmk_ph_lxml.py b/parse_html/bkmk_ph_lxml.py

index 24274826c52ee40179a83d96ff1eb07ceb60e6ec..7af98e7facec38d987f8add8b55ca281bf28a57a 100644 (file)
--- a/parse_html/bkmk_ph_lxml.py
+++ b/parse_html/bkmk_ph_lxml.py
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
  """
  
  __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2024 PhiloSoft Design"
  __license__ = "GNU GPL"
  
  __all__ = ['parse_html']
@@ -61,9 +61,6 @@ def parse_html(html_text, charset=None, log=None):
      else:
          meta_charset = False
  
-    #if title and (charset or meta_charset):
-    #    title = title.encode(charset or meta_charset)
-
      for m in meta:
          if m.get('http-equiv', '').lower() == 'refresh':
              refresh = m.get("content")
diff --git a/setup.py b/setup.py

index 638e946300a6e7ed05ccf4327eaa8f00fa32d8b2..4a629e4c7833bbbcf36ca98ccd5f36f51cc153f2 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -20,9 +20,8 @@ setup(
          'Intended Audience :: End Users/Desktop',
          'License :: OSI Approved :: GNU General Public License (GPL)',
          'Operating System :: OS Independent',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
          'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3 :: Only',
          'Programming Language :: Python :: 3.4',
          'Programming Language :: Python :: 3.5',
          'Programming Language :: Python :: 3.6',
@@ -33,7 +32,7 @@ setup(
          'Programming Language :: Python :: 3.11',
          'Programming Language :: Python :: 3.12',
      ],
-    python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
+    python_requires='>=3.4',
      install_requires=[
          'm_lib.full>=1.0',
      ],
@@ -41,6 +40,6 @@ setup(
          'html': ['beautifulsoup4', 'lxml'],
          'requests': ['requests[socks]', 'requests-ftp'],
          'curl': ['pycurl', 'certifi'],
-        'aiohttp:python_version>="3.4"': ['aiohttp', 'aioftp'],
+        'aiohttp': ['aiohttp', 'aioftp'],
      },
  )
author	Oleg Broytman <phd@phdru.name>
	Thu, 8 Aug 2024 04:45:58 +0000 (07:45 +0300)
committer	Oleg Broytman <phd@phdru.name>
	Thu, 15 Aug 2024 17:43:09 +0000 (20:43 +0300)
Storage/bkmk_stjson.py		patch | blob | history
Writers/bkmk_wflad.py		patch | blob | history
Writers/bkmk_whtml.py		patch | blob | history
bkmk_parser.py		patch | blob | history
compat.py	[deleted file]	patch | blob | history
doc/ANNOUNCE		patch | blob | history
doc/ChangeLog		patch | blob | history
parse_html/bkmk_parse_html.py		patch | blob | history
parse_html/bkmk_ph_beautifulsoup.py	[deleted file]	patch | blob | history
parse_html/bkmk_ph_beautifulsoup4.py		patch | blob | history
parse_html/bkmk_ph_htmlparser.py		patch | blob | history
parse_html/bkmk_ph_lxml.py		patch | blob | history
setup.py		patch | blob | history