From 968bbee3279fdea70161ed595146f11ec69c57ea Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 16 Sep 2023 22:26:28 +0300 Subject: [PATCH] Fix(Py3): Fix `basestring` compatibility --- parse_html/bkmk_ph_beautifulsoup.py | 4 +++- parse_html/bkmk_ph_beautifulsoup4.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py index e29b499..888f868 100644 --- a/parse_html/bkmk_ph_beautifulsoup.py +++ b/parse_html/bkmk_ph_beautifulsoup.py @@ -14,7 +14,9 @@ __all__ = ['parse_html'] import re from sgmllib import SGMLParser, SGMLParseError from BeautifulSoup import BeautifulSoup, CData + from .bkmk_ph_util import HTMLParser +from ..compat import string_type DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic @@ -95,7 +97,7 @@ def parse_html(html_text, charset=None, log=None): else: parts = [] for part in title: - if not isinstance(part, basestring): + if not isinstance(part, string_type): part = part.decode() parts.append(part.strip()) title = ''.join(parts) diff --git a/parse_html/bkmk_ph_beautifulsoup4.py b/parse_html/bkmk_ph_beautifulsoup4.py index 1f77eb7..33a2e18 100644 --- a/parse_html/bkmk_ph_beautifulsoup4.py +++ b/parse_html/bkmk_ph_beautifulsoup4.py @@ -12,7 +12,9 @@ __all__ = ['parse_html'] from bs4 import BeautifulSoup + from .bkmk_ph_util import HTMLParser +from ..compat import string_type universal_charset = "utf-8" DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic @@ -54,7 +56,7 @@ def parse_html(html_text, charset=None, log=None): else: parts = [] for part in title: - if not isinstance(part, basestring): + if not isinstance(part, string_type): part = part.decode() parts.append(part.strip()) title = ''.join(parts) -- 2.39.5