dict["lastModified"] = convert_date_to_json(f.last_modified)
root = getattr(f, 'root')
if root: dict["root"] = root
- dict["title"] = f.name.decode('utf-8')
+ dict["title"] = f.name
dict["type"] = "text/x-moz-place-container"
if root:
self.dict["children"].append(dict)
keyword = getattr(b, 'keyword')
if keyword: dict["keyword"] = keyword
dict["lastModified"] = convert_date_to_json(b.last_modified)
- dict["title"] = b.name.decode('utf-8')
+ dict["title"] = b.name
dict["type"] = "text/x-moz-place"
dict["uri"] = b.href
self.folder_stack[-1].append(dict)
if guid: dict["guid"] = guid
dict["index"] = r.index
dict["lastModified"] = convert_date_to_json(r.last_modified)
- if r.name: dict["title"] = r.name.decode('utf-8')
+ if r.name: dict["title"] = r.name
dict["type"] = "text/x-moz-place-separator"
self.folder_stack[-1].append(dict)
folder.guid = fdict.get("guid")
folder.index = fdict.get("index")
folder.root = fdict.get("root")
- folder.name = encode_title(fdict["title"])
+ folder.name = fdict["title"]
if "children" in fdict:
for record in fdict["children"]:
elif record["type"] == "text/x-moz-place":
bookmark = Bookmark(
- href=record["uri"].encode('utf-8'),
+ href=record["uri"],
add_date=convert_date_from_json(
record.get("dateAdded")),
last_modified=convert_date_from_json(
bookmark.guid = record.get("guid")
bookmark.id = record["id"]
bookmark.index = record.get("index")
- bookmark.name = encode_title(record["title"])
+ bookmark.name = record["title"]
self.current_folder.append(bookmark)
elif record["type"] == "text/x-moz-place-separator":
ruler.index = record["index"]
ruler.last_modified = convert_date_from_json(
record.get("lastModified"))
- ruler.name = encode_title(record.get("title"))
+ ruler.name = record.get("title")
ruler.comment = get_comment(record.get("annos"))
self.current_folder.append(ruler)
return date
-def encode_title(title):
- if title:
- return title.encode("UTF-8", "xmlcharrefreplace")
- return title
-
-
def get_str(record, name):
if name in record:
- return record[name].encode('utf-8')
+ return record[name]
return ''
for a in annos:
if a["name"] == "bookmarkProperties/description":
- return a["value"].encode('utf-8')
+ return a["value"]
return ''
"expires": 4,
"flags": 0,
"name": name,
- "value": value.decode('utf-8')}]
+ "value": value,
+ }]
):
if hasattr(b, attr_name):
value = getattr(b, attr_name)
- if isinstance(value, unicode):
- value = value.encode('utf-8')
+ #if isinstance(value, unicode):
+ # value = value.encode('utf-8')
self.outfile.write("\n%s: %s" % (attr_out, value))
if hasattr(b, "last_tested"):
href += ':' + quote(password)
href += '@'
if host:
- href += host.decode(parser_charset or 'utf-8').encode('idna')
+ href += host.encode('idna').decode('ascii')
if port:
href += ':%d' % port
if path:
if BKMK_FORMAT == "MOZILLA":
from HTMLParser import HTMLParser
title = HTMLParser().unescape(
- title.replace("&amp;amp;", '&amp;').decode('utf-8'))
- title = title.encode('utf-8').replace("&#39;", "'")
+ title.replace("&amp;amp;", '&amp;'))
+ title = title.replace("&#39;", "'")
return title
def handle_data(self, data):
if data:
- if self.charset and default_encoding:
- data = data.decode(self.charset, "replace").\
- encode(default_encoding, "xmlcharrefreplace")
+ #if self.charset and default_encoding:
+ # data = data.decode(self.charset, "replace").\
+ # encode(default_encoding, "xmlcharrefreplace")
self.accumulator += data
# Mozilla - get charset
entity_re.match(part):
_part = name2codepoint.get(part[1:-1], None)
if _part is not None:
- part = unichr(_part).encode(charset)
+ part = unichr(_part)
output.append(part)
title = ''.join(output)
for part in num_entity_re.split(title):
if num_entity_re.match(part):
try:
- part = unichr(int(part[2:-1])).encode(charset)
+ part = unichr(int(part[2:-1]))
except UnicodeEncodeError:
pass # Leave the entity as is
output.append(part)
p, parser = _parsers[0]
if log: log(" Using %s" % p.__module__)
- title = parser.title
- if isinstance(title, unicode):
- if parser.charset:
- parser.title = title.encode(parser.charset)
- else:
- try:
- parser.title = title.encode('ascii')
- except UnicodeEncodeError:
- try:
- parser.title = title.encode(DEFAULT_CHARSET)
- except UnicodeEncodeError:
- parser.title = title.encode(universal_charset)
- parser.charset = universal_charset
- else:
- parser.charset = DEFAULT_CHARSET
- else:
- parser.charset = 'ascii'
+ #title = parser.title
+ #if isinstance(title, unicode):
+ # if parser.charset:
+ # parser.title = title.encode(parser.charset)
+ # else:
+ # try:
+ # parser.title = title.encode('ascii')
+ # except UnicodeEncodeError:
+ # try:
+ # parser.title = title.encode(DEFAULT_CHARSET)
+ # except UnicodeEncodeError:
+ # parser.title = title.encode(universal_charset)
+ # parser.charset = universal_charset
+ # else:
+ # parser.charset = DEFAULT_CHARSET
+ # else:
+ # parser.charset = 'ascii'
converted_title = title = parser.title
if title and (not parser.charset):
if log: log(" guessed charset: %s" % parser.charset)
# if log: log(" current charset: %s" % universal_charset)
if log: log(" title : %s" % title)
- if parser.charset != universal_charset:
- try:
- converted_title = title.decode(parser.charset).\
- encode(universal_charset)
- except UnicodeError:
- if log:
- log(" incorrect conversion from %s,"
- "converting from %s"
- % (parser.charset, DEFAULT_CHARSET))
- converted_title = \
- title.decode(DEFAULT_CHARSET, "replace").\
- encode(universal_charset, "replace")
- parser.charset = DEFAULT_CHARSET
- if log and (converted_title != title):
- log(" converted title: %s" % converted_title)
+ #if parser.charset != universal_charset:
+ # try:
+ # converted_title = title.decode(parser.charset).\
+ # encode(universal_charset)
+ # except UnicodeError:
+ # if log:
+ # log(" incorrect conversion from %s,"
+ # "converting from %s"
+ # % (parser.charset, DEFAULT_CHARSET))
+ # converted_title = \
+ # title.decode(DEFAULT_CHARSET, "replace").\
+ # encode(universal_charset, "replace")
+ # parser.charset = DEFAULT_CHARSET
+ #if log and (converted_title != title):
+ # log(" converted title: %s" % converted_title)
except LookupError:
if log: log(" unknown charset: '%s'" % parser.charset)
else:
log(" final title : %s" % final_title)
parser.title = final_title
- icon = parser.icon
- if isinstance(icon, unicode):
- try:
- parser.icon = icon.encode('ascii')
- except UnicodeEncodeError:
- if parser.charset:
- parser.icon = icon.encode(parser.charset)
+ #icon = parser.icon
+ #if isinstance(icon, unicode):
+ # try:
+ # parser.icon = icon.encode('ascii')
+ # except UnicodeEncodeError:
+ # if parser.charset:
+ # parser.icon = icon.encode(parser.charset)
return parser
if meta_content:
meta_charset = _charset = meta_content.lower()
- if title and (_charset or meta_charset):
- title = title.encode(_charset or meta_charset)
+ #if title and (_charset or meta_charset):
+ # title = title.encode(_charset or meta_charset)
meta = head.find(_find_refresh, recursive=False)
if meta:
if meta_content:
meta_charset = _charset = meta_content.lower()
- if title and (_charset or meta_charset):
- try:
- title = title.encode(_charset or meta_charset)
- except LookupError:
- title = title.encode(universal_charset)
- _charset = universal_charset
+ #if title and (_charset or meta_charset):
+ # try:
+ # title = title.encode(_charset or meta_charset)
+ # except LookupError:
+ # title = title.encode(universal_charset)
+ # _charset = universal_charset
meta = head.find(_find_refresh, recursive=False)
if meta:
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
else:
meta_charset = False
- if title and (charset or meta_charset):
- title = title.encode(charset or meta_charset)
+ #if title and (charset or meta_charset):
+ # title = title.encode(charset or meta_charset)
for m in meta:
if m.get('http-equiv', '').lower() == 'refresh':
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
if not charset:
charset = parser.tokenizer.stream.charEncoding[0]
- if title and (charset or meta_charset):
- title = title.encode(charset or meta_charset)
+ #if title and (charset or meta_charset):
+ # title = title.encode(charset or meta_charset)
for node in head.childNodes:
if node.name == 'meta' and \
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
else:
meta_charset = False
- if title and (charset or meta_charset):
- title = title.encode(charset or meta_charset)
+ #if title and (charset or meta_charset):
+ # title = title.encode(charset or meta_charset)
for m in meta:
if m.get('http-equiv', '').lower() == 'refresh':