check_subp = None
subp_pipe = None
+
def stop_subp(log):
global check_subp, subp_pipe
if check_subp:
del check_subp
del subp_pipe
+
def restart_subp(log):
global check_subp, subp_pipe
stop_subp(log)
_set_subproc = True
+
class robot_forking(Robot):
subproc = 'urllib2' # Default subprocess
self.errcode = errcode
self.newurl = newurl
+
class MyURLopener(urllib.URLopener):
# Error 301 -- relocated (permanently)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
urllib_ftpwrapper = urllib.ftpwrapper
ftpcache_key = None
+
class myftpwrapper(urllib_ftpwrapper):
def __init__(self, user, passwd, host, port, dirs):
urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2014-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2014-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_urllib2']
_fw = None
+
class FTPHandler(urllib2.FTPHandler):
def connect_ftp(self, user, passwd, host, port, dirs, timeout):
global _fw
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['storage_json']
date = int(float(date) * 10**6)
return date
+
def convert_date_from_json(date):
if date:
date = float(date)
date /= 10.0**6
return date
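# A minimal standalone sketch of the two converters above, with illustrative
# _sketch_ names: Mozilla's JSON bookmark backups keep timestamps as integer
# microseconds, so the round trip multiplies and divides by 10**6.
def _sketch_convert_date_to_json(date):
    if date:
        date = int(float(date) * 10**6)  # seconds -> microseconds
    return date

def _sketch_convert_date_from_json(date):
    if date:
        date = float(date) / 10.0**6  # microseconds -> seconds
    return date

assert _sketch_convert_date_from_json(_sketch_convert_date_to_json("1700000000.5")) == 1700000000.5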
+
def encode_title(title):
if title:
return title.encode("UTF-8", "xmlcharrefreplace")
return title
+
def get_str(record, name):
if name in record:
return record[name].encode('utf-8')
return ''
+
def get_comment(annos):
if not annos:
return ''
return ''
+
def make_annos(value, name="bookmarkProperties/description"):
return [{
"expires": 4,
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['writer_html']
ind_s = " "*4
+
class writer_html(Writer):
filename = "bookmarks.html"
BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
+
class Folder(list):
isFolder = 1
isBookmark = 0
return root_folder
+
def break_tree(linear):
del linear[0]
title = title.replace("'", "&#39;")
return title
+
def unquote_title(title):
if BKMK_FORMAT == "MOZILLA":
from HTMLParser import HTMLParser
param_list[key] = value
return main_param, param_list
+
def set_params(obj, params):
if hasattr(params, "items"):
params = params.items()
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
__license__ = "GNU GPL"
log_file = None
+
def report_dup(href, object_no):
s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
pass
if report_stats:
- print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design")
+ print("Broytman check_dups, Copyright (C) 2000-2023 PhiloSoft Design")
if args:
sys.stderr.write("check_urls: too many arguments\n")
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
import sys
HTTP = httplib.HTTP
+
class MyHTTP(HTTP):
def _setup(self, conn):
HTTP._setup(self, conn)
def run():
- print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design")
+ print("Broytman check_url, Copyright (C) 2010-2023 PhiloSoft Design")
if len(sys.argv) < 2:
sys.stderr.write("Usage: check_url.py url1 [url2...]\n")
entity_re = re.compile("(&\w+;)")
num_entity_re = re.compile("(&#[0-9]+;)")
+
def recode_entities(title, charset):
output = []
for part in entity_re.split(title):
import os
BKMK_DEBUG_HTML_PARSERS = os.environ.get("BKMK_DEBUG_HTML_PARSERS")
+
def parse_html(html_text, charset=None, log=None):
if not parsers:
return None
parser.icon = icon.encode(parser.charset)
return parser
+
def parse_filename(filename, charset=None, log=None):
fp = open(filename, 'r')
try:
DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
# http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
+
+
class BadDeclParser(BeautifulSoup):
def parse_declaration(self, i):
"""Treat a bogus SGML declaration as raw data. Treat a CDATA
except TypeError:
return None
+
def parse_html(html_text, charset=None, log=None):
root = _parse_html(html_text, charset)
if root is None:
return None
return HTMLParser(_charset, meta_charset, title, refresh, icon)
+
def _find_contenttype(Tag):
return (Tag.name == "meta") and \
(Tag.get("http-equiv", '').lower() == "content-type")
+
def _find_charset(Tag):
return (Tag.name == "meta") and Tag.get("charset", '')
+
def _find_refresh(Tag):
return (Tag.name == "meta") and \
(Tag.get("http-equiv", '').lower() == "refresh")
+
def _find_icon(Tag):
return (Tag.name == "link") and \
(Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
universal_charset = "utf-8"
DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
+
def _parse_html(html_text, charset):
try:
return BeautifulSoup(html_text, from_encoding=charset)
except TypeError:
return None
+
def parse_html(html_text, charset=None, log=None):
root = _parse_html(html_text, charset)
if root is None:
return None
return HTMLParser(_charset, meta_charset, title, refresh, icon)
+
def _find_contenttype(Tag):
return (Tag.name == "meta") and \
(Tag.get_attribute_list("http-equiv", '')[0].lower() == "content-type")
+
def _find_charset(Tag):
return (Tag.name == "meta") and Tag.get("charset", '')
+
def _find_refresh(Tag):
return (Tag.name == "meta") and \
(Tag.get_attribute_list("http-equiv", '')[0].lower() == "refresh")
+
def _find_icon(Tag):
return (Tag.name == "link") and \
(Tag.get_attribute_list("rel", '')[0].lower() in ('icon', 'shortcut icon'))
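# A hedged usage sketch for the _find_* predicates above (the helper name and
# flow are illustrative, not taken from the module): BeautifulSoup's find()
# accepts a callable that is called with each Tag and returns the first tag
# for which it yields a true value, so the predicates can be passed directly.
def _sketch_find_meta(html_text):
    from bs4 import BeautifulSoup
    head = BeautifulSoup(html_text, 'html.parser').find('head')
    if head is None:
        return None, None
    return head.find(_find_contenttype), head.find(_find_icon)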
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['HTMLParser']
from HTMLParser import HTMLParser
+
class HTMLParser(HTMLParser):
def __init__(self, charset, meta_charset, title, refresh, icon):
object.__init__(self)
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['import_robot', 'robot']
robot_name, robot_params = parse_params(environ.get("BKMK_ROBOT", "forking"))
+
def import_robot(robot_name):
exec("from Robots import bkmk_r%s" % robot_name)
exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name))
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['import_storage', 'storage']
storage_name, storage_params = parse_params(environ.get("BKMK_STORAGE", "pickle"))
+
def import_storage(storage_name):
exec("from Storage import bkmk_st%s" % storage_name)
exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name))
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['import_writer', 'writer']
writer_name, writer_params = parse_params(environ.get("BKMK_WRITER", "html"))
+
def import_writer(writer_name):
exec("from Writers import bkmk_w%s" % writer_name)
exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name))