"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_forking']
import sys, os
try:
- import cPickle as pickle
+ import cPickle as pickle
except ImportError:
- import pickle
+ import pickle
from subproc import Subprocess, RecordFile
from bkmk_objects import Robot
subp_pipe = None
def stop_subp(log):
- global check_subp, subp_pipe
- if check_subp:
- if log: log(" restarting hanging subprocess")
- del check_subp
- del subp_pipe
+ global check_subp, subp_pipe
+ if check_subp:
+ if log: log(" restarting hanging subprocess")
+ del check_subp
+ del subp_pipe
def restart_subp(log):
- global check_subp, subp_pipe
- stop_subp(log)
+ global check_subp, subp_pipe
+ stop_subp(log)
- check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]),
- control_stderr=True)
- subp_pipe = RecordFile(check_subp)
+ check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]),
+ control_stderr=True)
+ subp_pipe = RecordFile(check_subp)
_set_subproc = True
class robot_forking(Robot):
- subproc = 'urllib2' # Default subprocess
-
- def check_url(self, bookmark):
- global _set_subproc
- if _set_subproc:
- _set_subproc = False
-
- subproc = self.subproc
- subproc_attrs = []
- for attr in dir(self):
- if attr.startswith('subproc_'):
- subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr)))
- if subproc_attrs:
- subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs])
- os.environ['BKMK_ROBOT'] = subproc
-
- if not check_subp:
- restart_subp(self.log) # Not restart, just start afresh
-
- try:
- save_parent = bookmark.parent
- bookmark.parent = None
- subp_pipe.write_record(pickle.dumps(bookmark))
-
- if check_subp.waitForPendingChar(60): # wait a minute
- new_b = pickle.loads(subp_pipe.read_record())
- for attr in ("error", "no_error",
- "moved", "size", "md5", "real_title",
- "last_tested", "last_modified", "test_time",
- "icon", "icon_href"):
- if hasattr(new_b, attr):
- setattr(bookmark, attr, getattr(new_b, attr))
- else:
- bookmark.error = "Subprocess connection timed out"
- restart_subp(self.log)
-
- bookmark.parent = save_parent
-
- while True:
- error = check_subp.readPendingErrLine()
- if not error:
- break
- sys.stderr.write("(subp) " + error)
- sys.stderr.flush()
-
- except KeyboardInterrupt:
- return 0
-
- # Tested
- return 1
-
- def stop(self):
- stop_subp(None) # Stop subprocess; do not log restarting
+ subproc = 'urllib2' # Default subprocess
+
+ def check_url(self, bookmark):
+ global _set_subproc
+ if _set_subproc:
+ _set_subproc = False
+
+ subproc = self.subproc
+ subproc_attrs = []
+ for attr in dir(self):
+ if attr.startswith('subproc_'):
+ subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr)))
+ if subproc_attrs:
+ subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs])
+ os.environ['BKMK_ROBOT'] = subproc
+
+ if not check_subp:
+ restart_subp(self.log) # Not restart, just start afresh
+
+ try:
+ save_parent = bookmark.parent
+ bookmark.parent = None
+ subp_pipe.write_record(pickle.dumps(bookmark))
+
+ if check_subp.waitForPendingChar(60): # wait a minute
+ new_b = pickle.loads(subp_pipe.read_record())
+ for attr in ("error", "no_error",
+ "moved", "size", "md5", "real_title",
+ "last_tested", "last_modified", "test_time",
+ "icon", "icon_href"):
+ if hasattr(new_b, attr):
+ setattr(bookmark, attr, getattr(new_b, attr))
+ else:
+ bookmark.error = "Subprocess connection timed out"
+ restart_subp(self.log)
+
+ bookmark.parent = save_parent
+
+ while True:
+ error = check_subp.readPendingErrLine()
+ if not error:
+ break
+ sys.stderr.write("(subp) " + error)
+ sys.stderr.flush()
+
+ except KeyboardInterrupt:
+ return 0
+
+ # Tested
+ return 1
+
+ def stop(self):
+ stop_subp(None) # Stop subprocess; do not log restarting
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1999-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1999-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = []
sys.path.append(lib_dir) # for bkmk_objects.py
try:
- import cPickle
- pickle = cPickle
+ import cPickle
+ pickle = cPickle
except ImportError:
- import pickle
+ import pickle
from subproc import RecordFile
def run():
- bkmk_in = RecordFile(sys.stdin)
- bkmk_out = RecordFile(sys.stdout)
+ bkmk_in = RecordFile(sys.stdin)
+ bkmk_out = RecordFile(sys.stdout)
- from m_lib.flog import openlog
- log = openlog("check2.log")
- from robots import robot
- robot = robot(log)
+ from m_lib.flog import openlog
+ log = openlog("check2.log")
+ from robots import robot
+ robot = robot(log)
- while 1:
- bookmark = pickle.loads(bkmk_in.read_record())
- log(bookmark.href)
- robot.check_url(bookmark)
- bkmk_out.write_record(pickle.dumps(bookmark))
- log.outfile.flush()
+ while 1:
+ bookmark = pickle.loads(bkmk_in.read_record())
+ log(bookmark.href)
+ robot.check_url(bookmark)
+ bkmk_out.write_record(pickle.dumps(bookmark))
+ log.outfile.flush()
- log.close()
+ log.close()
if __name__ == '__main__':
- run()
+ run()
def get_error(e):
- if isinstance(e, str):
- return e
+ if isinstance(e, str):
+ return e
- else:
- s = []
- for i in e:
- s.append("'%s'" % str(i).replace('\n', "\\n"))
- return "(%s)" % ' '.join(s)
+ else:
+ s = []
+ for i in e:
+ s.append("'%s'" % str(i).replace('\n', "\\n"))
+ return "(%s)" % ' '.join(s)
icons = {} # Icon cache; maps URL to a tuple (content type, data)
# or None if there is no icon.
class robot_base(Robot):
- timeout = 60
+ timeout = 60
- def __init__(self, *args, **kw):
+ def __init__(self, *args, **kw):
Robot.__init__(self, *args, **kw)
socket.setdefaulttimeout(int(self.timeout))
- def check_url(self, bookmark):
- try:
- self.start = int(time.time())
- bookmark.icon = None
+ def check_url(self, bookmark):
+ try:
+ self.start = int(time.time())
+ bookmark.icon = None
- url_type, url_rest = urllib.splittype(bookmark.href)
- url_host, url_path = urllib.splithost(url_rest)
- url_path, url_tag = urllib.splittag(url_path)
+ url_type, url_rest = urllib.splittype(bookmark.href)
+ url_host, url_path = urllib.splithost(url_rest)
+ url_path, url_tag = urllib.splittag(url_path)
- url = "%s://%s%s" % (url_type, url_host, url_path)
- error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True)
+ url = "%s://%s%s" % (url_type, url_host, url_path)
+ error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True)
- if error:
- bookmark.error = error
- return 1
+ if error:
+ bookmark.error = error
+ return 1
- if redirect_code:
- self.set_redirect(bookmark, redirect_code, redirect_to)
- return 1
+ if redirect_code:
+ self.set_redirect(bookmark, redirect_code, redirect_to)
+ return 1
- size = 0
- last_modified = None
+ size = 0
+ last_modified = None
- if headers:
- try:
- size = headers["Content-Length"]
- except KeyError:
- size = len(content)
+ if headers:
+ try:
+ size = headers["Content-Length"]
+ except KeyError:
+ size = len(content)
- try:
- last_modified = headers["Last-Modified"]
- except KeyError:
- pass
+ try:
+ last_modified = headers["Last-Modified"]
+ except KeyError:
+ pass
+
+ if last_modified:
+ last_modified = parse_time(last_modified)
+ else:
+ size = len(content)
if last_modified:
- last_modified = parse_time(last_modified)
- else:
- size = len(content)
-
- if last_modified:
- last_modified = str(int(last_modified))
- else:
- last_modified = bookmark.last_visit
-
- bookmark.size = size
- bookmark.last_modified = last_modified
-
- md5 = md5wrapper()
- if url_type == "ftp": # Pass welcome message through MD5
- md5.update(self.get_ftp_welcome())
-
- md5.update(content)
- bookmark.md5 = str(md5)
-
- if headers:
- try:
- content_type = headers["Content-Type"]
- self.log(" Content-Type: %s" % content_type)
- try:
- # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
- content_type, charset = content_type.split(';', 1)
- content_type = content_type.strip()
- charset = charset.split('=')[1].strip().split(',')[0]
- self.log(" HTTP charset : %s" % charset)
- except (ValueError, IndexError):
- charset = None
- self.log(" no charset in Content-Type header")
- for ctype in ("text/html", "application/xhtml+xml"):
- if content_type.startswith(ctype):
- html = True
- break
- else:
- html = False
- if html:
- parser = parse_html(content, charset, self.log)
- if parser:
- bookmark.real_title = parser.title
- icon = parser.icon
- else:
- icon = None
- if not icon:
- icon = "/favicon.ico"
- icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
- self.log(" looking for icon at: %s" % icon_url)
- if icon_url in icons:
- if icons[icon_url]:
- bookmark.icon_href = icon_url
- content_type, bookmark.icon = icons[icon_url]
- self.log(" cached icon: %s" % content_type)
- else:
- self.log(" cached icon: no icon")
- else:
- try:
- _icon_url = icon_url
- for i in range(8):
- error, icon_redirect_code, icon_redirect_to, \
- icon_headers, icon_data = \
- self.get(bookmark, _icon_url)
- if icon_redirect_code:
- _icon_url = icon_redirect_to
- self.log(" redirect to : %s" % _icon_url)
- else:
- if icon_data is None:
- raise IOError("No icon")
- break
+ last_modified = str(int(last_modified))
+ else:
+ last_modified = bookmark.last_visit
+
+ bookmark.size = size
+ bookmark.last_modified = last_modified
+
+ md5 = md5wrapper()
+ if url_type == "ftp": # Pass welcome message through MD5
+ md5.update(self.get_ftp_welcome())
+
+ md5.update(content)
+ bookmark.md5 = str(md5)
+
+ if headers:
+ try:
+ content_type = headers["Content-Type"]
+ self.log(" Content-Type: %s" % content_type)
+ try:
+ # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+ content_type, charset = content_type.split(';', 1)
+ content_type = content_type.strip()
+ charset = charset.split('=')[1].strip().split(',')[0]
+ self.log(" HTTP charset : %s" % charset)
+ except (ValueError, IndexError):
+ charset = None
+ self.log(" no charset in Content-Type header")
+ for ctype in ("text/html", "application/xhtml+xml"):
+ if content_type.startswith(ctype):
+ html = True
+ break
+ else:
+ html = False
+ if html:
+ parser = parse_html(content, charset, self.log)
+ if parser:
+ bookmark.real_title = parser.title
+ icon = parser.icon
else:
- raise IOError("Too many redirects")
- except:
- etype, emsg, tb = sys.exc_info()
- self.log(" no icon : %s %s" % (etype, emsg))
- etype = emsg = tb = None
- icons[icon_url] = None
- else:
- content_type = icon_headers["Content-Type"]
- if content_type.startswith("application/") \
- or content_type.startswith("image/") \
- or content_type.startswith("text/plain"):
- bookmark.icon_href = icon_url
- self.log(" got icon : %s" % content_type)
- if content_type.startswith("application/") \
- or content_type.startswith("text/plain"):
- self.log(" non-image content type, assume x-icon")
- content_type = 'image/x-icon'
- bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
- icons[icon_url] = (content_type, bookmark.icon)
+ icon = None
+ if not icon:
+ icon = "/favicon.ico"
+ icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
+ self.log(" looking for icon at: %s" % icon_url)
+ if icon_url in icons:
+ if icons[icon_url]:
+ bookmark.icon_href = icon_url
+ content_type, bookmark.icon = icons[icon_url]
+ self.log(" cached icon: %s" % content_type)
+ else:
+ self.log(" cached icon: no icon")
else:
- self.log(" no icon : bad content type '%s'" % content_type)
- icons[icon_url] = None
- if parser and parser.refresh:
- refresh = parser.refresh
- try:
- url = refresh.split('=', 1)[1]
- except IndexError:
- url = "self"
- try:
- timeout = float(refresh.split(';')[0])
- except (IndexError, ValueError):
- self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh))
- else:
- try:
- timeout = int(refresh.split(';')[0])
- except ValueError:
- pass # float timeout
- self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
-
- except KeyError as key:
- self.log(" no header: %s" % key)
-
- except EOFError:
- bookmark.error = "Unexpected EOF (FTP server closed connection)"
- self.log(' EOF: %s' % bookmark.error)
-
- except KeyboardInterrupt:
- self.log("Keyboard interrupt (^C)")
- return 0
-
- except socket.error as e:
- bookmark.error = get_error(e)
- self.log(bookmark.error)
-
- except:
- import traceback
- traceback.print_exc()
- bookmark.error = "Exception!"
- self.log(' Exception: %s' % bookmark.error)
-
- finally:
- self.finish_check_url(bookmark)
-
- # Tested
- return 1
-
- def set_redirect(self, bookmark, errcode, newurl):
+ try:
+ _icon_url = icon_url
+ for i in range(8):
+ error, icon_redirect_code, icon_redirect_to, \
+ icon_headers, icon_data = \
+ self.get(bookmark, _icon_url)
+ if icon_redirect_code:
+ _icon_url = icon_redirect_to
+ self.log(" redirect to : %s" % _icon_url)
+ else:
+ if icon_data is None:
+ raise IOError("No icon")
+ break
+ else:
+ raise IOError("Too many redirects")
+ except:
+ etype, emsg, tb = sys.exc_info()
+ self.log(" no icon : %s %s" % (etype, emsg))
+ etype = emsg = tb = None
+ icons[icon_url] = None
+ else:
+ content_type = icon_headers["Content-Type"]
+ if content_type.startswith("application/") \
+ or content_type.startswith("image/") \
+ or content_type.startswith("text/plain"):
+ bookmark.icon_href = icon_url
+ self.log(" got icon : %s" % content_type)
+ if content_type.startswith("application/") \
+ or content_type.startswith("text/plain"):
+ self.log(" non-image content type, assume x-icon")
+ content_type = 'image/x-icon'
+ bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
+ icons[icon_url] = (content_type, bookmark.icon)
+ else:
+ self.log(" no icon : bad content type '%s'" % content_type)
+ icons[icon_url] = None
+ if parser and parser.refresh:
+ refresh = parser.refresh
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ try:
+ timeout = float(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh))
+ else:
+ try:
+ timeout = int(refresh.split(';')[0])
+ except ValueError:
+ pass # float timeout
+ self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
+
+ except KeyError as key:
+ self.log(" no header: %s" % key)
+
+ except EOFError:
+ bookmark.error = "Unexpected EOF (FTP server closed connection)"
+ self.log(' EOF: %s' % bookmark.error)
+
+ except KeyboardInterrupt:
+ self.log("Keyboard interrupt (^C)")
+ return 0
+
+ except socket.error as e:
+ bookmark.error = get_error(e)
+ self.log(bookmark.error)
+
+ except:
+ import traceback
+ traceback.print_exc()
+ bookmark.error = "Exception!"
+ self.log(' Exception: %s' % bookmark.error)
+
+ finally:
+ self.finish_check_url(bookmark)
+
+ # Tested
+ return 1
+
+ def set_redirect(self, bookmark, errcode, newurl):
bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl)
self.log(' Moved: %s' % bookmark.moved)
- def finish_check_url(self, bookmark):
- start = self.start
- bookmark.last_tested = str(start)
- now = int(time.time())
- bookmark.test_time = str(now - start)
+ def finish_check_url(self, bookmark):
+ start = self.start
+ bookmark.last_tested = str(start)
+ now = int(time.time())
+ bookmark.test_time = str(now - start)
class RedirectException(Exception):
- def __init__(self, errcode, newurl):
- Exception.__init__(self)
- self.errcode = errcode
- self.newurl = newurl
+ def __init__(self, errcode, newurl):
+ Exception.__init__(self)
+ self.errcode = errcode
+ self.newurl = newurl
class MyURLopener(urllib.URLopener):
- # Error 301 -- relocated (permanently)
- def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
- if headers.has_key('location'):
- newurl = headers['location']
- elif headers.has_key('uri'):
- newurl = headers['uri']
- else:
- newurl = "Nowhere"
- raise RedirectException(errcode, newurl)
-
- # Error 302 -- relocated (temporarily)
- http_error_302 = http_error_301
- # Error 303 -- relocated (see other)
- http_error_303 = http_error_301
- # Error 307 -- relocated (temporarily)
- http_error_307 = http_error_301
-
- # Error 401 -- authentication required
- def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
- raise IOError(('http error', errcode, "Authentication required ", headers))
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- if fp:
- void = fp.read()
- fp.close()
- raise IOError(('http error', errcode, errmsg, headers))
+ # Error 301 -- relocated (permanently)
+ def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
+ if headers.has_key('location'):
+ newurl = headers['location']
+ elif headers.has_key('uri'):
+ newurl = headers['uri']
+ else:
+ newurl = "Nowhere"
+ raise RedirectException(errcode, newurl)
+
+ # Error 302 -- relocated (temporarily)
+ http_error_302 = http_error_301
+ # Error 303 -- relocated (see other)
+ http_error_303 = http_error_301
+ # Error 307 -- relocated (temporarily)
+ http_error_307 = http_error_301
+
+ # Error 401 -- authentication required
+ def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
+ raise IOError(('http error', errcode, "Authentication required ", headers))
+
+ def http_error_default(self, url, fp, errcode, errmsg, headers):
+ if fp:
+ void = fp.read()
+ fp.close()
+ raise IOError(('http error', errcode, errmsg, headers))
urllib._urlopener = MyURLopener()
ftpcache_key = None
class myftpwrapper(urllib_ftpwrapper):
- def __init__(self, user, passwd, host, port, dirs):
- urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
- global ftpcache_key
- ftpcache_key = (user, host, port, '/'.join(dirs))
+ def __init__(self, user, passwd, host, port, dirs):
+ urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
+ global ftpcache_key
+ ftpcache_key = (user, host, port, '/'.join(dirs))
urllib.ftpwrapper = myftpwrapper
class robot_urllib(robot_base):
- def get(self, bookmark, url, accept_charset=False):
- try:
- # Set fake referer to the base URL
- urllib._urlopener.addheaders[2] = ('Referer', url)
-
- if accept_charset and bookmark.charset:
- urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
- try:
- fname, headers = urllib.urlretrieve(url)
- finally:
+ def get(self, bookmark, url, accept_charset=False):
+ try:
+ # Set fake referer to the base URL
+ urllib._urlopener.addheaders[2] = ('Referer', url)
+
if accept_charset and bookmark.charset:
- del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
-
- infile = open(fname, 'rb')
- content = infile.read()
- infile.close()
-
- return None, None, None, headers, content
-
- except RedirectException as e:
- return None, e.errcode, e.newurl, None, None
-
- except IOError as e:
- if (e[0] == "http error") and (e[1] == -1):
- error = None
- bookmark.no_error = "The server did not return any header - it is not an error, actually"
- self.log(' no headers: %s' % bookmark.no_error)
- else:
- error = get_error(e)
- self.log(' Error: %s' % error)
-
- return error, None, None, None, None
-
- def get_ftp_welcome(self):
- global ftpcache_key
- _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
- ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db.
- # If there are - ftpcache_key in prev line is invalid.
- return _welcome
-
- def finish_check_url(self, bookmark):
- robot_base.finish_check_url(self, bookmark)
- urllib.urlcleanup()
+ urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
+ try:
+ fname, headers = urllib.urlretrieve(url)
+ finally:
+ if accept_charset and bookmark.charset:
+ del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
+
+ infile = open(fname, 'rb')
+ content = infile.read()
+ infile.close()
+
+ return None, None, None, headers, content
+
+ except RedirectException as e:
+ return None, e.errcode, e.newurl, None, None
+
+ except IOError as e:
+ if (e[0] == "http error") and (e[1] == -1):
+ error = None
+ bookmark.no_error = "The server did not return any header - it is not an error, actually"
+ self.log(' no headers: %s' % bookmark.no_error)
+ else:
+ error = get_error(e)
+ self.log(' Error: %s' % error)
+
+ return error, None, None, None, None
+
+ def get_ftp_welcome(self):
+ global ftpcache_key
+ _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
+ ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db.
+ # If there are - ftpcache_key in prev line is invalid.
+ return _welcome
+
+ def finish_check_url(self, bookmark):
+ robot_base.finish_check_url(self, bookmark)
+ urllib.urlcleanup()
class storage_flad(Walker):
- filename = "bookmarks_db.flad"
-
- def __init__(self):
- self.first_object = 1
-
- def root_folder(self, f):
- header = string.replace(f.header, ".", ".\n")
- header = string.replace(header, "<", "\n<", 3)[1:]
- header_file = open("header", 'w')
- header_file.write(header + "\n")
- header_file.write('<H1>%s</H1>\n\n' % f.name)
- if f.comment: header_file.write('<DD>%s\n' % f.comment)
- header_file.close()
-
- def start_folder(self, f, level):
- self.outfile.write("""
-Level: %d
-Folder: %s
-AddDate: %s
-Comment: %s
-LastModified: %s
-""" % (level, f.name, f.add_date, f.comment, f.last_modified or ''))
-
- def bookmark(self, b, level):
- self.outfile.write("""
-Level: %d
-Title: %s
-URL: %s
-AddDate: %s
-LastVisit: %s
-LastModified: %s
-Keyword: %s
-Comment: %s
-IconURI: %s
-Icon: %s
-Charset: %s
-""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified,
- b.keyword, b.comment.replace('\n', "\\n"),
- b.icon_href or '', b.icon or '', b.charset or ''))
-
- def ruler(self, r, level):
- self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
-
- def store(self, root_folder):
- self.outfile = open(self.filename, 'w')
- root_folder.walk_depth(self)
- self.outfile.close()
-
- def unindent(self, old_level, new_level):
- while old_level > new_level:
- old_level = old_level - 1
- del self.folder_stack[-1]
-
- if self.folder_stack:
- self.current_folder = self.folder_stack[-1]
- else:
- self.current_folder = None
-
- def load(self):
- bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"])
-
- root_folder = Folder()
- self.folder_stack = [root_folder]
- self.current_folder = root_folder
-
- header_file = open("header", 'r')
- header = header_file.read()
- header_file.close()
-
- header = string.split(header, "\n")
- root_folder.header = string.join(header[:5], '')
- root_folder.name = header[5][4:-5]
- root_folder.comment = string.join(header[7:], '')[4:]
-
- save_level = 0
- got_folder = 1 # Start as if we already have one folder
-
- for record in bookmarks_db:
- level = int(record["Level"])
-
- if level == save_level:
- pass
- elif level == save_level + 1:
- if not got_folder:
- raise ValueError("indent without folder")
- elif level <= save_level - 1:
- self.unindent(save_level, level)
- else:
- raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))
-
- save_level = level
- got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
-
- if record.has_key("URL"):
- comment = record["Comment"].replace("\\n", '\n')
- bookmark = Bookmark(record["URL"], record["AddDate"],
- record["LastVisit"], record["LastModified"],
- record["Keyword"], comment)
- bookmark.name = record["Title"]
- self.current_folder.append(bookmark)
-
- elif record.has_key("Folder"):
- folder = Folder(record["AddDate"], record["Comment"], record["LastModified"])
- folder.name = record["Folder"]
- self.current_folder.append(folder)
- self.folder_stack.append(folder)
- self.current_folder = folder
-
- elif record.has_key("Ruler"):
- self.current_folder.append(Ruler())
-
- else:
- raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record))
-
- if save_level >= 0:
- self.unindent(save_level, 0)
- else:
- raise ValueError("new level (%d) too little - must be >= 0" % save_level)
-
- return root_folder
+ filename = "bookmarks_db.flad"
+
+ def __init__(self):
+ self.first_object = 1
+
+ def root_folder(self, f):
+ header = string.replace(f.header, ".", ".\n")
+ header = string.replace(header, "<", "\n<", 3)[1:]
+ header_file = open("header", 'w')
+ header_file.write(header + "\n")
+ header_file.write('<H1>%s</H1>\n\n' % f.name)
+ if f.comment: header_file.write('<DD>%s\n' % f.comment)
+ header_file.close()
+
+ def start_folder(self, f, level):
+ self.outfile.write("""
+Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+LastModified: %s
+""" % (level, f.name, f.add_date, f.comment, f.last_modified or ''))
+
+ def bookmark(self, b, level):
+ self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Keyword: %s
+Comment: %s
+IconURI: %s
+Icon: %s
+Charset: %s
+""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified,
+ b.keyword, b.comment.replace('\n', "\\n"),
+ b.icon_href or '', b.icon or '', b.charset or ''))
+
+ def ruler(self, r, level):
+ self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
+
+ def store(self, root_folder):
+ self.outfile = open(self.filename, 'w')
+ root_folder.walk_depth(self)
+ self.outfile.close()
+
+ def unindent(self, old_level, new_level):
+ while old_level > new_level:
+ old_level = old_level - 1
+ del self.folder_stack[-1]
+
+ if self.folder_stack:
+ self.current_folder = self.folder_stack[-1]
+ else:
+ self.current_folder = None
+
+ def load(self):
+ bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"])
+
+ root_folder = Folder()
+ self.folder_stack = [root_folder]
+ self.current_folder = root_folder
+
+ header_file = open("header", 'r')
+ header = header_file.read()
+ header_file.close()
+
+ header = string.split(header, "\n")
+ root_folder.header = string.join(header[:5], '')
+ root_folder.name = header[5][4:-5]
+ root_folder.comment = string.join(header[7:], '')[4:]
+
+ save_level = 0
+ got_folder = 1 # Start as if we already have one folder
+
+ for record in bookmarks_db:
+ level = int(record["Level"])
+
+ if level == save_level:
+ pass
+ elif level == save_level + 1:
+ if not got_folder:
+ raise ValueError("indent without folder")
+ elif level <= save_level - 1:
+ self.unindent(save_level, level)
+ else:
+ raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))
+
+ save_level = level
+ got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
+
+ if record.has_key("URL"):
+ comment = record["Comment"].replace("\\n", '\n')
+ bookmark = Bookmark(record["URL"], record["AddDate"],
+ record["LastVisit"], record["LastModified"],
+ record["Keyword"], comment)
+ bookmark.name = record["Title"]
+ self.current_folder.append(bookmark)
+
+ elif record.has_key("Folder"):
+ folder = Folder(record["AddDate"], record["Comment"], record["LastModified"])
+ folder.name = record["Folder"]
+ self.current_folder.append(folder)
+ self.folder_stack.append(folder)
+ self.current_folder = folder
+
+ elif record.has_key("Ruler"):
+ self.current_folder.append(Ruler())
+
+ else:
+ raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record))
+
+ if save_level >= 0:
+ self.unindent(save_level, 0)
+ else:
+ raise ValueError("new level (%d) too little - must be >= 0" % save_level)
+
+ return root_folder
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['storage_json']
try:
- import json
+ import json
except ImportError:
- import simplejson as json
+ import simplejson as json
from bkmk_objects import Folder, Bookmark, Ruler, Walker
self.current_folder = root_folder
if "type" not in bookmarks_dict:
- bookmarks_dict["id"] = "0"
- bookmarks_dict["title"] = ""
- bookmarks_dict["type"] = "text/x-moz-place-container"
+ bookmarks_dict["id"] = "0"
+ bookmarks_dict["title"] = ""
+ bookmarks_dict["type"] = "text/x-moz-place-container"
self.load_folder(root_folder, bookmarks_dict)
if self.folder_stack:
raise RuntimeError('Excessive folder stack: %s' % self.folder_stack)
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['storage_pickle']
try:
- import cPickle
- pickle = cPickle
+ import cPickle
+ pickle = cPickle
except ImportError:
- import pickle
+ import pickle
class storage_pickle(object):
- filename = "bookmarks_db.pickle"
+ filename = "bookmarks_db.pickle"
- def store(self, root_folder):
- outfile = open(self.filename, 'wb')
- pickle.dump(root_folder, outfile, 1)
- outfile.close()
+ def store(self, root_folder):
+ outfile = open(self.filename, 'wb')
+ pickle.dump(root_folder, outfile, 1)
+ outfile.close()
- def load(self):
- infile = open(self.filename, 'rb')
- root_folder = pickle.load(infile)
- infile.close()
+ def load(self):
+ infile = open(self.filename, 'rb')
+ root_folder = pickle.load(infile)
+ infile.close()
- return root_folder
+ return root_folder
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['writer_flad']
def strftime(s):
- try:
- return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
- except (TypeError, ValueError): # s is None or is already formatted
- return s
+ try:
+ return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
+ except (TypeError, ValueError): # s is None or is already formatted
+ return s
class writer_flad(Writer):
- filename = "bookmarks_db.flad"
-
- def __init__(self, outfile, prune=None):
- Writer.__init__(self, outfile, prune)
- self.first_object = 1
-
- def start_folder(self, f, level):
- self.outfile.write("""
-Level: %d
-Folder: %s
-AddDate: %s
-Comment: %s
-LastModified: %s
-""" % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified)))
-
- def bookmark(self, b, level):
- self.outfile.write("""
-Level: %d
-Title: %s
-URL: %s
-AddDate: %s
-LastVisit: %s
-LastModified: %s
-Keyword: %s
-Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment))
-
- for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
- ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
- ("real_title", "RealTitle"), ("test_time", "TestTime"),
- ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")):
- if hasattr(b, attr_name):
- value = getattr(b, attr_name)
- if isinstance(value, unicode):
- value = value.encode('utf-8')
- self.outfile.write("\n%s: %s" % (attr_out, value))
-
- if hasattr(b, "last_tested"):
- self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
-
- self.outfile.write("\n")
-
- def ruler(self, r, level):
- self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
+ filename = "bookmarks_db.flad"
+
+ def __init__(self, outfile, prune=None):
+ Writer.__init__(self, outfile, prune)
+ self.first_object = 1
+
+ def start_folder(self, f, level):
+ self.outfile.write("""
+Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+LastModified: %s
+""" % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified)))
+
+ def bookmark(self, b, level):
+ self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Keyword: %s
+Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment))
+
+ for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
+ ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
+ ("real_title", "RealTitle"), ("test_time", "TestTime"),
+ ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")):
+ if hasattr(b, attr_name):
+ value = getattr(b, attr_name)
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ self.outfile.write("\n%s: %s" % (attr_out, value))
+
+ if hasattr(b, "last_tested"):
+ self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
+
+ self.outfile.write("\n")
+
+ def ruler(self, r, level):
+ self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['writer_flad_err']
class writer_flad_err(writer_flad):
- filename = "bookmarks_db.errors"
+ filename = "bookmarks_db.errors"
- def bookmark(self, b, level):
- if hasattr(b, "error"):
- writer_flad.bookmark(self, b, level)
+ def bookmark(self, b, level):
+ if hasattr(b, "error"):
+ writer_flad.bookmark(self, b, level)
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['writer_html']
def dump_comment(comment):
- comment = comment.replace("<BR>\n", "\n")
- if BKMK_FORMAT == "NETSCAPE":
- comment = comment.replace("\n", "<BR>\n")
- return comment
+ comment = comment.replace("<BR>\n", "\n")
+ if BKMK_FORMAT == "NETSCAPE":
+ comment = comment.replace("\n", "<BR>\n")
+ return comment
ind_s = " "*4
class writer_html(Writer):
- filename = "bookmarks.html"
+ filename = "bookmarks.html"
- def _folder(self, f, level):
- if f.comment: self.outfile.write('<DD>%s\n' % dump_comment(f.comment))
- self.outfile.write(ind_s*level + "<DL><p>\n")
+ def _folder(self, f, level):
+ if f.comment: self.outfile.write('<DD>%s\n' % dump_comment(f.comment))
+ self.outfile.write(ind_s*level + "<DL><p>\n")
- def root_folder(self, f):
- self.outfile.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
- self.outfile.write(f.header + "\n")
- self.outfile.write('<H1>%s</H1>\n\n' % quote_title(f.name))
- self._folder(f, 0)
+ def root_folder(self, f):
+ self.outfile.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
+ self.outfile.write(f.header + "\n")
+ self.outfile.write('<H1>%s</H1>\n\n' % quote_title(f.name))
+ self._folder(f, 0)
- def start_folder(self, f, level):
- self.outfile.write(ind_s*level + '<DT><H3 ADD_DATE="%s"' % f.add_date)
- if (BKMK_FORMAT == "MOZILLA") and f.last_modified: self.outfile.write(' LAST_MODIFIED="%s"' % f.last_modified)
- self.outfile.write('>%s</H3>\n' % quote_title(f.name))
- self._folder(f, level)
+ def start_folder(self, f, level):
+ self.outfile.write(ind_s*level + '<DT><H3 ADD_DATE="%s"' % f.add_date)
+ if (BKMK_FORMAT == "MOZILLA") and f.last_modified: self.outfile.write(' LAST_MODIFIED="%s"' % f.last_modified)
+ self.outfile.write('>%s</H3>\n' % quote_title(f.name))
+ self._folder(f, level)
- def end_folder(self, f, level):
- self.outfile.write(ind_s*level + "</DL><p>\n")
+ def end_folder(self, f, level):
+ self.outfile.write(ind_s*level + "</DL><p>\n")
- def bookmark(self, b, level):
- self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href, b.add_date))
- if b.last_visit: self.outfile.write(' LAST_VISIT="%s"' % b.last_visit)
- if b.last_modified:
- self.outfile.write(' LAST_MODIFIED="%s"' % b.last_modified)
- if BKMK_FORMAT == "MOZILLA":
- if b.keyword: self.outfile.write(' SHORTCUTURL="%s"' % b.keyword)
- if b.icon_href:
- value = b.icon_href
- if isinstance(value, unicode):
- value = value.encode('utf-8')
- self.outfile.write(' ICON_URI="%s"' % value)
- if b.icon: self.outfile.write(' ICON="%s"' % b.icon)
- if b.charset: self.outfile.write(' LAST_CHARSET="%s"' % b.charset)
- self.outfile.write('>%s</A>\n' % quote_title(b.name))
- if b.comment: self.outfile.write('<DD>%s\n' % dump_comment(b.comment))
+ def bookmark(self, b, level):
+ self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href, b.add_date))
+ if b.last_visit: self.outfile.write(' LAST_VISIT="%s"' % b.last_visit)
+ if b.last_modified:
+ self.outfile.write(' LAST_MODIFIED="%s"' % b.last_modified)
+ if BKMK_FORMAT == "MOZILLA":
+ if b.keyword: self.outfile.write(' SHORTCUTURL="%s"' % b.keyword)
+ if b.icon_href:
+ value = b.icon_href
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ self.outfile.write(' ICON_URI="%s"' % value)
+ if b.icon: self.outfile.write(' ICON="%s"' % b.icon)
+ if b.charset: self.outfile.write(' LAST_CHARSET="%s"' % b.charset)
+ self.outfile.write('>%s</A>\n' % quote_title(b.name))
+ if b.comment: self.outfile.write('<DD>%s\n' % dump_comment(b.comment))
- def ruler(self, r, level):
- self.outfile.write(ind_s*(level+1) + "<HR>\n")
+ def ruler(self, r, level):
+ self.outfile.write(ind_s*(level+1) + "<HR>\n")
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['writer_txt']
class writer_txt(Writer):
- filename = "dump.txt"
+ filename = "dump.txt"
- def root_folder(self, f):
- self.outfile.write("Folder: %s\n" % f.name)
+ def root_folder(self, f):
+ self.outfile.write("Folder: %s\n" % f.name)
- def start_folder(self, f, level):
- self.outfile.write(ind_s*level + "Folder: %s\n" % f.name)
+ def start_folder(self, f, level):
+ self.outfile.write(ind_s*level + "Folder: %s\n" % f.name)
- def end_folder(self, f, level):
- self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name)
+ def end_folder(self, f, level):
+ self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name)
- def bookmark(self, b, level):
- self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name)
+ def bookmark(self, b, level):
+ self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name)
- def ruler(self, r, level):
- self.outfile.write(ind_s*(level+1) + "-----\n")
+ def ruler(self, r, level):
+ self.outfile.write(ind_s*(level+1) + "-----\n")
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "t:")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "t:")
- report_stats = 1
- title = ''
+ report_stats = 1
+ title = ''
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- elif _opt == '-t':
- title = _arg
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ elif _opt == '-t':
+ title = _arg
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if len(args) != 1:
- sys.stderr.write("bkmk-add: too many or too few arguments\n")
- sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n")
- sys.exit(1)
+ if len(args) != 1:
+ sys.stderr.write("bkmk-add: too many or too few arguments\n")
+ sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n")
+ sys.exit(1)
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
+ root_folder = storage.load()
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
- href = args[0]
- now = int(time.time())
- bookmark = Bookmark(href, str(now), '0', '0')
- bookmark.name = ''
- bookmark.parent = None
+ href = args[0]
+ now = int(time.time())
+ bookmark = Bookmark(href, str(now), '0', '0')
+ bookmark.name = ''
+ bookmark.parent = None
- global robot
- robot = robot(None)
+ global robot
+ robot = robot(None)
- if robot.check_url(bookmark): # get real title and last modified date
- if title: # forced title
- bookmark.name = title
- elif hasattr(bookmark, "real_title"):
- bookmark.name = bookmark.real_title
- if report_stats:
- sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name))
- del bookmark.parent
- root_folder.append(bookmark)
+ if robot.check_url(bookmark): # get real title and last modified date
+ if title: # forced title
+ bookmark.name = title
+ elif hasattr(bookmark, "real_title"):
+ bookmark.name = bookmark.real_title
+ if report_stats:
+ sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name))
+ del bookmark.parent
+ root_folder.append(bookmark)
- if report_stats:
- sys.stdout.write("Storing %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Storing %s: " % storage.filename)
+ sys.stdout.flush()
- storage.store(root_folder)
+ storage.store(root_folder)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
if __name__ == '__main__':
- run()
+ run()
def run():
- optlist, args = getopt(sys.argv[1:], "is")
+ optlist, args = getopt(sys.argv[1:], "is")
- show_pbar = True
- report_stats = 1
-
- for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
+ show_pbar = True
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if args:
- if len(args) > 1:
- sys.stderr.write("bkmk2db: too many arguments\n")
- sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n")
- sys.exit(1)
+ if args:
+ if len(args) > 1:
+ sys.stderr.write("bkmk2db: too many arguments\n")
+ sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n")
+ sys.exit(1)
- filename = args[0]
+ filename = args[0]
- else:
- filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+ else:
+ filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
- if report_stats:
- from storage import storage_name
- sys.stdout.write("Converting %s to %s: " % (filename, storage_name))
- sys.stdout.flush()
+ if report_stats:
+ from storage import storage_name
+ sys.stdout.write("Converting %s to %s: " % (filename, storage_name))
+ sys.stdout.flush()
- if show_pbar:
- show_pbar = sys.stderr.isatty()
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
- if show_pbar:
- try:
- from m_lib.pbar.tty_pbar import ttyProgressBar
- except ImportError:
- show_pbar = 0
+ if show_pbar:
+ try:
+ from m_lib.pbar.tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
- if show_pbar:
- try:
- size = os.path.getsize(filename)
- except:
- print(filename, ": no such file")
- sys.exit(1)
+ if show_pbar:
+ try:
+ size = os.path.getsize(filename)
+ except:
+ print(filename, ": no such file")
+ sys.exit(1)
- if show_pbar:
- pbar = ttyProgressBar(0, size)
- lng = 0
+ if show_pbar:
+ pbar = ttyProgressBar(0, size)
+ lng = 0
- # This is for DOS - it counts CRLF, which len() counts as 1 char!
- if os.name == 'dos' or os.name == 'nt' :
- dos_add = 1
- else:
- dos_add = 0 # UNIX' and Mac's len() counts CR or LF correctly
+ # This is for DOS - it counts CRLF, which len() counts as 1 char!
+ if os.name == 'dos' or os.name == 'nt' :
+ dos_add = 1
+ else:
+ dos_add = 0 # UNIX' and Mac's len() counts CR or LF correctly
- infile = open(filename, 'r')
- parser = BkmkParser()
+ infile = open(filename, 'r')
+ parser = BkmkParser()
- line_no = 0
- lng = 0
- ok = 1
+ line_no = 0
+ lng = 0
+ ok = 1
- for line in infile:
- if show_pbar:
- lng = lng + len(line) + dos_add
- pbar.display(lng)
+ for line in infile:
+ if show_pbar:
+ lng = lng + len(line) + dos_add
+ pbar.display(lng)
- #line = line.strip()
- line_no = line_no + 1
+ #line = line.strip()
+ line_no = line_no + 1
- try:
- parser.feed(line)
- except:
- ok = 0
- break
+ try:
+ parser.feed(line)
+ except:
+ ok = 0
+ break
- try:
- parser.close()
- except:
- ok = 0
+ try:
+ parser.close()
+ except:
+ ok = 0
- infile.close()
+ infile.close()
- if show_pbar:
- del pbar
+ if show_pbar:
+ del pbar
- if report_stats:
- print("Ok")
- print(line_no, "lines proceed")
- print(parser.urls, "urls found")
- print(parser.objects, "objects created")
+ if report_stats:
+ print("Ok")
+ print(line_no, "lines proceed")
+ print(parser.urls, "urls found")
+ print(parser.objects, "objects created")
- if ok:
- from storage import storage
- storage = storage()
- storage.store(parser.root_folder)
+ if ok:
+ from storage import storage
+ storage = storage()
+ storage.store(parser.root_folder)
- else:
- import traceback
- traceback.print_exc()
- sys.exit(1)
+ else:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
if __name__ == '__main__':
- run()
+ run()
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
class Folder(list):
- isFolder = 1
- isBookmark = 0
-
- def __init__(self, add_date=None, comment='', last_modified=None):
- super(Folder, self).__init__()
- self.comment = comment
- self.add_date = add_date
- self.last_modified = last_modified
-
- def walk_depth(self, walker, level=0):
- if hasattr(self, "header"): # root folder
- prune = 0
- walker.root_folder(self)
- else:
- prune = walker.prune_folder(self)
- if not prune:
- walker.start_folder(self, level)
-
- if not prune:
- for object in self:
- if object.isFolder:
- object.walk_depth(walker, level+1)
- elif object.isBookmark:
- walker.bookmark(object, level)
- else:
- walker.ruler(object, level)
-
- walker.end_folder(self, level)
+ isFolder = 1
+ isBookmark = 0
+
+ def __init__(self, add_date=None, comment='', last_modified=None):
+ super(Folder, self).__init__()
+ self.comment = comment
+ self.add_date = add_date
+ self.last_modified = last_modified
+
+ def walk_depth(self, walker, level=0):
+ if hasattr(self, "header"): # root folder
+ prune = 0
+ walker.root_folder(self)
+ else:
+ prune = walker.prune_folder(self)
+ if not prune:
+ walker.start_folder(self, level)
+
+ if not prune:
+ for object in self:
+ if object.isFolder:
+ object.walk_depth(walker, level+1)
+ elif object.isBookmark:
+ walker.bookmark(object, level)
+ else:
+ walker.ruler(object, level)
+
+ walker.end_folder(self, level)
class Bookmark(object):
- isFolder = 0
- isBookmark = 1
-
- def __init__(self, href, add_date, last_visit=None, last_modified=None,
- keyword=None, comment='', icon_href=None, icon=None,
- charset=None, parser_charset=None):
- protocol, request = urllib.splittype(href)
- user, password, port = None, None, None
- host, path = urllib.splithost(request)
- if host:
- user, host = urllib.splituser(host)
- if user:
- user, password = urllib.splitpasswd(user)
- host, port = urllib.splitport(host)
- if port: port = int(port)
-
- if protocol == 'place':
- href = protocol + ":"
- else:
- href = protocol + "://"
- if user:
- href += urllib.quote(user)
- if password:
- href += ':' + urllib.quote(password)
- href += '@'
- if host:
- href += host.decode(parser_charset or 'utf-8').encode('idna')
- if port:
- href += ':%d' % port
- if path:
- href += path
-
- self.href = href
- self.add_date = add_date
- self.last_visit = last_visit
- self.last_modified = last_modified
- self.keyword = keyword
- self.comment = comment
- self.icon_href = icon_href
- self.icon = icon
- self.charset = charset
+ isFolder = 0
+ isBookmark = 1
+
+ def __init__(self, href, add_date, last_visit=None, last_modified=None,
+ keyword=None, comment='', icon_href=None, icon=None,
+ charset=None, parser_charset=None):
+ protocol, request = urllib.splittype(href)
+ user, password, port = None, None, None
+ host, path = urllib.splithost(request)
+ if host:
+ user, host = urllib.splituser(host)
+ if user:
+ user, password = urllib.splitpasswd(user)
+ host, port = urllib.splitport(host)
+ if port: port = int(port)
+
+ if protocol == 'place':
+ href = protocol + ":"
+ else:
+ href = protocol + "://"
+ if user:
+ href += urllib.quote(user)
+ if password:
+ href += ':' + urllib.quote(password)
+ href += '@'
+ if host:
+ href += host.decode(parser_charset or 'utf-8').encode('idna')
+ if port:
+ href += ':%d' % port
+ if path:
+ href += path
+
+ self.href = href
+ self.add_date = add_date
+ self.last_visit = last_visit
+ self.last_modified = last_modified
+ self.keyword = keyword
+ self.comment = comment
+ self.icon_href = icon_href
+ self.icon = icon
+ self.charset = charset
class Ruler(object):
- isFolder = 0
- isBookmark = 0
+ isFolder = 0
+ isBookmark = 0
class Walker(object):
- """
- Interface class. Any instance that will be passed to Folder.walk_depth
- may be derived from this class. It is not mandatory - unlike Java
- Python does not require interface classes; but it is convenient to have
- some methods predefined to no-op, in case you do not want to
- provide end_folder etc.
- """
+ """
+ Interface class. Any instance that will be passed to Folder.walk_depth
+ may be derived from this class. It is not mandatory - unlike Java
+ Python does not require interface classes; but it is convenient to have
+ some methods predefined to no-op, in case you do not want to
+ provide end_folder etc.
+ """
- def root_folder(self, r):
- pass
+ def root_folder(self, r):
+ pass
- def start_folder(self, f, level):
- pass
+ def start_folder(self, f, level):
+ pass
- def end_folder(self, f, level):
- pass
+ def end_folder(self, f, level):
+ pass
- def bookmark(self, b, level):
- pass
+ def bookmark(self, b, level):
+ pass
- def ruler(self, r, level):
- pass
+ def ruler(self, r, level):
+ pass
- def prune_folder(self, folder):
- return 0
+ def prune_folder(self, folder):
+ return 0
class Writer(Walker):
- def __init__(self, outfile, prune=None):
- self.outfile = outfile
- self.prune = prune
+ def __init__(self, outfile, prune=None):
+ self.outfile = outfile
+ self.prune = prune
- def prune_folder(self, folder):
- return self.prune == folder.name
+ def prune_folder(self, folder):
+ return self.prune == folder.name
class Robot(object):
- def __init__(self, log):
- self.log = log
+ def __init__(self, log):
+ self.log = log
- def stop(self):
- pass # Nothing to do on cleanup
+ def stop(self):
+ pass # Nothing to do on cleanup
# Helper class to make inverse links (nodes linked to their parent)
class InverseLinker(Walker):
- def root_folder(self, r):
- self.parent_stack = [r]
+ def root_folder(self, r):
+ self.parent_stack = [r]
- def start_folder(self, f, level):
- f.parent = self.parent_stack[-1]
- self.parent_stack.append(f) # Push the folder onto the stack of parents
+ def start_folder(self, f, level):
+ f.parent = self.parent_stack[-1]
+ self.parent_stack.append(f) # Push the folder onto the stack of parents
- def end_folder(self, f, level):
- del self.parent_stack[-1] # Pop off the stack
+ def end_folder(self, f, level):
+ del self.parent_stack[-1] # Pop off the stack
- def bookmark(self, b, level):
- b.parent = self.parent_stack[-1]
+ def bookmark(self, b, level):
+ b.parent = self.parent_stack[-1]
- def ruler(self, r, level):
- r.parent = self.parent_stack[-1]
+ def ruler(self, r, level):
+ r.parent = self.parent_stack[-1]
# Helper class to make a linear representation of the tree
class Linear(Walker):
- def root_folder(self, r):
- r.linear = [r]
- self.linear = r.linear
+ def root_folder(self, r):
+ r.linear = [r]
+ self.linear = r.linear
- def add_object(self, object):
- self.linear.append(object)
+ def add_object(self, object):
+ self.linear.append(object)
- def start_folder(self, f, level):
- self.add_object(f)
+ def start_folder(self, f, level):
+ self.add_object(f)
- def bookmark(self, b, level):
- self.add_object(b)
+ def bookmark(self, b, level):
+ self.add_object(b)
- def ruler(self, r, level):
- self.add_object(r)
+ def ruler(self, r, level):
+ self.add_object(r)
# Helper - make a linked linear representation of the tree, suitable to be stored in sequential storage
def make_linear(root_folder):
- linker = InverseLinker()
- root_folder.walk_depth(linker)
+ linker = InverseLinker()
+ root_folder.walk_depth(linker)
- linear = Linear()
- root_folder.walk_depth(linear)
+ linear = Linear()
+ root_folder.walk_depth(linear)
# Helper, opposite of make_linear - make a tree from the linked linear representation
def make_tree(linear):
- root_folder = linear[0]
- del linear[0]
+ root_folder = linear[0]
+ del linear[0]
- for object in linear:
- object.parent.append(object)
+ for object in linear:
+ object.parent.append(object)
- return root_folder
+ return root_folder
def break_tree(linear):
- del linear[0]
+ del linear[0]
- for object in linear:
- del object.parent
+ for object in linear:
+ del object.parent
def quote_title(title):
- if BKMK_FORMAT == "MOZILLA":
- title = title.replace("'", "&#39;")
- return title
+ if BKMK_FORMAT == "MOZILLA":
+ title = title.replace("'", "&#39;")
+ return title
def unquote_title(title):
- if BKMK_FORMAT == "MOZILLA":
- from HTMLParser import HTMLParser
- title = HTMLParser().unescape(title.replace("&amp;", '&').decode('utf-8'))
- title = title.encode('utf-8').replace("&#39;", "'")
- return title
+ if BKMK_FORMAT == "MOZILLA":
+ from HTMLParser import HTMLParser
+ title = HTMLParser().unescape(title.replace("&amp;", '&').decode('utf-8'))
+ title = title.encode('utf-8').replace("&#39;", "'")
+ return title
def parse_params(param_str):
DEBUG = os.environ.has_key("BKMK_DEBUG")
if DEBUG:
- def debug(note):
- print(note)
+ def debug(note):
+ print(note)
- def dump_names(folder_stack):
- l = []
- for object in folder_stack:
- if object.isFolder:
- l.append(object.name)
- return "'%s'" % "' '".join(l)
+ def dump_names(folder_stack):
+ l = []
+ for object in folder_stack:
+ if object.isFolder:
+ l.append(object.name)
+ return "'%s'" % "' '".join(l)
else:
- def debug(note):
- pass
- dump_names = debug
+ def debug(note):
+ pass
+ dump_names = debug
class BkmkParser(HTMLParser):
- def __init__(self):
- HTMLParser.__init__(self)
-
- self.urls = 0
- self.objects = 0
-
- self.charset = None
- self.recode = None
-
- def handle_data(self, data):
- if data:
- if self.charset and default_encoding:
- data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
- self.accumulator += data
-
- # Mozilla - get charset
- def do_meta(self, attrs):
- http_equiv = ""
- content = ""
-
- for attrname, value in attrs:
- value = value.strip()
- if attrname == 'http-equiv':
- http_equiv = value.lower()
- elif attrname == 'content':
- content = value
-
- if http_equiv == "content-type":
- try:
- # extract charset from "text/html; charset=UTF-8"
- self.charset = content.split('=')[1]
- except IndexError:
- pass
-
- def start_title(self, attrs):
- if default_encoding:
- self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
- self.accumulator += "<TITLE>"
-
- def end_title(self):
- self.accumulator += "</TITLE>"
-
- # Start root folder
- def start_h1(self, attrs):
- root_folder = Folder()
- self.current_object = root_folder
- self.root_folder = root_folder
- self.current_folder = root_folder
- self.folder_stack = [root_folder]
-
- self.root_folder.header = self.accumulator.strip()
- self.accumulator = ''
-
- def end_h1(self):
- accumulator = self.accumulator
- self.accumulator = ''
-
- debug("Root folder name: `%s'" % accumulator)
- self.root_folder.name = accumulator
-
- # Start a folder
- def start_h3(self, attrs):
- last_modified = None
- for attrname, value in attrs:
- value = value.strip()
- if attrname == 'add_date':
- add_date = value
- elif attrname == 'last_modified':
- last_modified = value
-
- debug("New folder...")
- folder = Folder(add_date, last_modified=last_modified)
- self.current_object = folder
- self.current_folder.append(folder)
- self.folder_stack.append(folder) # push new folder
- self.current_folder = folder
- self.objects += 1
-
- def end_h3(self):
- accumulator = self.accumulator
- self.accumulator = ''
-
- debug("Folder name: `%s'" % accumulator)
- self.current_folder.name = accumulator
-
- # Start a bookmark
- def start_a(self, attrs):
- add_date = None
- last_visit = None
- last_modified = None
- keyword = ''
- icon = None
- charset = None
-
- for attrname, value in attrs:
- value = value.strip()
- if attrname == "href":
- href = value
- elif attrname == "add_date":
- add_date = value
- elif attrname == "last_visit":
- last_visit = value
- elif attrname == "last_modified":
- last_modified = value
- elif attrname == "shortcuturl":
- keyword = value
- elif attrname == "icon":
- icon = value
- elif attrname == "last_charset":
- charset = value
-
- debug("Bookmark points to: `%s'" % href)
- bookmark = Bookmark(href, add_date, last_visit, last_modified,
- keyword=keyword, icon=icon,
- charset=charset, parser_charset=self.charset or default_encoding)
- self.current_object = bookmark
- self.current_folder.append(bookmark)
- self.urls += 1
- self.objects += 1
-
- def end_a(self):
- accumulator = self.accumulator
- self.accumulator = ''
-
- debug("Bookmark name: `%s'" % accumulator)
- bookmark = self.current_folder[-1]
- bookmark.name = accumulator
-
- def flush(self):
- accumulator = self.accumulator
-
- if accumulator:
- self.accumulator = ''
-
- current_object = self.current_object
- if current_object:
- current_object.comment += accumulator.strip()
- debug("Comment: `%s'" % current_object.comment)
-
- def start_dl(self, attrs):
- self.flush()
-
- do_dt = start_dl
-
- # End of folder
- def end_dl(self):
- self.flush()
- debug("End folder")
- debug("Folder stack: %s" % dump_names(self.folder_stack))
- if self.folder_stack:
- del self.folder_stack[-1] # pop last folder
- if self.folder_stack:
- self.current_folder = self.folder_stack[-1]
- else:
- debug("FOLDER STACK is EMPTY!!! (1)")
- else:
- debug("FOLDER STACK is EMPTY!!! (2)")
- self.current_object = None
-
- def close(self):
- HTMLParser.close(self)
- if self.folder_stack:
- raise ValueError("wrong folder stack: %s" % self.folder_stack)
-
- def do_dd(self, attrs):
- pass
-
- do_p = do_dd
-
- # Start ruler
- def do_hr(self, attrs):
- self.flush()
- debug("Ruler")
- self.current_folder.append(Ruler())
- self.current_object = None
- self.objects += 1
-
- # BR in comment
- def do_br(self, attrs):
- self.accumulator += "<BR>"
-
- # Allow < in the text
- def unknown_starttag(self, tag, attrs):
- self.accumulator += "<%s>" % tag
-
- # Do not allow unknow end tags
- def unknown_endtag(self, tag):
- raise NotImplementedError("Unknow end tag `%s'" % tag)
+ def __init__(self):
+ HTMLParser.__init__(self)
+
+ self.urls = 0
+ self.objects = 0
+
+ self.charset = None
+ self.recode = None
+
+ def handle_data(self, data):
+ if data:
+ if self.charset and default_encoding:
+ data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
+ self.accumulator += data
+
+ # Mozilla - get charset
+ def do_meta(self, attrs):
+ http_equiv = ""
+ content = ""
+
+ for attrname, value in attrs:
+ value = value.strip()
+ if attrname == 'http-equiv':
+ http_equiv = value.lower()
+ elif attrname == 'content':
+ content = value
+
+ if http_equiv == "content-type":
+ try:
+ # extract charset from "text/html; charset=UTF-8"
+ self.charset = content.split('=')[1]
+ except IndexError:
+ pass
+
+ def start_title(self, attrs):
+ if default_encoding:
+ self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
+ self.accumulator += "<TITLE>"
+
+ def end_title(self):
+ self.accumulator += "</TITLE>"
+
+ # Start root folder
+ def start_h1(self, attrs):
+ root_folder = Folder()
+ self.current_object = root_folder
+ self.root_folder = root_folder
+ self.current_folder = root_folder
+ self.folder_stack = [root_folder]
+
+ self.root_folder.header = self.accumulator.strip()
+ self.accumulator = ''
+
+ def end_h1(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
+
+ debug("Root folder name: `%s'" % accumulator)
+ self.root_folder.name = accumulator
+
+ # Start a folder
+ def start_h3(self, attrs):
+ last_modified = None
+ for attrname, value in attrs:
+ value = value.strip()
+ if attrname == 'add_date':
+ add_date = value
+ elif attrname == 'last_modified':
+ last_modified = value
+
+ debug("New folder...")
+ folder = Folder(add_date, last_modified=last_modified)
+ self.current_object = folder
+ self.current_folder.append(folder)
+ self.folder_stack.append(folder) # push new folder
+ self.current_folder = folder
+ self.objects += 1
+
+ def end_h3(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
+
+ debug("Folder name: `%s'" % accumulator)
+ self.current_folder.name = accumulator
+
+ # Start a bookmark
+ def start_a(self, attrs):
+ add_date = None
+ last_visit = None
+ last_modified = None
+ keyword = ''
+ icon = None
+ charset = None
+
+ for attrname, value in attrs:
+ value = value.strip()
+ if attrname == "href":
+ href = value
+ elif attrname == "add_date":
+ add_date = value
+ elif attrname == "last_visit":
+ last_visit = value
+ elif attrname == "last_modified":
+ last_modified = value
+ elif attrname == "shortcuturl":
+ keyword = value
+ elif attrname == "icon":
+ icon = value
+ elif attrname == "last_charset":
+ charset = value
+
+ debug("Bookmark points to: `%s'" % href)
+ bookmark = Bookmark(href, add_date, last_visit, last_modified,
+ keyword=keyword, icon=icon,
+ charset=charset, parser_charset=self.charset or default_encoding)
+ self.current_object = bookmark
+ self.current_folder.append(bookmark)
+ self.urls += 1
+ self.objects += 1
+
+ def end_a(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
+
+ debug("Bookmark name: `%s'" % accumulator)
+ bookmark = self.current_folder[-1]
+ bookmark.name = accumulator
+
+ def flush(self):
+ accumulator = self.accumulator
+
+ if accumulator:
+ self.accumulator = ''
+
+ current_object = self.current_object
+ if current_object:
+ current_object.comment += accumulator.strip()
+ debug("Comment: `%s'" % current_object.comment)
+
+ def start_dl(self, attrs):
+ self.flush()
+
+ do_dt = start_dl
+
+ # End of folder
+ def end_dl(self):
+ self.flush()
+ debug("End folder")
+ debug("Folder stack: %s" % dump_names(self.folder_stack))
+ if self.folder_stack:
+ del self.folder_stack[-1] # pop last folder
+ if self.folder_stack:
+ self.current_folder = self.folder_stack[-1]
+ else:
+ debug("FOLDER STACK is EMPTY!!! (1)")
+ else:
+ debug("FOLDER STACK is EMPTY!!! (2)")
+ self.current_object = None
+
+ def close(self):
+ HTMLParser.close(self)
+ if self.folder_stack:
+ raise ValueError("wrong folder stack: %s" % self.folder_stack)
+
+ def do_dd(self, attrs):
+ pass
+
+ do_p = do_dd
+
+ # Start ruler
+ def do_hr(self, attrs):
+ self.flush()
+ debug("Ruler")
+ self.current_folder.append(Ruler())
+ self.current_object = None
+ self.objects += 1
+
+ # BR in comment
+ def do_br(self, attrs):
+ self.accumulator += "<BR>"
+
+ # Allow < in the text
+ def unknown_starttag(self, tag, attrs):
+ self.accumulator += "<%s>" % tag
+
+    # Do not allow unknown end tags
+    def unknown_endtag(self, tag):
+        raise NotImplementedError("Unknown end tag `%s'" % tag)
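For orientation, BkmkParser consumes the Netscape/Mozilla bookmarks.html
format; a hypothetical fragment of the markup its handlers above expect looks
roughly like this (H1 = root folder, H3 = folder, A = bookmark, DD = comment,
HR = ruler; dates and the keyword are made-up values):

    <H1>Bookmarks</H1>
    <DL><p>
        <DT><H3 ADD_DATE="1490000000">Some folder</H3>
        <DL><p>
            <DT><A HREF="http://example.com/" ADD_DATE="1490000000"
                   LAST_VISIT="1490000000" SHORTCUTURL="ex">Example</A>
            <DD>An optional comment
            <HR>
        </DL><p>
    </DL><p>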
log_file = None
def report_dup(href, object_no):
- s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
+ s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
- if log_file:
- log_file.write("%s\n" % s)
- else:
- print(s)
+ if log_file:
+ log_file.write("%s\n" % s)
+ else:
+ print(s)
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "sl:")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "sl:")
- report_stats = 1
- global log_file
- log_filename = None
+ report_stats = 1
+ global log_file
+ log_filename = None
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- if _opt == '-l':
- log_filename = _arg
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ if _opt == '-l':
+ log_filename = _arg
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if report_stats:
- print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design")
+ if report_stats:
+ print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design")
- if args:
- sys.stderr.write("check_urls: too many arguments\n")
- sys.stderr.write("Usage: check_urls [-s] [-l logfile]\n")
- sys.exit(1)
+ if args:
+        sys.stderr.write("check_dups: too many arguments\n")
+        sys.stderr.write("Usage: check_dups [-s] [-l logfile]\n")
+ sys.exit(1)
- if log_filename:
- log_file = open(log_filename, 'w')
+ if log_filename:
+ log_file = open(log_filename, 'w')
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
- from bkmk_objects import make_linear
- make_linear(root_folder)
- objects = len(root_folder.linear)
+ root_folder = storage.load()
+ from bkmk_objects import make_linear
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
- dup_dict = {}
+ dup_dict = {}
- for object_no in range(objects):
- object = root_folder.linear[object_no]
+ for object_no in range(objects):
+ object = root_folder.linear[object_no]
- if object.isBookmark:
- href = object.href
- if dup_dict.has_key(href):
- report_dup(href, dup_dict[href])
- else:
- dup_dict[href] = object_no
+ if object.isBookmark:
+ href = object.href
+ if dup_dict.has_key(href):
+ report_dup(href, dup_dict[href])
+ else:
+ dup_dict[href] = object_no
- if log_filename:
- log_file.close()
+ if log_filename:
+ log_file.close()
- if report_stats:
- print("Ok")
- print(objects, "objects passed")
+ if report_stats:
+ print("Ok")
+ print(objects, "objects passed")
if __name__ == '__main__':
- run()
+ run()
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "s")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
- report_stats = 1
+ report_stats = 1
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if report_stats:
- print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design")
+ if report_stats:
+ print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design")
- if args:
- sys.stderr.write("check_title: too many arguments\n")
- sys.stderr.write("Usage: check_title [-s]\n")
- sys.exit(1)
+ if args:
+ sys.stderr.write("check_title: too many arguments\n")
+ sys.stderr.write("Usage: check_title [-s]\n")
+ sys.exit(1)
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
- make_linear(root_folder)
- objects = len(root_folder.linear)
+ root_folder = storage.load()
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
- for object_no in range(objects):
- object = root_folder.linear[object_no]
+ for object_no in range(objects):
+ object = root_folder.linear[object_no]
- if object.isBookmark:
- if hasattr(object, "moved") or hasattr(object, "error") \
- or object.href.startswith('place:'): # Firefox SmartBookmarks
- continue
+ if object.isBookmark:
+ if hasattr(object, "moved") or hasattr(object, "error") \
+ or object.href.startswith('place:'): # Firefox SmartBookmarks
+ continue
- if hasattr(object, "real_title") and (object.real_title is not None):
- unquoted_title = unquote_title(quote_title(object.real_title))
- unquoted_name = unquote_title(object.name)
- if unquoted_name != unquoted_title:
- print(object.href)
- print(unquoted_name)
- print(unquoted_title)
- print()
- else:
- print(object.href)
- print(object.name)
- print("NO REAL TITLE!!!")
- print()
+ if hasattr(object, "real_title") and (object.real_title is not None):
+ unquoted_title = unquote_title(quote_title(object.real_title))
+ unquoted_name = unquote_title(object.name)
+ if unquoted_name != unquoted_title:
+ print(object.href)
+ print(unquoted_name)
+ print(unquoted_title)
+ print()
+ else:
+ print(object.href)
+ print(object.name)
+ print("NO REAL TITLE!!!")
+ print()
- if report_stats:
- print(objects, "objects passed")
+ if report_stats:
+ print(objects, "objects passed")
if __name__ == '__main__':
- run()
+ run()
def run():
- print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design")
+ print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design")
- if len(sys.argv) < 2:
- sys.stderr.write("Usage: check_url.py url1 [url2...]\n")
- sys.exit(1)
+ if len(sys.argv) < 2:
+ sys.stderr.write("Usage: check_url.py url1 [url2...]\n")
+ sys.exit(1)
- from m_lib.flog import makelog
- log = makelog("check.log")
+ from m_lib.flog import makelog
+ log = makelog("check.log")
- from robots import robot
- robot = robot(log)
+ from robots import robot
+ robot = robot(log)
- for url in sys.argv[1:]:
- bookmark = Bookmark(href=url, add_date=None)
- bookmark.parent = None
+ for url in sys.argv[1:]:
+ bookmark = Bookmark(href=url, add_date=None)
+ bookmark.parent = None
- rcode = robot.check_url(bookmark)
- print("check_url: %s" % rcode)
+ rcode = robot.check_url(bookmark)
+ print("check_url: %s" % rcode)
- if hasattr(bookmark, 'error'):
- print(bookmark.error)
+ if hasattr(bookmark, 'error'):
+ print(bookmark.error)
- else:
- print("""\
-URL: %s
-Title: %s
-LastModified: %s
-IconURI: %s
-Icon: %s
-""" % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon))
+ else:
+            print("""\
+URL: %s
+Title: %s
+LastModified: %s
+IconURI: %s
+Icon: %s
+""" % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon))
- robot.stop()
- log.close()
+ robot.stop()
+ log.close()
if __name__ == '__main__':
- run()
+ run()
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "ise")
-
- show_pbar = 1
- report_stats = 1
- only_errors = 0
-
- for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
- if _opt == '-s':
- report_stats = 0
- if _opt == '-e':
- only_errors = 1
- try:
- del _opt, _arg
- except NameError:
- pass
-
- if report_stats:
- print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")
-
- if args:
- sys.stderr.write("check_urls: too many arguments\n")
- sys.stderr.write("Usage: check_urls [-ise]\n")
- sys.exit(1)
-
- if show_pbar:
- show_pbar = sys.stderr.isatty()
-
- if show_pbar:
- try:
- from m_lib.pbar.tty_pbar import ttyProgressBar
- except ImportError:
- show_pbar = 0
-
- from m_lib.flog import makelog, openlog
- if only_errors:
- log = openlog("check.log")
- log("chk_urls restarted for errors")
- if report_stats:
- print("chk_urls restarted for errors")
- else:
- log = makelog("check.log")
- log("check_urls started")
- if report_stats:
- print(" check_urls: normal start")
-
- from storage import storage
- storage = storage()
-
- from robots import robot
- robot = robot(log)
-
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
-
- root_folder = storage.load()
- from bkmk_objects import make_linear, break_tree
- make_linear(root_folder)
- objects = len(root_folder.linear)
-
- if report_stats:
- print("Ok")
-
- if report_stats:
- if only_errors:
- s = "Rechecking errors: "
- else:
- s = "Checking: "
- sys.stdout.write(s)
- sys.stdout.flush()
-
- if show_pbar:
- pbar = ttyProgressBar(0, objects)
-
- urls_no = 0
- object_count = 0
- size = 0
-
- checked = {}
- rcode = 1
-
- for object_no in range(objects):
- if show_pbar:
- pbar.display(object_no+1)
-
- object = root_folder.linear[object_no]
- object_count = object_count + 1
-
- if object.isBookmark:
- href = object.href
- if (href.startswith('place:') # Firefox SmartBookmarks
- or '%s' in href): # Bookmark with keyword
- log("Skipped %s" % href)
- continue
-
- if only_errors:
- if hasattr(object, "error"):
- delattr(object, "error")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "ise")
+
+ show_pbar = 1
+ report_stats = 1
+ only_errors = 0
+
+ for _opt, _arg in optlist:
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-s':
+ report_stats = 0
+ if _opt == '-e':
+ only_errors = 1
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if report_stats:
+ print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")
+
+ if args:
+ sys.stderr.write("check_urls: too many arguments\n")
+ sys.stderr.write("Usage: check_urls [-ise]\n")
+ sys.exit(1)
+
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
+
+ if show_pbar:
+ try:
+ from m_lib.pbar.tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
+
+ from m_lib.flog import makelog, openlog
+ if only_errors:
+ log = openlog("check.log")
+ log("chk_urls restarted for errors")
+ if report_stats:
+ print("chk_urls restarted for errors")
+ else:
+ log = makelog("check.log")
+ log("check_urls started")
+ if report_stats:
+ print(" check_urls: normal start")
+
+ from storage import storage
+ storage = storage()
+
+ from robots import robot
+ robot = robot(log)
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+ from bkmk_objects import make_linear, break_tree
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
+
+ if report_stats:
+ print("Ok")
+
+ if report_stats:
+ if only_errors:
+ s = "Rechecking errors: "
+ else:
+ s = "Checking: "
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ if show_pbar:
+ pbar = ttyProgressBar(0, objects)
+
+ urls_no = 0
+ object_count = 0
+ size = 0
+
+ checked = {}
+ rcode = 1
+
+ for object_no in range(objects):
+ if show_pbar:
+ pbar.display(object_no+1)
+
+ object = root_folder.linear[object_no]
+ object_count = object_count + 1
+
+ if object.isBookmark:
+ href = object.href
+ if (href.startswith('place:') # Firefox SmartBookmarks
+ or '%s' in href): # Bookmark with keyword
+ log("Skipped %s" % href)
+ continue
+
+ if only_errors:
+ if hasattr(object, "error"):
+ delattr(object, "error")
+ else:
+ continue
+
+ if checked.has_key(href):
+ log("Already checked %s" % href)
+ old_object = root_folder.linear[checked[href]]
+ for attr_name in ("last_visit", "last_modified",
+ "error", "no_error", "moved", "size", "md5", "real_title",
+ "last_tested", "test_time", "icon", "charset"):
+ if hasattr(old_object, attr_name):
+ setattr(object, attr_name, getattr(old_object, attr_name))
else:
- continue
-
- if checked.has_key(href):
- log("Already checked %s" % href)
- old_object = root_folder.linear[checked[href]]
- for attr_name in ("last_visit", "last_modified",
- "error", "no_error", "moved", "size", "md5", "real_title",
- "last_tested", "test_time", "icon", "charset"):
- if hasattr(old_object, attr_name):
- setattr(object, attr_name, getattr(old_object, attr_name))
- else:
- log("Checking %s" % href)
- rcode = robot.check_url(object)
-
- if rcode:
- checked[href] = object_no
- urls_no = urls_no + 1
- try:
- size = size + int(object.size)
- except (AttributeError, TypeError, ValueError):
- pass # Some object does not have a size :(
- else:
- log("Interrupted by user (^C)")
- break
- robot.stop()
-
- if show_pbar:
- del pbar
-
- if report_stats:
- print("Ok")
- print(object_count, "objects passed")
- print(urls_no, "URLs checked")
- print(size, "bytes eaten")
-
- break_tree(root_folder.linear)
- storage.store(root_folder)
-
- if rcode:
- log("check_urls finished ok")
- log.close()
+ log("Checking %s" % href)
+ rcode = robot.check_url(object)
+
+ if rcode:
+ checked[href] = object_no
+ urls_no = urls_no + 1
+ try:
+ size = size + int(object.size)
+ except (AttributeError, TypeError, ValueError):
+ pass # Some object does not have a size :(
+ else:
+ log("Interrupted by user (^C)")
+ break
+ robot.stop()
+
+ if show_pbar:
+ del pbar
+
+ if report_stats:
+ print("Ok")
+ print(object_count, "objects passed")
+ print(urls_no, "URLs checked")
+ print(size, "bytes eaten")
+
+ break_tree(root_folder.linear)
+ storage.store(root_folder)
+
+ if rcode:
+ log("check_urls finished ok")
+ log.close()
if __name__ == '__main__':
- run()
+ run()
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "s")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
- report_stats = 1
+ report_stats = 1
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if len(args) != 1:
- sys.stderr.write("convert_st: too many or too few arguments\n")
- sys.stderr.write("Usage: convert_st [-s] new_storage\n")
- sys.exit(1)
+ if len(args) != 1:
+ sys.stderr.write("convert_st: too many or too few arguments\n")
+ sys.stderr.write("Usage: convert_st [-s] new_storage\n")
+ sys.exit(1)
- from bkmk_objects import parse_params, set_params
- from storage import storage, import_storage
+ from bkmk_objects import parse_params, set_params
+ from storage import storage, import_storage
- storage = storage()
+ storage = storage()
- storage_name, storage_params = parse_params(args[0])
- new_storage = import_storage(storage_name)
- set_params(new_storage, storage_params)
- new_storage = new_storage()
+ storage_name, storage_params = parse_params(args[0])
+ new_storage = import_storage(storage_name)
+ set_params(new_storage, storage_params)
+ new_storage = new_storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
+ root_folder = storage.load()
- if report_stats:
- print("Ok")
- sys.stdout.write("Converting to %s: " % new_storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ print("Ok")
+ sys.stdout.write("Converting to %s: " % new_storage.filename)
+ sys.stdout.flush()
- new_storage.store(root_folder)
+ new_storage.store(root_folder)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
if __name__ == '__main__':
- run()
+ run()
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "sp:o:t:r")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "sp:o:t:r")
- report_stats = 1
- prune = None
+ report_stats = 1
+ prune = None
- from writers import writer
- output_filename = writer.filename
+ from writers import writer
+ output_filename = writer.filename
- transl = 0
- transl_name = "" # dictionary translation; default is no translation
+ transl = 0
+ transl_name = "" # dictionary translation; default is no translation
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- if _opt == '-p':
- prune = _arg
- if _opt == '-o':
- output_filename = _arg
- if _opt == '-t':
- transl = 1
- transl_name = _arg
- if _opt == '-r':
- transl = 2
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ if _opt == '-p':
+ prune = _arg
+ if _opt == '-o':
+ output_filename = _arg
+ if _opt == '-t':
+ transl = 1
+ transl_name = _arg
+ if _opt == '-r':
+ transl = 2
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if args:
- sys.stderr.write("db2bkmk: too many arguments\n")
- sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n")
- sys.exit(1)
+ if args:
+ sys.stderr.write("db2bkmk: too many arguments\n")
+ sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n")
+ sys.exit(1)
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
+ root_folder = storage.load()
- if report_stats:
- print("Ok")
- sys.stdout.write("Writing %s: " % output_filename)
- sys.stdout.flush()
+ if report_stats:
+ print("Ok")
+ sys.stdout.write("Writing %s: " % output_filename)
+ sys.stdout.flush()
- if transl:
- new_ext = str(transl)
- transl_d = {}
+ if transl:
+ new_ext = str(transl)
+ transl_d = {}
- from m_lib.flad import fladm
- transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""])
- # This prevents any other key to appear in transl_db ^
+ from m_lib.flad import fladm
+ transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""])
+        # This prevents any other key appearing in transl_db ^
- # Generate translation dictionary (hash table)
- if transl == 1:
- for record in transl_db:
- transl_d[record["URL1"]] = record["URL2"]
- elif transl == 2:
- for record in transl_db:
- transl_d[record["URL2"]] = record["URL1"]
- else:
- raise ValueError("transl (%d) must be 1 or 2" % transl)
+ # Generate translation dictionary (hash table)
+ if transl == 1:
+ for record in transl_db:
+ transl_d[record["URL1"]] = record["URL2"]
+ elif transl == 2:
+ for record in transl_db:
+ transl_d[record["URL2"]] = record["URL1"]
+ else:
+ raise ValueError("transl (%d) must be 1 or 2" % transl)
- del transl_db # Save few bytes of memory
+        del transl_db # Save a few bytes of memory
- from bkmk_objects import Walker
- class Transl(Walker):
- def __init__(self, transl_d):
- self.transl_d = transl_d
+ from bkmk_objects import Walker
+ class Transl(Walker):
+ def __init__(self, transl_d):
+ self.transl_d = transl_d
- def bookmark(self, b, level):
- href = b.href
- transl_d = self.transl_d
+ def bookmark(self, b, level):
+ href = b.href
+ transl_d = self.transl_d
- if transl_d.has_key(href):
- b.href = transl_d[href]
+ if transl_d.has_key(href):
+ b.href = transl_d[href]
- root_folder.walk_depth(Transl(transl_d))
+ root_folder.walk_depth(Transl(transl_d))
- outfile = open(output_filename, 'w')
- root_folder.walk_depth(writer(outfile, prune))
- outfile.close()
+ outfile = open(output_filename, 'w')
+ root_folder.walk_depth(writer(outfile, prune))
+ outfile.close()
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
if __name__ == '__main__':
- run()
+ run()
def main():
- import sys
- from .bkmk_parse_html import universal_charset
-
- l = len(sys.argv)
- if l == 3:
- filename = sys.argv[1]
- charset = sys.argv[2]
- elif l == 2:
- filename = sys.argv[1]
- charset = universal_charset
- else:
- sys.exit("Usage: main filename [charset]")
-
- parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n'))
- print(" refresh:", parser.refresh)
- print(" icon :", parser.icon)
+ import sys
+ from .bkmk_parse_html import universal_charset
+
+ l = len(sys.argv)
+ if l == 3:
+ filename = sys.argv[1]
+ charset = sys.argv[2]
+ elif l == 2:
+ filename = sys.argv[1]
+ charset = universal_charset
+ else:
+ sys.exit("Usage: main filename [charset]")
+
+ parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n'))
+ print(" refresh:", parser.refresh)
+ print(" icon :", parser.icon)
if __name__ == '__main__':
main()
parsers = []
try:
- from . import bkmk_ph_beautifulsoup
+ from . import bkmk_ph_beautifulsoup
except ImportError:
- pass
+ pass
else:
- bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
- parsers.append(bkmk_ph_beautifulsoup.parse_html)
+ bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ parsers.append(bkmk_ph_beautifulsoup.parse_html)
try:
- from . import bkmk_ph_html5
+ from . import bkmk_ph_html5
except ImportError:
- pass
+ pass
else:
- parsers.append(bkmk_ph_html5.parse_html)
+ parsers.append(bkmk_ph_html5.parse_html)
try:
- from . import bkmk_ph_lxml
+ from . import bkmk_ph_lxml
except ImportError:
- pass
+ pass
else:
parsers.append(bkmk_ph_lxml.parse_html)
try:
- from . import bkmk_ph_htmlparser
+ from . import bkmk_ph_htmlparser
except ImportError:
- pass
+ pass
else:
parsers.append(bkmk_ph_htmlparser.parse_html)
num_entity_re = re.compile("(&#[0-9]+;)")
def recode_entities(title, charset):
- output = []
- for part in entity_re.split(title):
-        if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \
- entity_re.match(part):
- _part = name2codepoint.get(part[1:-1], None)
- if _part is not None:
- part = unichr(_part).encode(charset)
- output.append(part)
- title = ''.join(output)
-
- output = []
- for part in num_entity_re.split(title):
- if num_entity_re.match(part):
- try:
- part = unichr(int(part[2:-1])).encode(charset)
- except UnicodeEncodeError:
- pass # Leave the entity as is
- output.append(part)
-
- return ''.join(output)
+ output = []
+ for part in entity_re.split(title):
+        if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \
+ entity_re.match(part):
+ _part = name2codepoint.get(part[1:-1], None)
+ if _part is not None:
+ part = unichr(_part).encode(charset)
+ output.append(part)
+ title = ''.join(output)
+
+ output = []
+ for part in num_entity_re.split(title):
+ if num_entity_re.match(part):
+ try:
+ part = unichr(int(part[2:-1])).encode(charset)
+ except UnicodeEncodeError:
+ pass # Leave the entity as is
+ output.append(part)
+
+ return ''.join(output)
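A minimal usage sketch for recode_entities (hypothetical input; it relies on
this module's entity_re and name2codepoint, with utf-8 as the target charset):

    title = "Fish &amp; Chips &copy; &#8364;5"
    print(recode_entities(title, "utf-8"))
    # -> 'Fish &amp; Chips \xc2\xa9 \xe2\x82\xac5'
    # "&amp;" stays escaped because it is in the skip tuple; "&copy;" and
    # "&#8364;" (the euro sign) are decoded and re-encoded to utf-8, while a
    # numeric entity that does not fit the target charset would be left as-is.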
import os
BKMK_DEBUG_HTML_PARSERS = os.environ.get("BKMK_DEBUG_HTML_PARSERS")
def parse_html(html_text, charset=None, log=None):
- if not parsers:
- return None
-
- if charset:
- try:
- codecs.lookup(charset) # In case of unknown charset...
- except (ValueError, LookupError):
- charset = None # ...try charset from HTML
-
- charsets = [universal_charset, DEFAULT_CHARSET]
- if charset:
- charset = charset.lower().replace("windows-", "cp")
- if charset in charsets:
- charsets.remove(charset)
- charsets.insert(0, charset)
-
- if BKMK_DEBUG_HTML_PARSERS:
- _parsers = []
- for p in parsers:
- parser = None
- for c in charsets:
- try:
- parser = p(html_text, c, log)
- except UnicodeError:
- pass
- else:
- if parser:
- if BKMK_DEBUG_HTML_PARSERS:
- if log: log(" Parser %s: ok" % p.__module__)
- _parsers.append((p, parser))
- break
- else:
- if log: log(" Parser %s: fail" % p.__module__)
- if not BKMK_DEBUG_HTML_PARSERS and parser:
- break
-
- if BKMK_DEBUG_HTML_PARSERS:
- if not _parsers:
- if log: log(" All parsers have failed")
- return None
- elif not parser:
- if log: log(" All parsers have failed")
- return None
-
- if BKMK_DEBUG_HTML_PARSERS:
- p, parser = _parsers[0]
- if log: log(" Using %s" % p.__module__)
-
- converted_title = title = parser.title
- if title and (not parser.charset):
- try:
- unicode(title, "ascii")
- except UnicodeDecodeError:
- parser.charset = DEFAULT_CHARSET
-
- if parser.charset:
- parser.charset = parser.charset.lower().replace("windows-", "cp")
-
- if title and parser.charset and (
- (parser.charset != universal_charset) or
- ((not charset) or (charset != parser.charset))):
- try:
- if parser.meta_charset:
- if log: log(" META charset : %s" % parser.charset)
- elif (not charset) or (charset != parser.charset):
- if log: log(" guessed charset: %s" % parser.charset)
- #if log: log(" current charset: %s" % universal_charset)
- if log: log(" title : %s" % title)
- if parser.charset != universal_charset:
+ if not parsers:
+ return None
+
+ if charset:
+ try:
+ codecs.lookup(charset) # In case of unknown charset...
+ except (ValueError, LookupError):
+ charset = None # ...try charset from HTML
+
+ charsets = [universal_charset, DEFAULT_CHARSET]
+ if charset:
+ charset = charset.lower().replace("windows-", "cp")
+ if charset in charsets:
+ charsets.remove(charset)
+ charsets.insert(0, charset)
+
+ if BKMK_DEBUG_HTML_PARSERS:
+ _parsers = []
+ for p in parsers:
+ parser = None
+ for c in charsets:
try:
- converted_title = unicode(title, parser.charset).encode(universal_charset)
+ parser = p(html_text, c, log)
except UnicodeError:
- if log: log(" incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET))
- converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace")
- parser.charset = DEFAULT_CHARSET
- if log and (converted_title != title): log(" converted title: %s" % converted_title)
- except LookupError:
- if log: log(" unknown charset: '%s'" % parser.charset)
- else:
- if log: log(" title : %s" % title)
-
- if title:
- final_title = recode_entities(converted_title, universal_charset)
- parts = [s.strip() for s in final_title.replace('\r', '').split('\n')]
- final_title = ' '.join([s for s in parts if s])
- if log and (final_title != converted_title): log(" final title : %s" % final_title)
- parser.title = final_title
-
- icon = parser.icon
- if isinstance(icon, unicode):
- try:
- parser.icon = icon.encode('ascii')
- except UnicodeEncodeError:
- if parser.charset:
- parser.icon = icon.encode(parser.charset)
- return parser
+ pass
+ else:
+ if parser:
+ if BKMK_DEBUG_HTML_PARSERS:
+ if log: log(" Parser %s: ok" % p.__module__)
+ _parsers.append((p, parser))
+ break
+ else:
+ if log: log(" Parser %s: fail" % p.__module__)
+ if not BKMK_DEBUG_HTML_PARSERS and parser:
+ break
+
+ if BKMK_DEBUG_HTML_PARSERS:
+ if not _parsers:
+ if log: log(" All parsers have failed")
+ return None
+ elif not parser:
+ if log: log(" All parsers have failed")
+ return None
+
+ if BKMK_DEBUG_HTML_PARSERS:
+ p, parser = _parsers[0]
+ if log: log(" Using %s" % p.__module__)
+
+ converted_title = title = parser.title
+ if title and (not parser.charset):
+ try:
+ unicode(title, "ascii")
+ except UnicodeDecodeError:
+ parser.charset = DEFAULT_CHARSET
+
+ if parser.charset:
+ parser.charset = parser.charset.lower().replace("windows-", "cp")
+
+ if title and parser.charset and (
+ (parser.charset != universal_charset) or
+ ((not charset) or (charset != parser.charset))):
+ try:
+ if parser.meta_charset:
+ if log: log(" META charset : %s" % parser.charset)
+ elif (not charset) or (charset != parser.charset):
+ if log: log(" guessed charset: %s" % parser.charset)
+ #if log: log(" current charset: %s" % universal_charset)
+ if log: log(" title : %s" % title)
+ if parser.charset != universal_charset:
+ try:
+ converted_title = unicode(title, parser.charset).encode(universal_charset)
+ except UnicodeError:
+ if log: log(" incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET))
+ converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace")
+ parser.charset = DEFAULT_CHARSET
+ if log and (converted_title != title): log(" converted title: %s" % converted_title)
+ except LookupError:
+ if log: log(" unknown charset: '%s'" % parser.charset)
+ else:
+ if log: log(" title : %s" % title)
+
+ if title:
+ final_title = recode_entities(converted_title, universal_charset)
+ parts = [s.strip() for s in final_title.replace('\r', '').split('\n')]
+ final_title = ' '.join([s for s in parts if s])
+ if log and (final_title != converted_title): log(" final title : %s" % final_title)
+ parser.title = final_title
+
+ icon = parser.icon
+ if isinstance(icon, unicode):
+ try:
+ parser.icon = icon.encode('ascii')
+ except UnicodeEncodeError:
+ if parser.charset:
+ parser.icon = icon.encode(parser.charset)
+ return parser
def parse_filename(filename, charset=None, log=None):
fp = open(filename, 'r')
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2007-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2007-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
# http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
class BadDeclParser(BeautifulSoup):
def parse_declaration(self, i):
- """Treat a bogus SGML declaration as raw data. Treat a CDATA
- declaration as a CData object."""
- j = None
- if self.rawdata[i:i+9] == '<![CDATA[':
- k = self.rawdata.find(']]>', i)
- if k == -1:
- k = len(self.rawdata)
- data = self.rawdata[i+9:k]
- j = k+3
- self._toStringSubclass(data, CData)
- else:
- try:
- j = SGMLParser.parse_declaration(self, i)
- except SGMLParseError:
- # Could not parse the DOCTYPE declaration
- # Try to just skip the actual declaration
- match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE)
- if match:
- toHandle = self.rawdata[i:match.end()]
- else:
- toHandle = self.rawdata[i:]
- self.handle_data(toHandle)
- j = i + len(toHandle)
- return j
+ """Treat a bogus SGML declaration as raw data. Treat a CDATA
+ declaration as a CData object."""
+ j = None
+ if self.rawdata[i:i+9] == '<![CDATA[':
+ k = self.rawdata.find(']]>', i)
+ if k == -1:
+ k = len(self.rawdata)
+ data = self.rawdata[i+9:k]
+ j = k+3
+ self._toStringSubclass(data, CData)
+ else:
+ try:
+ j = SGMLParser.parse_declaration(self, i)
+ except SGMLParseError:
+ # Could not parse the DOCTYPE declaration
+ # Try to just skip the actual declaration
+ match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE)
+ if match:
+ toHandle = self.rawdata[i:match.end()]
+ else:
+ toHandle = self.rawdata[i:]
+ self.handle_data(toHandle)
+ j = i + len(toHandle)
+ return j
def _parse_html(html_text, charset):
- try:
- return BadDeclParser(html_text, fromEncoding=charset)
- except TypeError:
- return None
+ try:
+ return BadDeclParser(html_text, fromEncoding=charset)
+ except TypeError:
+ return None
def parse_html(html_text, charset=None, log=None):
- root = _parse_html(html_text, charset)
- if root is None:
- return None
-
- _charset = root.originalEncoding
- if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
- _charset = DEFAULT_CHARSET
- root = _parse_html(html_text, _charset)
- if root is None:
- return None
-
- html = root.html
- if html is None:
- html = root
-
- head = html.head
- if head is None:
- head = html # Some sites put TITLE in HTML without HEAD
-
- title = head.title
- if (title is None) and (html is not head):
- # Some sites put TITLE in HTML outside of HEAD
- title = html.title
-
- if title is None:
- # Lookup TITLE in the root
- title = root.title
-
- if title is not None:
- if title.string:
- title = title.string
- else:
- parts = []
- for part in title:
- if not isinstance(part, basestring):
- part = unicode(part)
- parts.append(part.strip())
- title = ''.join(parts)
-
- meta = head.find(_find_contenttype, recursive=False)
- if meta:
- try:
- meta_content = meta.get("content")
- if meta_content:
- __charset = meta_content.lower().split('charset=')[1].split(';')[0]
- else:
- __charset = False
- except IndexError: # No charset in the META Content-Type
- meta_charset = False
- else:
- meta_charset = _charset == __charset
- else:
- meta_charset = False
-
- if not meta_charset:
- meta = head.find(_find_charset, recursive=False)
- if meta:
- meta_content = meta.get("charset")
- if meta_content:
- meta_charset = _charset = meta_content.lower()
-
- if title and (_charset or meta_charset):
- title = title.encode(_charset or meta_charset)
-
- meta = head.find(_find_refresh, recursive=False)
- if meta:
- refresh = meta.get("content")
- else:
- refresh = None
-
- meta = head.find(_find_icon, recursive=False)
- if meta:
- icon = meta.get("href")
- else:
- icon = None
-
- if (title is None) and (refresh is None) and (icon is None):
- return None
- return HTMLParser(_charset, meta_charset, title, refresh, icon)
+ root = _parse_html(html_text, charset)
+ if root is None:
+ return None
+
+ _charset = root.originalEncoding
+ if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
+ _charset = DEFAULT_CHARSET
+ root = _parse_html(html_text, _charset)
+ if root is None:
+ return None
+
+ html = root.html
+ if html is None:
+ html = root
+
+ head = html.head
+ if head is None:
+ head = html # Some sites put TITLE in HTML without HEAD
+
+ title = head.title
+ if (title is None) and (html is not head):
+ # Some sites put TITLE in HTML outside of HEAD
+ title = html.title
+
+ if title is None:
+ # Lookup TITLE in the root
+ title = root.title
+
+ if title is not None:
+ if title.string:
+ title = title.string
+ else:
+ parts = []
+ for part in title:
+ if not isinstance(part, basestring):
+ part = unicode(part)
+ parts.append(part.strip())
+ title = ''.join(parts)
+
+ meta = head.find(_find_contenttype, recursive=False)
+ if meta:
+ try:
+ meta_content = meta.get("content")
+ if meta_content:
+ __charset = meta_content.lower().split('charset=')[1].split(';')[0]
+ else:
+ __charset = False
+ except IndexError: # No charset in the META Content-Type
+ meta_charset = False
+ else:
+ meta_charset = _charset == __charset
+ else:
+ meta_charset = False
+
+ if not meta_charset:
+ meta = head.find(_find_charset, recursive=False)
+ if meta:
+ meta_content = meta.get("charset")
+ if meta_content:
+ meta_charset = _charset = meta_content.lower()
+
+ if title and (_charset or meta_charset):
+ title = title.encode(_charset or meta_charset)
+
+ meta = head.find(_find_refresh, recursive=False)
+ if meta:
+ refresh = meta.get("content")
+ else:
+ refresh = None
+
+ meta = head.find(_find_icon, recursive=False)
+ if meta:
+ icon = meta.get("href")
+ else:
+ icon = None
+
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
+ return HTMLParser(_charset, meta_charset, title, refresh, icon)
def _find_contenttype(Tag):
- return (Tag.name == "meta") and \
- (Tag.get("http-equiv", '').lower() == "content-type")
+ return (Tag.name == "meta") and \
+ (Tag.get("http-equiv", '').lower() == "content-type")
def _find_charset(Tag):
- return (Tag.name == "meta") and Tag.get("charset", '')
+ return (Tag.name == "meta") and Tag.get("charset", '')
def _find_refresh(Tag):
- return (Tag.name == "meta") and \
- (Tag.get("http-equiv", '').lower() == "refresh")
+ return (Tag.name == "meta") and \
+ (Tag.get("http-equiv", '').lower() == "refresh")
def _find_icon(Tag):
- return (Tag.name == "link") and \
- (Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
+ return (Tag.name == "link") and \
+ (Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
except IndexError:
meta_charset = False
elif m.get('charset', ''):
- meta_charset = m.get('charset').lower()
- break
+ meta_charset = m.get('charset').lower()
+ break
else:
meta_charset = False
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
class HTMLParser(_HTMLParser):
- def __init__(self, charset=None):
- _HTMLParser.__init__(self)
- self.charset = charset
- self.meta_charset = 0
- self.title = None
- self.refresh = None
- self.icon = None
-
- def end_head(self):
- raise HTMLHeadDone()
-
- def do_meta(self, attrs):
- http_equiv = ""
- content = ""
-
- for attrname, value in attrs:
- if value:
- value = value.strip()
- if attrname == 'http-equiv':
- http_equiv = value.lower()
- elif attrname == 'content':
- content = value
- elif (attrname == 'charset') and (not self.charset):
- self.charset = value.lower()
- self.meta_charset = 1
-
- if (not self.charset) and (http_equiv == "content-type"):
- try:
- # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
- self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
- self.meta_charset = 1 # Remember that the charset was retrieved from
- # META tag, not from the Content-Type header
- except IndexError:
- pass
-
- if http_equiv == "refresh":
- self.refresh = content
-
- def start_title(self, attrs):
- self.accumulator = ''
-
- def end_title(self):
- if not self.title: # use only the first title
- self.title = self.accumulator
-
- def do_link(self, attrs):
- has_icon = False
- href = None
-
- for attrname, value in attrs:
- if value:
- value = value.strip()
- if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
- has_icon = True
- elif attrname == 'href':
- href = value
-
- if has_icon:
- self.icon = href
+ def __init__(self, charset=None):
+ _HTMLParser.__init__(self)
+ self.charset = charset
+ self.meta_charset = 0
+ self.title = None
+ self.refresh = None
+ self.icon = None
+
+ def end_head(self):
+ raise HTMLHeadDone()
+
+ def do_meta(self, attrs):
+ http_equiv = ""
+ content = ""
+
+ for attrname, value in attrs:
+ if value:
+ value = value.strip()
+ if attrname == 'http-equiv':
+ http_equiv = value.lower()
+ elif attrname == 'content':
+ content = value
+ elif (attrname == 'charset') and (not self.charset):
+ self.charset = value.lower()
+ self.meta_charset = 1
+
+ if (not self.charset) and (http_equiv == "content-type"):
+ try:
+ # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+ self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
+ self.meta_charset = 1 # Remember that the charset was retrieved from
+ # META tag, not from the Content-Type header
+ except IndexError:
+ pass
+
+ if http_equiv == "refresh":
+ self.refresh = content
+
+ def start_title(self, attrs):
+ self.accumulator = ''
+
+ def end_title(self):
+ if not self.title: # use only the first title
+ self.title = self.accumulator
+
+ def do_link(self, attrs):
+ has_icon = False
+ href = None
+
+ for attrname, value in attrs:
+ if value:
+ value = value.strip()
+ if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
+ has_icon = True
+ elif attrname == 'href':
+ href = value
+
+ if has_icon:
+ self.icon = href
def parse_html(html_text, charset=None, log=None):
- parser = HTMLParser(charset)
+ parser = HTMLParser(charset)
- try:
- parser.feed(html_text)
- except (HTMLParseError, HTMLHeadDone):
- pass
+ try:
+ parser.feed(html_text)
+ except (HTMLParseError, HTMLHeadDone):
+ pass
- try:
- parser.close()
- except (HTMLParseError, HTMLHeadDone):
- pass
+ try:
+ parser.close()
+ except (HTMLParseError, HTMLHeadDone):
+ pass
- if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
- return None
- return parser
+ if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
+ return None
+ return parser
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
except IndexError:
meta_charset = False
elif m.get('charset', ''):
- meta_charset = m.get('charset').lower()
- break
+ meta_charset = m.get('charset').lower()
+ break
else:
meta_charset = False
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['HTMLParser']
from HTMLParser import HTMLParser
class HTMLParser(HTMLParser):
- def __init__(self, charset, meta_charset, title, refresh, icon):
- object.__init__(self)
- self.charset = charset
- self.meta_charset = meta_charset
- self.title = title
- self.refresh = refresh
- self.icon = icon
+ def __init__(self, charset, meta_charset, title, refresh, icon):
+ object.__init__(self)
+ self.charset = charset
+ self.meta_charset = meta_charset
+ self.title = title
+ self.refresh = refresh
+ self.icon = icon
robot_name, robot_params = parse_params(environ.get("BKMK_ROBOT", "forking"))
def import_robot(robot_name):
- exec("from Robots import bkmk_r%s" % robot_name)
- exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name))
- return robot
+ exec("from Robots import bkmk_r%s" % robot_name)
+ exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name))
+ return robot
robot = import_robot(robot_name)
set_params(robot, robot_params)
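The exec-based loader above (and the matching import_storage and import_writer
loaders further below) turns a name such as "forking" into the class
Robots.bkmk_rforking.robot_forking. A rough importlib-based equivalent, shown
only as a sketch:

    import importlib

    def import_robot(robot_name):
        # load Robots/bkmk_r<robot_name>.py and return its robot_<robot_name> class
        module = importlib.import_module("Robots.bkmk_r%s" % robot_name)
        return getattr(module, "robot_%s" % robot_name)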
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "s")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
- report_stats = 1
+ report_stats = 1
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if report_stats:
- print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design")
+ if report_stats:
+ print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design")
- if args:
- sys.stderr.write("set-real_title: too many arguments\n")
- sys.stderr.write("Usage: set-real_title [-s]\n")
- sys.exit(1)
+ if args:
+ sys.stderr.write("set-real_title: too many arguments\n")
+ sys.stderr.write("Usage: set-real_title [-s]\n")
+ sys.exit(1)
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
- from bkmk_objects import make_linear
- make_linear(root_folder)
- objects = len(root_folder.linear)
+ root_folder = storage.load()
+ from bkmk_objects import make_linear
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
- changed = 0
- for object_no in range(objects):
- object = root_folder.linear[object_no]
+ changed = 0
+ for object_no in range(objects):
+ object = root_folder.linear[object_no]
- if object.isBookmark:
- if not hasattr(object, "real_title"):
- continue
+ if object.isBookmark:
+ if not hasattr(object, "real_title"):
+ continue
- real_title = object.real_title
- if not real_title:
- real_title = object.href
- if object.name != real_title:
- object.name = real_title
- changed += 1
+ real_title = object.real_title
+ if not real_title:
+ real_title = object.href
+ if object.name != real_title:
+ object.name = real_title
+ changed += 1
- if changed and report_stats:
- sys.stdout.write("Saving %s: " % storage.filename)
- sys.stdout.flush()
+ if changed and report_stats:
+ sys.stdout.write("Saving %s: " % storage.filename)
+ sys.stdout.flush()
- if not changed and report_stats:
- sys.stdout.write("No need to save data\n")
- sys.stdout.flush()
+ if not changed and report_stats:
+ sys.stdout.write("No need to save data\n")
+ sys.stdout.flush()
- if changed:
- storage.store(root_folder)
+ if changed:
+ storage.store(root_folder)
- if changed and report_stats:
- print("Ok")
- print(objects, "objects passed")
- print(changed, "objects changed")
+ if changed and report_stats:
+ print("Ok")
+ print(objects, "objects passed")
+ print(changed, "objects changed")
if __name__ == '__main__':
- run()
+ run()
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "s")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
- report_stats = 1
+ report_stats = 1
- for _opt, _arg in optlist:
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
- if report_stats:
- print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design")
+ if report_stats:
+ print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design")
- if len(args) != 1:
- sys.stderr.write("Usage: set-title-list [-s] title_list_file\n")
- sys.exit(1)
+ if len(args) != 1:
+ sys.stderr.write("Usage: set-title-list [-s] title_list_file\n")
+ sys.exit(1)
- # Read the external file with titles and build a mapping (URL => title)
- titles_dict = {}
+ # Read the external file with titles and build a mapping (URL => title)
+ titles_dict = {}
- URL = None
- title = None
+ URL = None
+ title = None
- title_list_file = open(args[0], 'r')
- for line in title_list_file:
- line = line[:-1] # strip trailing newline
- if URL is None:
- URL = line
+ title_list_file = open(args[0], 'r')
+ for line in title_list_file:
+ line = line[:-1] # strip trailing newline
+ if URL is None:
+ URL = line
- elif title is None:
- title = line
+ elif title is None:
+ title = line
- elif line: # the third line in every 3 lines must be empty
- raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line))
+ elif line: # the third line in every 3 lines must be empty
+ raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line))
- else: # We've got 3 lines - add new entry to the mapping
- if titles_dict.has_key(URL):
- if title != titles_dict[URL]:
- raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL]))
+ else: # We've got 3 lines - add new entry to the mapping
+ if titles_dict.has_key(URL):
+ if title != titles_dict[URL]:
+ raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL]))
- else:
- titles_dict[URL] = title
+ else:
+ titles_dict[URL] = title
- # reset
- URL = None
- title = None
+ # reset
+ URL = None
+ title = None
- title_list_file.close()
+ title_list_file.close()
- from storage import storage
- storage = storage()
+ from storage import storage
+ storage = storage()
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
- root_folder = storage.load()
- from bkmk_objects import make_linear, break_tree
- make_linear(root_folder)
- objects = len(root_folder.linear)
+ root_folder = storage.load()
+ from bkmk_objects import make_linear, break_tree
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
- if report_stats:
- print("Ok")
+ if report_stats:
+ print("Ok")
- # Run through the list of objects and check URLs/titles
- changed = 0
- for object_no in range(objects):
- object = root_folder.linear[object_no]
+ # Run through the list of objects and check URLs/titles
+ changed = 0
+ for object_no in range(objects):
+ object = root_folder.linear[object_no]
- if object.isBookmark:
- URL = object.href
- if titles_dict.has_key(URL):
- name = titles_dict[URL]
- if object.name != name:
- object.name = name
- changed += 1
+ if object.isBookmark:
+ URL = object.href
+ if titles_dict.has_key(URL):
+ name = titles_dict[URL]
+ if object.name != name:
+ object.name = name
+ changed += 1
- if changed and report_stats:
- sys.stdout.write("Saving %s: " % storage.filename)
- sys.stdout.flush()
+ if changed and report_stats:
+ sys.stdout.write("Saving %s: " % storage.filename)
+ sys.stdout.flush()
- if not changed and report_stats:
- sys.stdout.write("No need to save data\n")
- sys.stdout.flush()
+ if not changed and report_stats:
+ sys.stdout.write("No need to save data\n")
+ sys.stdout.flush()
- if changed:
- break_tree(root_folder.linear)
- storage.store(root_folder)
+ if changed:
+ break_tree(root_folder.linear)
+ storage.store(root_folder)
- if changed and report_stats:
- print("Ok")
- print(objects, "objects passed")
- print(changed, "objects changed")
+ if changed and report_stats:
+ print("Ok")
+ print(objects, "objects passed")
+ print(changed, "objects changed")
if __name__ == '__main__':
- run()
+ run()
class SortBy(object):
- def __init__(self, sort_by):
- self.sort_by = sort_by
+ def __init__(self, sort_by):
+ self.sort_by = sort_by
- def __call__(self, o1, o2):
- try:
- attr1 = int(getattr(o1, self.sort_by))
- except (AttributeError, TypeError, ValueError):
- return 1
+ def __call__(self, o1, o2):
+ try:
+ attr1 = int(getattr(o1, self.sort_by))
+ except (AttributeError, TypeError, ValueError):
+ return 1
- try:
- attr2 = int(getattr(o2, self.sort_by))
- except (AttributeError, TypeError, ValueError):
- return -1
+ try:
+ attr2 = int(getattr(o2, self.sort_by))
+ except (AttributeError, TypeError, ValueError):
+ return -1
- return cmp(attr1, attr2)
+ return cmp(attr1, attr2)
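SortBy is a cmp-style comparator: objects that lack the attribute (or hold a
non-numeric value) compare as greater, so they tend to end up at the end of
the sorted list. Under Python 3, where list.sort() no longer accepts a
comparator, the same class could be reused via a key wrapper (sketch only):

    from functools import cmp_to_key
    linear.sort(key=cmp_to_key(SortBy(sort_by)))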
def walk_linear(linear, walker):
- for object in linear:
- if object.isBookmark:
- walker.bookmark(object, 0)
+ for object in linear:
+ if object.isBookmark:
+ walker.bookmark(object, 0)
def run():
- from getopt import getopt
- optlist, args = getopt(sys.argv[1:], "avmztrs")
-
- sort_by = "last_modified"
- reverse = 0
- report_stats = 1
-
- for _opt, _arg in optlist:
- if _opt == '-a':
- sort_by = "add_date"
- elif _opt == '-v':
- sort_by = "last_visit"
- elif _opt == '-m':
- sort_by = "last_modified"
- elif _opt == '-z':
- sort_by = "size"
- elif _opt == '-t':
- sort_by = "last_tested"
- elif _opt == '-r':
- reverse = 1
- elif _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
-
- from storage import storage
- storage = storage()
-
- if report_stats:
- sys.stdout.write("Loading %s: " % storage.filename)
- sys.stdout.flush()
-
- root_folder = storage.load()
-
- if report_stats:
- print("Ok")
- sys.stdout.write("Sorting (by %s): " % sort_by)
- sys.stdout.flush()
-
- from bkmk_objects import make_linear
- make_linear(root_folder)
-
- linear = root_folder.linear
- del linear[0] # exclude root folder from sorting
-
- by = SortBy(sort_by)
- linear.sort(by)
-
- from writers import writer
- output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by)
-
- if reverse:
- linear.reverse()
- output_filename = output_filename + "-reverse"
-
- if report_stats:
- print("done")
- sys.stdout.write("Writing %s: " % output_filename)
- sys.stdout.flush()
-
- outfile = open(output_filename, 'w')
- writer = writer(outfile)
- writer.root_folder(root_folder)
- walk_linear(linear, writer)
- outfile.close()
-
- if report_stats:
- print("Ok")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "avmztrs")
+
+ sort_by = "last_modified"
+ reverse = 0
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-a':
+ sort_by = "add_date"
+ elif _opt == '-v':
+ sort_by = "last_visit"
+ elif _opt == '-m':
+ sort_by = "last_modified"
+ elif _opt == '-z':
+ sort_by = "size"
+ elif _opt == '-t':
+ sort_by = "last_tested"
+ elif _opt == '-r':
+ reverse = 1
+ elif _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ from storage import storage
+ storage = storage()
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+
+ if report_stats:
+ print("Ok")
+ sys.stdout.write("Sorting (by %s): " % sort_by)
+ sys.stdout.flush()
+
+ from bkmk_objects import make_linear
+ make_linear(root_folder)
+
+ linear = root_folder.linear
+ del linear[0] # exclude root folder from sorting
+
+ by = SortBy(sort_by)
+ linear.sort(by)
+
+ from writers import writer
+ output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by)
+
+ if reverse:
+ linear.reverse()
+ output_filename = output_filename + "-reverse"
+
+ if report_stats:
+ print("done")
+ sys.stdout.write("Writing %s: " % output_filename)
+ sys.stdout.flush()
+
+ outfile = open(output_filename, 'w')
+ writer = writer(outfile)
+ writer.root_folder(root_folder)
+ walk_linear(linear, writer)
+ outfile.close()
+
+ if report_stats:
+ print("Ok")
if __name__ == '__main__':
- run()
+ run()
storage_name, storage_params = parse_params(environ.get("BKMK_STORAGE", "pickle"))
def import_storage(storage_name):
- exec("from Storage import bkmk_st%s" % storage_name)
- exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name))
- return storage
+ exec("from Storage import bkmk_st%s" % storage_name)
+ exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name))
+ return storage
storage = import_storage(storage_name)
set_params(storage, storage_params)
__version__ = "Revision: 1.15 "
-# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp
+# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp
# Originally by ken manheimer, ken.manheimer@nist.gov, jan 1995.
# Prior art: Initially based python code examples demonstrating usage of pipes
got0 = self.readPendingChars(n)
got = got + got0
n = n - len(got0)
- return got
+ return got
def readPendingChars(self, max=None):
"""Read all currently pending subprocess output as a single string."""
return self.readbuf.readPendingChars(max)
got = ""
if self.buf:
- if (max > 0) and (len(self.buf) > max):
- got = self.buf[0:max]
- self.buf = self.buf[max:]
- else:
- got, self.buf = self.buf, ''
- return got
+ if (max > 0) and (len(self.buf) > max):
+ got = self.buf[0:max]
+ self.buf = self.buf[max:]
+ else:
+ got, self.buf = self.buf, ''
+ return got
if self.eof:
- return ''
+ return ''
sel = select.select([self.fd], [], [self.fd], 0)
if sel[2]:
line = string.splitfields(line, ':')
it[string.strip(line[0])] = (
string.strip(string.join(line[1:])))
-
+
def getreply(self):
"""Consume next response from ph, returning list of lines or string
err."""
writer_name, writer_params = parse_params(environ.get("BKMK_WRITER", "html"))
def import_writer(writer_name):
- exec("from Writers import bkmk_w%s" % writer_name)
- exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name))
- return writer
+ exec("from Writers import bkmk_w%s" % writer_name)
+ exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name))
+ return writer
writer = import_writer(writer_name)
set_params(writer, writer_params)