From: Oleg Broytman Date: Sat, 13 May 2017 19:34:41 +0000 (+0300) Subject: Cleanup code: use 4 spaces X-Git-Tag: 5.0.0~117 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=c88cb7a75e7caf1d67466cfa107981d95115fa0c;p=bookmarks_db.git Cleanup code: use 4 spaces --- diff --git a/Robots/bkmk_rforking.py b/Robots/bkmk_rforking.py index 4ad9984..cad8b2d 100644 --- a/Robots/bkmk_rforking.py +++ b/Robots/bkmk_rforking.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['robot_forking'] @@ -13,9 +13,9 @@ __all__ = ['robot_forking'] import sys, os try: - import cPickle as pickle + import cPickle as pickle except ImportError: - import pickle + import pickle from subproc import Subprocess, RecordFile from bkmk_objects import Robot @@ -29,74 +29,74 @@ check_subp = None subp_pipe = None def stop_subp(log): - global check_subp, subp_pipe - if check_subp: - if log: log(" restarting hanging subprocess") - del check_subp - del subp_pipe + global check_subp, subp_pipe + if check_subp: + if log: log(" restarting hanging subprocess") + del check_subp + del subp_pipe def restart_subp(log): - global check_subp, subp_pipe - stop_subp(log) + global check_subp, subp_pipe + stop_subp(log) - check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]), - control_stderr=True) - subp_pipe = RecordFile(check_subp) + check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]), + control_stderr=True) + subp_pipe = RecordFile(check_subp) _set_subproc = True class robot_forking(Robot): - subproc = 'urllib2' # Default subprocess - - def check_url(self, bookmark): - global _set_subproc - if _set_subproc: - _set_subproc = False - - subproc = self.subproc - subproc_attrs = [] - for attr in dir(self): - if attr.startswith('subproc_'): - subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr))) - if subproc_attrs: - subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs]) - os.environ['BKMK_ROBOT'] = subproc - - if not check_subp: - restart_subp(self.log) # Not restart, just start afresh - - try: - save_parent = bookmark.parent - bookmark.parent = None - subp_pipe.write_record(pickle.dumps(bookmark)) - - if check_subp.waitForPendingChar(60): # wait a minute - new_b = pickle.loads(subp_pipe.read_record()) - for attr in ("error", "no_error", - "moved", "size", "md5", "real_title", - "last_tested", "last_modified", "test_time", - "icon", "icon_href"): - if hasattr(new_b, attr): - setattr(bookmark, attr, getattr(new_b, attr)) - else: - bookmark.error = "Subprocess connection timed out" - restart_subp(self.log) - - bookmark.parent = save_parent - - while True: - error = check_subp.readPendingErrLine() - if not error: - break - sys.stderr.write("(subp) " + error) - sys.stderr.flush() - - except KeyboardInterrupt: - return 0 - - # Tested - return 1 - - def stop(self): - stop_subp(None) # Stop subprocess; do not log restarting + subproc = 'urllib2' # Default subprocess + + def check_url(self, bookmark): + global _set_subproc + if _set_subproc: + _set_subproc = False + + subproc = self.subproc + subproc_attrs = [] + for attr in dir(self): + if attr.startswith('subproc_'): + subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr))) + if subproc_attrs: + subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs]) + os.environ['BKMK_ROBOT'] = subproc + + if not check_subp: + restart_subp(self.log) # Not restart, just start afresh + + try: + save_parent = bookmark.parent + bookmark.parent = None + subp_pipe.write_record(pickle.dumps(bookmark)) + + if check_subp.waitForPendingChar(60): # wait a minute + new_b = pickle.loads(subp_pipe.read_record()) + for attr in ("error", "no_error", + "moved", "size", "md5", "real_title", + "last_tested", "last_modified", "test_time", + "icon", "icon_href"): + if hasattr(new_b, attr): + setattr(bookmark, attr, getattr(new_b, attr)) + else: + bookmark.error = "Subprocess connection timed out" + restart_subp(self.log) + + bookmark.parent = save_parent + + while True: + error = check_subp.readPendingErrLine() + if not error: + break + sys.stderr.write("(subp) " + error) + sys.stderr.flush() + + except KeyboardInterrupt: + return 0 + + # Tested + return 1 + + def stop(self): + stop_subp(None) # Stop subprocess; do not log restarting diff --git a/Robots/bkmk_rforking_sub.py b/Robots/bkmk_rforking_sub.py index 7dd1f5f..9b25b5b 100755 --- a/Robots/bkmk_rforking_sub.py +++ b/Robots/bkmk_rforking_sub.py @@ -6,7 +6,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1999-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 1999-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = [] @@ -18,32 +18,32 @@ lib_dir = os.path.normpath(os.path.join(os.path.dirname(sys.argv[0]), os.pardir) sys.path.append(lib_dir) # for bkmk_objects.py try: - import cPickle - pickle = cPickle + import cPickle + pickle = cPickle except ImportError: - import pickle + import pickle from subproc import RecordFile def run(): - bkmk_in = RecordFile(sys.stdin) - bkmk_out = RecordFile(sys.stdout) + bkmk_in = RecordFile(sys.stdin) + bkmk_out = RecordFile(sys.stdout) - from m_lib.flog import openlog - log = openlog("check2.log") - from robots import robot - robot = robot(log) + from m_lib.flog import openlog + log = openlog("check2.log") + from robots import robot + robot = robot(log) - while 1: - bookmark = pickle.loads(bkmk_in.read_record()) - log(bookmark.href) - robot.check_url(bookmark) - bkmk_out.write_record(pickle.dumps(bookmark)) - log.outfile.flush() + while 1: + bookmark = pickle.loads(bkmk_in.read_record()) + log(bookmark.href) + robot.check_url(bookmark) + bkmk_out.write_record(pickle.dumps(bookmark)) + log.outfile.flush() - log.close() + log.close() if __name__ == '__main__': - run() + run() diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py index 8dd032b..0600e73 100644 --- a/Robots/bkmk_robot_base.py +++ b/Robots/bkmk_robot_base.py @@ -35,204 +35,204 @@ reloc_dict = { def get_error(e): - if isinstance(e, str): - return e + if isinstance(e, str): + return e - else: - s = [] - for i in e: - s.append("'%s'" % str(i).replace('\n', "\\n")) - return "(%s)" % ' '.join(s) + else: + s = [] + for i in e: + s.append("'%s'" % str(i).replace('\n', "\\n")) + return "(%s)" % ' '.join(s) icons = {} # Icon cache; maps URL to a tuple (content type, data) # or None if there is no icon. class robot_base(Robot): - timeout = 60 + timeout = 60 - def __init__(self, *args, **kw): + def __init__(self, *args, **kw): Robot.__init__(self, *args, **kw) socket.setdefaulttimeout(int(self.timeout)) - def check_url(self, bookmark): - try: - self.start = int(time.time()) - bookmark.icon = None + def check_url(self, bookmark): + try: + self.start = int(time.time()) + bookmark.icon = None - url_type, url_rest = urllib.splittype(bookmark.href) - url_host, url_path = urllib.splithost(url_rest) - url_path, url_tag = urllib.splittag(url_path) + url_type, url_rest = urllib.splittype(bookmark.href) + url_host, url_path = urllib.splithost(url_rest) + url_path, url_tag = urllib.splittag(url_path) - url = "%s://%s%s" % (url_type, url_host, url_path) - error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True) + url = "%s://%s%s" % (url_type, url_host, url_path) + error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True) - if error: - bookmark.error = error - return 1 + if error: + bookmark.error = error + return 1 - if redirect_code: - self.set_redirect(bookmark, redirect_code, redirect_to) - return 1 + if redirect_code: + self.set_redirect(bookmark, redirect_code, redirect_to) + return 1 - size = 0 - last_modified = None + size = 0 + last_modified = None - if headers: - try: - size = headers["Content-Length"] - except KeyError: - size = len(content) + if headers: + try: + size = headers["Content-Length"] + except KeyError: + size = len(content) - try: - last_modified = headers["Last-Modified"] - except KeyError: - pass + try: + last_modified = headers["Last-Modified"] + except KeyError: + pass + + if last_modified: + last_modified = parse_time(last_modified) + else: + size = len(content) if last_modified: - last_modified = parse_time(last_modified) - else: - size = len(content) - - if last_modified: - last_modified = str(int(last_modified)) - else: - last_modified = bookmark.last_visit - - bookmark.size = size - bookmark.last_modified = last_modified - - md5 = md5wrapper() - if url_type == "ftp": # Pass welcome message through MD5 - md5.update(self.get_ftp_welcome()) - - md5.update(content) - bookmark.md5 = str(md5) - - if headers: - try: - content_type = headers["Content-Type"] - self.log(" Content-Type: %s" % content_type) - try: - # extract charset from "text/html; foo; charset=UTF-8, bar; baz;" - content_type, charset = content_type.split(';', 1) - content_type = content_type.strip() - charset = charset.split('=')[1].strip().split(',')[0] - self.log(" HTTP charset : %s" % charset) - except (ValueError, IndexError): - charset = None - self.log(" no charset in Content-Type header") - for ctype in ("text/html", "application/xhtml+xml"): - if content_type.startswith(ctype): - html = True - break - else: - html = False - if html: - parser = parse_html(content, charset, self.log) - if parser: - bookmark.real_title = parser.title - icon = parser.icon - else: - icon = None - if not icon: - icon = "/favicon.ico" - icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon) - self.log(" looking for icon at: %s" % icon_url) - if icon_url in icons: - if icons[icon_url]: - bookmark.icon_href = icon_url - content_type, bookmark.icon = icons[icon_url] - self.log(" cached icon: %s" % content_type) - else: - self.log(" cached icon: no icon") - else: - try: - _icon_url = icon_url - for i in range(8): - error, icon_redirect_code, icon_redirect_to, \ - icon_headers, icon_data = \ - self.get(bookmark, _icon_url) - if icon_redirect_code: - _icon_url = icon_redirect_to - self.log(" redirect to : %s" % _icon_url) - else: - if icon_data is None: - raise IOError("No icon") - break + last_modified = str(int(last_modified)) + else: + last_modified = bookmark.last_visit + + bookmark.size = size + bookmark.last_modified = last_modified + + md5 = md5wrapper() + if url_type == "ftp": # Pass welcome message through MD5 + md5.update(self.get_ftp_welcome()) + + md5.update(content) + bookmark.md5 = str(md5) + + if headers: + try: + content_type = headers["Content-Type"] + self.log(" Content-Type: %s" % content_type) + try: + # extract charset from "text/html; foo; charset=UTF-8, bar; baz;" + content_type, charset = content_type.split(';', 1) + content_type = content_type.strip() + charset = charset.split('=')[1].strip().split(',')[0] + self.log(" HTTP charset : %s" % charset) + except (ValueError, IndexError): + charset = None + self.log(" no charset in Content-Type header") + for ctype in ("text/html", "application/xhtml+xml"): + if content_type.startswith(ctype): + html = True + break + else: + html = False + if html: + parser = parse_html(content, charset, self.log) + if parser: + bookmark.real_title = parser.title + icon = parser.icon else: - raise IOError("Too many redirects") - except: - etype, emsg, tb = sys.exc_info() - self.log(" no icon : %s %s" % (etype, emsg)) - etype = emsg = tb = None - icons[icon_url] = None - else: - content_type = icon_headers["Content-Type"] - if content_type.startswith("application/") \ - or content_type.startswith("image/") \ - or content_type.startswith("text/plain"): - bookmark.icon_href = icon_url - self.log(" got icon : %s" % content_type) - if content_type.startswith("application/") \ - or content_type.startswith("text/plain"): - self.log(" non-image content type, assume x-icon") - content_type = 'image/x-icon' - bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data)) - icons[icon_url] = (content_type, bookmark.icon) + icon = None + if not icon: + icon = "/favicon.ico" + icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon) + self.log(" looking for icon at: %s" % icon_url) + if icon_url in icons: + if icons[icon_url]: + bookmark.icon_href = icon_url + content_type, bookmark.icon = icons[icon_url] + self.log(" cached icon: %s" % content_type) + else: + self.log(" cached icon: no icon") else: - self.log(" no icon : bad content type '%s'" % content_type) - icons[icon_url] = None - if parser and parser.refresh: - refresh = parser.refresh - try: - url = refresh.split('=', 1)[1] - except IndexError: - url = "self" - try: - timeout = float(refresh.split(';')[0]) - except (IndexError, ValueError): - self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh)) - else: - try: - timeout = int(refresh.split(';')[0]) - except ValueError: - pass # float timeout - self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout)) - - except KeyError as key: - self.log(" no header: %s" % key) - - except EOFError: - bookmark.error = "Unexpected EOF (FTP server closed connection)" - self.log(' EOF: %s' % bookmark.error) - - except KeyboardInterrupt: - self.log("Keyboard interrupt (^C)") - return 0 - - except socket.error as e: - bookmark.error = get_error(e) - self.log(bookmark.error) - - except: - import traceback - traceback.print_exc() - bookmark.error = "Exception!" - self.log(' Exception: %s' % bookmark.error) - - finally: - self.finish_check_url(bookmark) - - # Tested - return 1 - - def set_redirect(self, bookmark, errcode, newurl): + try: + _icon_url = icon_url + for i in range(8): + error, icon_redirect_code, icon_redirect_to, \ + icon_headers, icon_data = \ + self.get(bookmark, _icon_url) + if icon_redirect_code: + _icon_url = icon_redirect_to + self.log(" redirect to : %s" % _icon_url) + else: + if icon_data is None: + raise IOError("No icon") + break + else: + raise IOError("Too many redirects") + except: + etype, emsg, tb = sys.exc_info() + self.log(" no icon : %s %s" % (etype, emsg)) + etype = emsg = tb = None + icons[icon_url] = None + else: + content_type = icon_headers["Content-Type"] + if content_type.startswith("application/") \ + or content_type.startswith("image/") \ + or content_type.startswith("text/plain"): + bookmark.icon_href = icon_url + self.log(" got icon : %s" % content_type) + if content_type.startswith("application/") \ + or content_type.startswith("text/plain"): + self.log(" non-image content type, assume x-icon") + content_type = 'image/x-icon' + bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data)) + icons[icon_url] = (content_type, bookmark.icon) + else: + self.log(" no icon : bad content type '%s'" % content_type) + icons[icon_url] = None + if parser and parser.refresh: + refresh = parser.refresh + try: + url = refresh.split('=', 1)[1] + except IndexError: + url = "self" + try: + timeout = float(refresh.split(';')[0]) + except (IndexError, ValueError): + self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh)) + else: + try: + timeout = int(refresh.split(';')[0]) + except ValueError: + pass # float timeout + self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout)) + + except KeyError as key: + self.log(" no header: %s" % key) + + except EOFError: + bookmark.error = "Unexpected EOF (FTP server closed connection)" + self.log(' EOF: %s' % bookmark.error) + + except KeyboardInterrupt: + self.log("Keyboard interrupt (^C)") + return 0 + + except socket.error as e: + bookmark.error = get_error(e) + self.log(bookmark.error) + + except: + import traceback + traceback.print_exc() + bookmark.error = "Exception!" + self.log(' Exception: %s' % bookmark.error) + + finally: + self.finish_check_url(bookmark) + + # Tested + return 1 + + def set_redirect(self, bookmark, errcode, newurl): bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl) self.log(' Moved: %s' % bookmark.moved) - def finish_check_url(self, bookmark): - start = self.start - bookmark.last_tested = str(start) - now = int(time.time()) - bookmark.test_time = str(now - start) + def finish_check_url(self, bookmark): + start = self.start + bookmark.last_tested = str(start) + now = int(time.time()) + bookmark.test_time = str(now - start) diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py index 6f15ad5..c7324e3 100644 --- a/Robots/bkmk_rurllib.py +++ b/Robots/bkmk_rurllib.py @@ -17,38 +17,38 @@ from Robots.bkmk_robot_base import robot_base, get_error class RedirectException(Exception): - def __init__(self, errcode, newurl): - Exception.__init__(self) - self.errcode = errcode - self.newurl = newurl + def __init__(self, errcode, newurl): + Exception.__init__(self) + self.errcode = errcode + self.newurl = newurl class MyURLopener(urllib.URLopener): - # Error 301 -- relocated (permanently) - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - if headers.has_key('location'): - newurl = headers['location'] - elif headers.has_key('uri'): - newurl = headers['uri'] - else: - newurl = "Nowhere" - raise RedirectException(errcode, newurl) - - # Error 302 -- relocated (temporarily) - http_error_302 = http_error_301 - # Error 303 -- relocated (see other) - http_error_303 = http_error_301 - # Error 307 -- relocated (temporarily) - http_error_307 = http_error_301 - - # Error 401 -- authentication required - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError(('http error', errcode, "Authentication required ", headers)) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - if fp: - void = fp.read() - fp.close() - raise IOError(('http error', errcode, errmsg, headers)) + # Error 301 -- relocated (permanently) + def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): + if headers.has_key('location'): + newurl = headers['location'] + elif headers.has_key('uri'): + newurl = headers['uri'] + else: + newurl = "Nowhere" + raise RedirectException(errcode, newurl) + + # Error 302 -- relocated (temporarily) + http_error_302 = http_error_301 + # Error 303 -- relocated (see other) + http_error_303 = http_error_301 + # Error 307 -- relocated (temporarily) + http_error_307 = http_error_301 + + # Error 401 -- authentication required + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): + raise IOError(('http error', errcode, "Authentication required ", headers)) + + def http_error_default(self, url, fp, errcode, errmsg, headers): + if fp: + void = fp.read() + fp.close() + raise IOError(('http error', errcode, errmsg, headers)) urllib._urlopener = MyURLopener() @@ -71,55 +71,55 @@ urllib_ftpwrapper = urllib.ftpwrapper ftpcache_key = None class myftpwrapper(urllib_ftpwrapper): - def __init__(self, user, passwd, host, port, dirs): - urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) - global ftpcache_key - ftpcache_key = (user, host, port, '/'.join(dirs)) + def __init__(self, user, passwd, host, port, dirs): + urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) + global ftpcache_key + ftpcache_key = (user, host, port, '/'.join(dirs)) urllib.ftpwrapper = myftpwrapper class robot_urllib(robot_base): - def get(self, bookmark, url, accept_charset=False): - try: - # Set fake referer to the base URL - urllib._urlopener.addheaders[2] = ('Referer', url) - - if accept_charset and bookmark.charset: - urllib._urlopener.addheader('Accept-Charset', bookmark.charset) - try: - fname, headers = urllib.urlretrieve(url) - finally: + def get(self, bookmark, url, accept_charset=False): + try: + # Set fake referer to the base URL + urllib._urlopener.addheaders[2] = ('Referer', url) + if accept_charset and bookmark.charset: - del urllib._urlopener.addheaders[-1] # Remove Accept-Charset - - infile = open(fname, 'rb') - content = infile.read() - infile.close() - - return None, None, None, headers, content - - except RedirectException as e: - return None, e.errcode, e.newurl, None, None - - except IOError as e: - if (e[0] == "http error") and (e[1] == -1): - error = None - bookmark.no_error = "The server did not return any header - it is not an error, actually" - self.log(' no headers: %s' % bookmark.no_error) - else: - error = get_error(e) - self.log(' Error: %s' % error) - - return error, None, None, None, None - - def get_ftp_welcome(self): - global ftpcache_key - _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome - ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. - # If there are - ftpcache_key in prev line is invalid. - return _welcome - - def finish_check_url(self, bookmark): - robot_base.finish_check_url(self, bookmark) - urllib.urlcleanup() + urllib._urlopener.addheader('Accept-Charset', bookmark.charset) + try: + fname, headers = urllib.urlretrieve(url) + finally: + if accept_charset and bookmark.charset: + del urllib._urlopener.addheaders[-1] # Remove Accept-Charset + + infile = open(fname, 'rb') + content = infile.read() + infile.close() + + return None, None, None, headers, content + + except RedirectException as e: + return None, e.errcode, e.newurl, None, None + + except IOError as e: + if (e[0] == "http error") and (e[1] == -1): + error = None + bookmark.no_error = "The server did not return any header - it is not an error, actually" + self.log(' no headers: %s' % bookmark.no_error) + else: + error = get_error(e) + self.log(' Error: %s' % error) + + return error, None, None, None, None + + def get_ftp_welcome(self): + global ftpcache_key + _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome + ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db. + # If there are - ftpcache_key in prev line is invalid. + return _welcome + + def finish_check_url(self, bookmark): + robot_base.finish_check_url(self, bookmark) + urllib.urlcleanup() diff --git a/Storage/bkmk_stflad.py b/Storage/bkmk_stflad.py index b936c4a..63ee040 100644 --- a/Storage/bkmk_stflad.py +++ b/Storage/bkmk_stflad.py @@ -18,123 +18,123 @@ from bkmk_objects import Folder, Bookmark, Ruler, Walker class storage_flad(Walker): - filename = "bookmarks_db.flad" - - def __init__(self): - self.first_object = 1 - - def root_folder(self, f): - header = string.replace(f.header, ".", ".\n") - header = string.replace(header, "<", "\n<", 3)[1:] - header_file = open("header", 'w') - header_file.write(header + "\n") - header_file.write('

%s

\n\n' % f.name) - if f.comment: header_file.write('
%s\n' % f.comment) - header_file.close() - - def start_folder(self, f, level): - self.outfile.write(""" -Level: %d -Folder: %s -AddDate: %s -Comment: %s -LastModified: %s -""" % (level, f.name, f.add_date, f.comment, f.last_modified or '')) - - def bookmark(self, b, level): - self.outfile.write(""" -Level: %d -Title: %s -URL: %s -AddDate: %s -LastVisit: %s -LastModified: %s -Keyword: %s -Comment: %s -IconURI: %s -Icon: %s -Charset: %s -""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified, - b.keyword, b.comment.replace('\n', "\\n"), - b.icon_href or '', b.icon or '', b.charset or '')) - - def ruler(self, r, level): - self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1)) - - def store(self, root_folder): - self.outfile = open(self.filename, 'w') - root_folder.walk_depth(self) - self.outfile.close() - - def unindent(self, old_level, new_level): - while old_level > new_level: - old_level = old_level - 1 - del self.folder_stack[-1] - - if self.folder_stack: - self.current_folder = self.folder_stack[-1] - else: - self.current_folder = None - - def load(self): - bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"]) - - root_folder = Folder() - self.folder_stack = [root_folder] - self.current_folder = root_folder - - header_file = open("header", 'r') - header = header_file.read() - header_file.close() - - header = string.split(header, "\n") - root_folder.header = string.join(header[:5], '') - root_folder.name = header[5][4:-5] - root_folder.comment = string.join(header[7:], '')[4:] - - save_level = 0 - got_folder = 1 # Start as if we already have one folder - - for record in bookmarks_db: - level = int(record["Level"]) - - if level == save_level: - pass - elif level == save_level + 1: - if not got_folder: - raise ValueError("indent without folder") - elif level <= save_level - 1: - self.unindent(save_level, level) - else: - raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1)) - - save_level = level - got_folder = record.has_key("Folder") # Test here to save got_folder for next loop - - if record.has_key("URL"): - comment = record["Comment"].replace("\\n", '\n') - bookmark = Bookmark(record["URL"], record["AddDate"], - record["LastVisit"], record["LastModified"], - record["Keyword"], comment) - bookmark.name = record["Title"] - self.current_folder.append(bookmark) - - elif record.has_key("Folder"): - folder = Folder(record["AddDate"], record["Comment"], record["LastModified"]) - folder.name = record["Folder"] - self.current_folder.append(folder) - self.folder_stack.append(folder) - self.current_folder = folder - - elif record.has_key("Ruler"): - self.current_folder.append(Ruler()) - - else: - raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)) - - if save_level >= 0: - self.unindent(save_level, 0) - else: - raise ValueError("new level (%d) too little - must be >= 0" % save_level) - - return root_folder + filename = "bookmarks_db.flad" + + def __init__(self): + self.first_object = 1 + + def root_folder(self, f): + header = string.replace(f.header, ".", ".\n") + header = string.replace(header, "<", "\n<", 3)[1:] + header_file = open("header", 'w') + header_file.write(header + "\n") + header_file.write('

%s

\n\n' % f.name) + if f.comment: header_file.write('
%s\n' % f.comment) + header_file.close() + + def start_folder(self, f, level): + self.outfile.write(""" + Level: %d + Folder: %s + AddDate: %s + Comment: %s + LastModified: %s + """ % (level, f.name, f.add_date, f.comment, f.last_modified or '')) + + def bookmark(self, b, level): + self.outfile.write(""" + Level: %d + Title: %s + URL: %s + AddDate: %s + LastVisit: %s + LastModified: %s + Keyword: %s + Comment: %s + IconURI: %s + Icon: %s + Charset: %s + """ % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified, + b.keyword, b.comment.replace('\n', "\\n"), + b.icon_href or '', b.icon or '', b.charset or '')) + + def ruler(self, r, level): + self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1)) + + def store(self, root_folder): + self.outfile = open(self.filename, 'w') + root_folder.walk_depth(self) + self.outfile.close() + + def unindent(self, old_level, new_level): + while old_level > new_level: + old_level = old_level - 1 + del self.folder_stack[-1] + + if self.folder_stack: + self.current_folder = self.folder_stack[-1] + else: + self.current_folder = None + + def load(self): + bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"]) + + root_folder = Folder() + self.folder_stack = [root_folder] + self.current_folder = root_folder + + header_file = open("header", 'r') + header = header_file.read() + header_file.close() + + header = string.split(header, "\n") + root_folder.header = string.join(header[:5], '') + root_folder.name = header[5][4:-5] + root_folder.comment = string.join(header[7:], '')[4:] + + save_level = 0 + got_folder = 1 # Start as if we already have one folder + + for record in bookmarks_db: + level = int(record["Level"]) + + if level == save_level: + pass + elif level == save_level + 1: + if not got_folder: + raise ValueError("indent without folder") + elif level <= save_level - 1: + self.unindent(save_level, level) + else: + raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1)) + + save_level = level + got_folder = record.has_key("Folder") # Test here to save got_folder for next loop + + if record.has_key("URL"): + comment = record["Comment"].replace("\\n", '\n') + bookmark = Bookmark(record["URL"], record["AddDate"], + record["LastVisit"], record["LastModified"], + record["Keyword"], comment) + bookmark.name = record["Title"] + self.current_folder.append(bookmark) + + elif record.has_key("Folder"): + folder = Folder(record["AddDate"], record["Comment"], record["LastModified"]) + folder.name = record["Folder"] + self.current_folder.append(folder) + self.folder_stack.append(folder) + self.current_folder = folder + + elif record.has_key("Ruler"): + self.current_folder.append(Ruler()) + + else: + raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)) + + if save_level >= 0: + self.unindent(save_level, 0) + else: + raise ValueError("new level (%d) too little - must be >= 0" % save_level) + + return root_folder diff --git a/Storage/bkmk_stjson.py b/Storage/bkmk_stjson.py index 86e0270..5fbeda8 100644 --- a/Storage/bkmk_stjson.py +++ b/Storage/bkmk_stjson.py @@ -5,16 +5,16 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['storage_json'] try: - import json + import json except ImportError: - import simplejson as json + import simplejson as json from bkmk_objects import Folder, Bookmark, Ruler, Walker @@ -115,9 +115,9 @@ class storage_json(Walker): self.current_folder = root_folder if "type" not in bookmarks_dict: - bookmarks_dict["id"] = "0" - bookmarks_dict["title"] = "" - bookmarks_dict["type"] = "text/x-moz-place-container" + bookmarks_dict["id"] = "0" + bookmarks_dict["title"] = "" + bookmarks_dict["type"] = "text/x-moz-place-container" self.load_folder(root_folder, bookmarks_dict) if self.folder_stack: raise RuntimeError('Excessive folder stack: %s' % self.folder_stack) diff --git a/Storage/bkmk_stpickle.py b/Storage/bkmk_stpickle.py index 444cf35..2598cf6 100644 --- a/Storage/bkmk_stpickle.py +++ b/Storage/bkmk_stpickle.py @@ -5,31 +5,31 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['storage_pickle'] try: - import cPickle - pickle = cPickle + import cPickle + pickle = cPickle except ImportError: - import pickle + import pickle class storage_pickle(object): - filename = "bookmarks_db.pickle" + filename = "bookmarks_db.pickle" - def store(self, root_folder): - outfile = open(self.filename, 'wb') - pickle.dump(root_folder, outfile, 1) - outfile.close() + def store(self, root_folder): + outfile = open(self.filename, 'wb') + pickle.dump(root_folder, outfile, 1) + outfile.close() - def load(self): - infile = open(self.filename, 'rb') - root_folder = pickle.load(infile) - infile.close() + def load(self): + infile = open(self.filename, 'rb') + root_folder = pickle.load(infile) + infile.close() - return root_folder + return root_folder diff --git a/Writers/bkmk_wflad.py b/Writers/bkmk_wflad.py index 999872b..11cd143 100644 --- a/Writers/bkmk_wflad.py +++ b/Writers/bkmk_wflad.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['writer_flad'] @@ -15,53 +15,53 @@ from bkmk_objects import Writer def strftime(s): - try: - return time.strftime("%a %d %b %Y %T", time.localtime(int(s))) - except (TypeError, ValueError): # s is None or is already formatted - return s + try: + return time.strftime("%a %d %b %Y %T", time.localtime(int(s))) + except (TypeError, ValueError): # s is None or is already formatted + return s class writer_flad(Writer): - filename = "bookmarks_db.flad" - - def __init__(self, outfile, prune=None): - Writer.__init__(self, outfile, prune) - self.first_object = 1 - - def start_folder(self, f, level): - self.outfile.write(""" -Level: %d -Folder: %s -AddDate: %s -Comment: %s -LastModified: %s -""" % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified))) - - def bookmark(self, b, level): - self.outfile.write(""" -Level: %d -Title: %s -URL: %s -AddDate: %s -LastVisit: %s -LastModified: %s -Keyword: %s -Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment)) - - for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"), - ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"), - ("real_title", "RealTitle"), ("test_time", "TestTime"), - ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")): - if hasattr(b, attr_name): - value = getattr(b, attr_name) - if isinstance(value, unicode): - value = value.encode('utf-8') - self.outfile.write("\n%s: %s" % (attr_out, value)) - - if hasattr(b, "last_tested"): - self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested")))) - - self.outfile.write("\n") - - def ruler(self, r, level): - self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1)) + filename = "bookmarks_db.flad" + + def __init__(self, outfile, prune=None): + Writer.__init__(self, outfile, prune) + self.first_object = 1 + + def start_folder(self, f, level): + self.outfile.write(""" + Level: %d + Folder: %s + AddDate: %s + Comment: %s + LastModified: %s + """ % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified))) + + def bookmark(self, b, level): + self.outfile.write(""" + Level: %d + Title: %s + URL: %s + AddDate: %s + LastVisit: %s + LastModified: %s + Keyword: %s + Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment)) + + for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"), + ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"), + ("real_title", "RealTitle"), ("test_time", "TestTime"), + ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")): + if hasattr(b, attr_name): + value = getattr(b, attr_name) + if isinstance(value, unicode): + value = value.encode('utf-8') + self.outfile.write("\n%s: %s" % (attr_out, value)) + + if hasattr(b, "last_tested"): + self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested")))) + + self.outfile.write("\n") + + def ruler(self, r, level): + self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1)) diff --git a/Writers/bkmk_wflad_err.py b/Writers/bkmk_wflad_err.py index 418a3e0..a48484c 100644 --- a/Writers/bkmk_wflad_err.py +++ b/Writers/bkmk_wflad_err.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['writer_flad_err'] @@ -15,8 +15,8 @@ from bkmk_wflad import writer_flad class writer_flad_err(writer_flad): - filename = "bookmarks_db.errors" + filename = "bookmarks_db.errors" - def bookmark(self, b, level): - if hasattr(b, "error"): - writer_flad.bookmark(self, b, level) + def bookmark(self, b, level): + if hasattr(b, "error"): + writer_flad.bookmark(self, b, level) diff --git a/Writers/bkmk_whtml.py b/Writers/bkmk_whtml.py index 707b52a..f7a1d65 100644 --- a/Writers/bkmk_whtml.py +++ b/Writers/bkmk_whtml.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['writer_html'] @@ -15,51 +15,51 @@ from bkmk_objects import Writer, BKMK_FORMAT, quote_title def dump_comment(comment): - comment = comment.replace("
\n", "\n") - if BKMK_FORMAT == "NETSCAPE": - comment = comment.replace("\n", "
\n") - return comment + comment = comment.replace("
\n", "\n") + if BKMK_FORMAT == "NETSCAPE": + comment = comment.replace("\n", "
\n") + return comment ind_s = " "*4 class writer_html(Writer): - filename = "bookmarks.html" + filename = "bookmarks.html" - def _folder(self, f, level): - if f.comment: self.outfile.write('
%s\n' % dump_comment(f.comment)) - self.outfile.write(ind_s*level + "

\n") + def _folder(self, f, level): + if f.comment: self.outfile.write('

%s\n' % dump_comment(f.comment)) + self.outfile.write(ind_s*level + "

\n") - def root_folder(self, f): - self.outfile.write("\n") - self.outfile.write(f.header + "\n") - self.outfile.write('

%s

\n\n' % quote_title(f.name)) - self._folder(f, 0) + def root_folder(self, f): + self.outfile.write("\n") + self.outfile.write(f.header + "\n") + self.outfile.write('

%s

\n\n' % quote_title(f.name)) + self._folder(f, 0) - def start_folder(self, f, level): - self.outfile.write(ind_s*level + '

%s

\n' % quote_title(f.name)) - self._folder(f, level) + def start_folder(self, f, level): + self.outfile.write(ind_s*level + '

%s

\n' % quote_title(f.name)) + self._folder(f, level) - def end_folder(self, f, level): - self.outfile.write(ind_s*level + "

\n") + def end_folder(self, f, level): + self.outfile.write(ind_s*level + "

\n") - def bookmark(self, b, level): - self.outfile.write(ind_s*(level+1) + '

%s\n' % quote_title(b.name)) - if b.comment: self.outfile.write('
%s\n' % dump_comment(b.comment)) + def bookmark(self, b, level): + self.outfile.write(ind_s*(level+1) + '
%s\n' % quote_title(b.name)) + if b.comment: self.outfile.write('
%s\n' % dump_comment(b.comment)) - def ruler(self, r, level): - self.outfile.write(ind_s*(level+1) + "
\n") + def ruler(self, r, level): + self.outfile.write(ind_s*(level+1) + "
\n") diff --git a/Writers/bkmk_wtxt.py b/Writers/bkmk_wtxt.py index ba2196d..77b76d3 100644 --- a/Writers/bkmk_wtxt.py +++ b/Writers/bkmk_wtxt.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['writer_txt'] @@ -16,19 +16,19 @@ ind_s = " "*4 class writer_txt(Writer): - filename = "dump.txt" + filename = "dump.txt" - def root_folder(self, f): - self.outfile.write("Folder: %s\n" % f.name) + def root_folder(self, f): + self.outfile.write("Folder: %s\n" % f.name) - def start_folder(self, f, level): - self.outfile.write(ind_s*level + "Folder: %s\n" % f.name) + def start_folder(self, f, level): + self.outfile.write(ind_s*level + "Folder: %s\n" % f.name) - def end_folder(self, f, level): - self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name) + def end_folder(self, f, level): + self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name) - def bookmark(self, b, level): - self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name) + def bookmark(self, b, level): + self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name) - def ruler(self, r, level): - self.outfile.write(ind_s*(level+1) + "-----\n") + def ruler(self, r, level): + self.outfile.write(ind_s*(level+1) + "-----\n") diff --git a/bkmk-add.py b/bkmk-add.py index 860688e..df12452 100755 --- a/bkmk-add.py +++ b/bkmk-add.py @@ -15,67 +15,67 @@ from robots import robot def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "t:") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "t:") - report_stats = 1 - title = '' + report_stats = 1 + title = '' - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - elif _opt == '-t': - title = _arg - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + elif _opt == '-t': + title = _arg + try: + del _opt, _arg + except NameError: + pass - if len(args) != 1: - sys.stderr.write("bkmk-add: too many or too few arguments\n") - sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n") - sys.exit(1) + if len(args) != 1: + sys.stderr.write("bkmk-add: too many or too few arguments\n") + sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n") + sys.exit(1) - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() + root_folder = storage.load() - if report_stats: - print("Ok") + if report_stats: + print("Ok") - href = args[0] - now = int(time.time()) - bookmark = Bookmark(href, str(now), '0', '0') - bookmark.name = '' - bookmark.parent = None + href = args[0] + now = int(time.time()) + bookmark = Bookmark(href, str(now), '0', '0') + bookmark.name = '' + bookmark.parent = None - global robot - robot = robot(None) + global robot + robot = robot(None) - if robot.check_url(bookmark): # get real title and last modified date - if title: # forced title - bookmark.name = title - elif hasattr(bookmark, "real_title"): - bookmark.name = bookmark.real_title - if report_stats: - sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name)) - del bookmark.parent - root_folder.append(bookmark) + if robot.check_url(bookmark): # get real title and last modified date + if title: # forced title + bookmark.name = title + elif hasattr(bookmark, "real_title"): + bookmark.name = bookmark.real_title + if report_stats: + sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name)) + del bookmark.parent + root_folder.append(bookmark) - if report_stats: - sys.stdout.write("Storing %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Storing %s: " % storage.filename) + sys.stdout.flush() - storage.store(root_folder) + storage.store(root_folder) - if report_stats: - print("Ok") + if report_stats: + print("Ok") if __name__ == '__main__': - run() + run() diff --git a/bkmk2db.py b/bkmk2db.py index b890532..5cf29ae 100755 --- a/bkmk2db.py +++ b/bkmk2db.py @@ -16,113 +16,113 @@ __license__ = "GNU GPL" def run(): - optlist, args = getopt(sys.argv[1:], "is") + optlist, args = getopt(sys.argv[1:], "is") - show_pbar = True - report_stats = 1 - - for _opt, _arg in optlist: - if _opt == '-i': - show_pbar = 0 - if _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass + show_pbar = True + report_stats = 1 + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass - if args: - if len(args) > 1: - sys.stderr.write("bkmk2db: too many arguments\n") - sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n") - sys.exit(1) + if args: + if len(args) > 1: + sys.stderr.write("bkmk2db: too many arguments\n") + sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n") + sys.exit(1) - filename = args[0] + filename = args[0] - else: - filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX + else: + filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX - if report_stats: - from storage import storage_name - sys.stdout.write("Converting %s to %s: " % (filename, storage_name)) - sys.stdout.flush() + if report_stats: + from storage import storage_name + sys.stdout.write("Converting %s to %s: " % (filename, storage_name)) + sys.stdout.flush() - if show_pbar: - show_pbar = sys.stderr.isatty() + if show_pbar: + show_pbar = sys.stderr.isatty() - if show_pbar: - try: - from m_lib.pbar.tty_pbar import ttyProgressBar - except ImportError: - show_pbar = 0 + if show_pbar: + try: + from m_lib.pbar.tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 - if show_pbar: - try: - size = os.path.getsize(filename) - except: - print(filename, ": no such file") - sys.exit(1) + if show_pbar: + try: + size = os.path.getsize(filename) + except: + print(filename, ": no such file") + sys.exit(1) - if show_pbar: - pbar = ttyProgressBar(0, size) - lng = 0 + if show_pbar: + pbar = ttyProgressBar(0, size) + lng = 0 - # This is for DOS - it counts CRLF, which len() counts as 1 char! - if os.name == 'dos' or os.name == 'nt' : - dos_add = 1 - else: - dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct + # This is for DOS - it counts CRLF, which len() counts as 1 char! + if os.name == 'dos' or os.name == 'nt' : + dos_add = 1 + else: + dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct - infile = open(filename, 'r') - parser = BkmkParser() + infile = open(filename, 'r') + parser = BkmkParser() - line_no = 0 - lng = 0 - ok = 1 + line_no = 0 + lng = 0 + ok = 1 - for line in infile: - if show_pbar: - lng = lng + len(line) + dos_add - pbar.display(lng) + for line in infile: + if show_pbar: + lng = lng + len(line) + dos_add + pbar.display(lng) - #line = line.strip() - line_no = line_no + 1 + #line = line.strip() + line_no = line_no + 1 - try: - parser.feed(line) - except: - ok = 0 - break + try: + parser.feed(line) + except: + ok = 0 + break - try: - parser.close() - except: - ok = 0 + try: + parser.close() + except: + ok = 0 - infile.close() + infile.close() - if show_pbar: - del pbar + if show_pbar: + del pbar - if report_stats: - print("Ok") - print(line_no, "lines proceed") - print(parser.urls, "urls found") - print(parser.objects, "objects created") + if report_stats: + print("Ok") + print(line_no, "lines proceed") + print(parser.urls, "urls found") + print(parser.objects, "objects created") - if ok: - from storage import storage - storage = storage() - storage.store(parser.root_folder) + if ok: + from storage import storage + storage = storage() + storage.store(parser.root_folder) - else: - import traceback - traceback.print_exc() - sys.exit(1) + else: + import traceback + traceback.print_exc() + sys.exit(1) if __name__ == '__main__': - run() + run() diff --git a/bkmk_objects.py b/bkmk_objects.py index dce9811..6897721 100644 --- a/bkmk_objects.py +++ b/bkmk_objects.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot', @@ -19,205 +19,205 @@ import os, urllib BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA") class Folder(list): - isFolder = 1 - isBookmark = 0 - - def __init__(self, add_date=None, comment='', last_modified=None): - super(Folder, self).__init__() - self.comment = comment - self.add_date = add_date - self.last_modified = last_modified - - def walk_depth(self, walker, level=0): - if hasattr(self, "header"): # root folder - prune = 0 - walker.root_folder(self) - else: - prune = walker.prune_folder(self) - if not prune: - walker.start_folder(self, level) - - if not prune: - for object in self: - if object.isFolder: - object.walk_depth(walker, level+1) - elif object.isBookmark: - walker.bookmark(object, level) - else: - walker.ruler(object, level) - - walker.end_folder(self, level) + isFolder = 1 + isBookmark = 0 + + def __init__(self, add_date=None, comment='', last_modified=None): + super(Folder, self).__init__() + self.comment = comment + self.add_date = add_date + self.last_modified = last_modified + + def walk_depth(self, walker, level=0): + if hasattr(self, "header"): # root folder + prune = 0 + walker.root_folder(self) + else: + prune = walker.prune_folder(self) + if not prune: + walker.start_folder(self, level) + + if not prune: + for object in self: + if object.isFolder: + object.walk_depth(walker, level+1) + elif object.isBookmark: + walker.bookmark(object, level) + else: + walker.ruler(object, level) + + walker.end_folder(self, level) class Bookmark(object): - isFolder = 0 - isBookmark = 1 - - def __init__(self, href, add_date, last_visit=None, last_modified=None, - keyword=None, comment='', icon_href=None, icon=None, - charset=None, parser_charset=None): - protocol, request = urllib.splittype(href) - user, password, port = None, None, None - host, path = urllib.splithost(request) - if host: - user, host = urllib.splituser(host) - if user: - user, password = urllib.splitpasswd(user) - host, port = urllib.splitport(host) - if port: port = int(port) - - if protocol == 'place': - href = protocol + ":" - else: - href = protocol + "://" - if user: - href += urllib.quote(user) - if password: - href += ':' + urllib.quote(password) - href += '@' - if host: - href += host.decode(parser_charset or 'utf-8').encode('idna') - if port: - href += ':%d' % port - if path: - href += path - - self.href = href - self.add_date = add_date - self.last_visit = last_visit - self.last_modified = last_modified - self.keyword = keyword - self.comment = comment - self.icon_href = icon_href - self.icon = icon - self.charset = charset + isFolder = 0 + isBookmark = 1 + + def __init__(self, href, add_date, last_visit=None, last_modified=None, + keyword=None, comment='', icon_href=None, icon=None, + charset=None, parser_charset=None): + protocol, request = urllib.splittype(href) + user, password, port = None, None, None + host, path = urllib.splithost(request) + if host: + user, host = urllib.splituser(host) + if user: + user, password = urllib.splitpasswd(user) + host, port = urllib.splitport(host) + if port: port = int(port) + + if protocol == 'place': + href = protocol + ":" + else: + href = protocol + "://" + if user: + href += urllib.quote(user) + if password: + href += ':' + urllib.quote(password) + href += '@' + if host: + href += host.decode(parser_charset or 'utf-8').encode('idna') + if port: + href += ':%d' % port + if path: + href += path + + self.href = href + self.add_date = add_date + self.last_visit = last_visit + self.last_modified = last_modified + self.keyword = keyword + self.comment = comment + self.icon_href = icon_href + self.icon = icon + self.charset = charset class Ruler(object): - isFolder = 0 - isBookmark = 0 + isFolder = 0 + isBookmark = 0 class Walker(object): - """ - Interface class. Any instance that will be passed to Folder.walk_depth - may be derived from this class. It is not mandatory - unlike Java - Python does not require interface classes; but it is convenient to have - some methods predefined to no-op, in case you do not want to - provide end_folder etc. - """ + """ + Interface class. Any instance that will be passed to Folder.walk_depth + may be derived from this class. It is not mandatory - unlike Java + Python does not require interface classes; but it is convenient to have + some methods predefined to no-op, in case you do not want to + provide end_folder etc. + """ - def root_folder(self, r): - pass + def root_folder(self, r): + pass - def start_folder(self, f, level): - pass + def start_folder(self, f, level): + pass - def end_folder(self, f, level): - pass + def end_folder(self, f, level): + pass - def bookmark(self, b, level): - pass + def bookmark(self, b, level): + pass - def ruler(self, r, level): - pass + def ruler(self, r, level): + pass - def prune_folder(self, folder): - return 0 + def prune_folder(self, folder): + return 0 class Writer(Walker): - def __init__(self, outfile, prune=None): - self.outfile = outfile - self.prune = prune + def __init__(self, outfile, prune=None): + self.outfile = outfile + self.prune = prune - def prune_folder(self, folder): - return self.prune == folder.name + def prune_folder(self, folder): + return self.prune == folder.name class Robot(object): - def __init__(self, log): - self.log = log + def __init__(self, log): + self.log = log - def stop(self): - pass # Nothing to do on cleanup + def stop(self): + pass # Nothing to do on cleanup # Helper class to make inverese links (nodes linked to their parent) class InverseLinker(Walker): - def root_folder(self, r): - self.parent_stack = [r] + def root_folder(self, r): + self.parent_stack = [r] - def start_folder(self, f, level): - f.parent = self.parent_stack[-1] - self.parent_stack.append(f) # Push the folder onto the stack of parents + def start_folder(self, f, level): + f.parent = self.parent_stack[-1] + self.parent_stack.append(f) # Push the folder onto the stack of parents - def end_folder(self, f, level): - del self.parent_stack[-1] # Pop off the stack + def end_folder(self, f, level): + del self.parent_stack[-1] # Pop off the stack - def bookmark(self, b, level): - b.parent = self.parent_stack[-1] + def bookmark(self, b, level): + b.parent = self.parent_stack[-1] - def ruler(self, r, level): - r.parent = self.parent_stack[-1] + def ruler(self, r, level): + r.parent = self.parent_stack[-1] # Helper class to make linear represenatation of the tree class Linear(Walker): - def root_folder(self, r): - r.linear = [r] - self.linear = r.linear + def root_folder(self, r): + r.linear = [r] + self.linear = r.linear - def add_object(self, object): - self.linear.append(object) + def add_object(self, object): + self.linear.append(object) - def start_folder(self, f, level): - self.add_object(f) + def start_folder(self, f, level): + self.add_object(f) - def bookmark(self, b, level): - self.add_object(b) + def bookmark(self, b, level): + self.add_object(b) - def ruler(self, r, level): - self.add_object(r) + def ruler(self, r, level): + self.add_object(r) # Helper - make linked linear represenatation of the tree, suitable to be stored in sequential storage def make_linear(root_folder): - linker = InverseLinker() - root_folder.walk_depth(linker) + linker = InverseLinker() + root_folder.walk_depth(linker) - linear = Linear() - root_folder.walk_depth(linear) + linear = Linear() + root_folder.walk_depth(linear) # Helper, opposite of make_linear - make a tree from the linked linear representation def make_tree(linear): - root_folder = linear[0] - del linear[0] + root_folder = linear[0] + del linear[0] - for object in linear: - object.parent.append(object) + for object in linear: + object.parent.append(object) - return root_folder + return root_folder def break_tree(linear): - del linear[0] + del linear[0] - for object in linear: - del object.parent + for object in linear: + del object.parent def quote_title(title): - if BKMK_FORMAT == "MOZILLA": - title = title.replace("'", "'") - return title + if BKMK_FORMAT == "MOZILLA": + title = title.replace("'", "'") + return title def unquote_title(title): - if BKMK_FORMAT == "MOZILLA": - from HTMLParser import HTMLParser - title = HTMLParser().unescape(title.replace("&", '&').decode('utf-8')) - title = title.encode('utf-8').replace("'", "'") - return title + if BKMK_FORMAT == "MOZILLA": + from HTMLParser import HTMLParser + title = HTMLParser().unescape(title.replace("&", '&').decode('utf-8')) + title = title.encode('utf-8').replace("'", "'") + return title def parse_params(param_str): diff --git a/bkmk_parser.py b/bkmk_parser.py index 8f44f47..6a69433 100644 --- a/bkmk_parser.py +++ b/bkmk_parser.py @@ -19,208 +19,208 @@ from bkmk_objects import Folder, Bookmark, Ruler DEBUG = os.environ.has_key("BKMK_DEBUG") if DEBUG: - def debug(note): - print(note) + def debug(note): + print(note) - def dump_names(folder_stack): - l = [] - for object in folder_stack: - if object.isFolder: - l.append(object.name) - return "'%s'" % "' '".join(l) + def dump_names(folder_stack): + l = [] + for object in folder_stack: + if object.isFolder: + l.append(object.name) + return "'%s'" % "' '".join(l) else: - def debug(note): - pass - dump_names = debug + def debug(note): + pass + dump_names = debug class BkmkParser(HTMLParser): - def __init__(self): - HTMLParser.__init__(self) - - self.urls = 0 - self.objects = 0 - - self.charset = None - self.recode = None - - def handle_data(self, data): - if data: - if self.charset and default_encoding: - data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace") - self.accumulator += data - - # Mozilla - get charset - def do_meta(self, attrs): - http_equiv = "" - content = "" - - for attrname, value in attrs: - value = value.strip() - if attrname == 'http-equiv': - http_equiv = value.lower() - elif attrname == 'content': - content = value - - if http_equiv == "content-type": - try: - # extract charset from "text/html; charset=UTF-8" - self.charset = content.split('=')[1] - except IndexError: - pass - - def start_title(self, attrs): - if default_encoding: - self.accumulator += '\n' % default_encoding - self.accumulator += "" - - def end_title(self): - self.accumulator += "" - - # Start root folder - def start_h1(self, attrs): - root_folder = Folder() - self.current_object = root_folder - self.root_folder = root_folder - self.current_folder = root_folder - self.folder_stack = [root_folder] - - self.root_folder.header = self.accumulator.strip() - self.accumulator = '' - - def end_h1(self): - accumulator = self.accumulator - self.accumulator = '' - - debug("Root folder name: `%s'" % accumulator) - self.root_folder.name = accumulator - - # Start a folder - def start_h3(self, attrs): - last_modified = None - for attrname, value in attrs: - value = value.strip() - if attrname == 'add_date': - add_date = value - elif attrname == 'last_modified': - last_modified = value - - debug("New folder...") - folder = Folder(add_date, last_modified=last_modified) - self.current_object = folder - self.current_folder.append(folder) - self.folder_stack.append(folder) # push new folder - self.current_folder = folder - self.objects += 1 - - def end_h3(self): - accumulator = self.accumulator - self.accumulator = '' - - debug("Folder name: `%s'" % accumulator) - self.current_folder.name = accumulator - - # Start a bookmark - def start_a(self, attrs): - add_date = None - last_visit = None - last_modified = None - keyword = '' - icon = None - charset = None - - for attrname, value in attrs: - value = value.strip() - if attrname == "href": - href = value - elif attrname == "add_date": - add_date = value - elif attrname == "last_visit": - last_visit = value - elif attrname == "last_modified": - last_modified = value - elif attrname == "shortcuturl": - keyword = value - elif attrname == "icon": - icon = value - elif attrname == "last_charset": - charset = value - - debug("Bookmark points to: `%s'" % href) - bookmark = Bookmark(href, add_date, last_visit, last_modified, - keyword=keyword, icon=icon, - charset=charset, parser_charset=self.charset or default_encoding) - self.current_object = bookmark - self.current_folder.append(bookmark) - self.urls += 1 - self.objects += 1 - - def end_a(self): - accumulator = self.accumulator - self.accumulator = '' - - debug("Bookmark name: `%s'" % accumulator) - bookmark = self.current_folder[-1] - bookmark.name = accumulator - - def flush(self): - accumulator = self.accumulator - - if accumulator: - self.accumulator = '' - - current_object = self.current_object - if current_object: - current_object.comment += accumulator.strip() - debug("Comment: `%s'" % current_object.comment) - - def start_dl(self, attrs): - self.flush() - - do_dt = start_dl - - # End of folder - def end_dl(self): - self.flush() - debug("End folder") - debug("Folder stack: %s" % dump_names(self.folder_stack)) - if self.folder_stack: - del self.folder_stack[-1] # pop last folder - if self.folder_stack: - self.current_folder = self.folder_stack[-1] - else: - debug("FOLDER STACK is EMPTY!!! (1)") - else: - debug("FOLDER STACK is EMPTY!!! (2)") - self.current_object = None - - def close(self): - HTMLParser.close(self) - if self.folder_stack: - raise ValueError("wrong folder stack: %s" % self.folder_stack) - - def do_dd(self, attrs): - pass - - do_p = do_dd - - # Start ruler - def do_hr(self, attrs): - self.flush() - debug("Ruler") - self.current_folder.append(Ruler()) - self.current_object = None - self.objects += 1 - - # BR in comment - def do_br(self, attrs): - self.accumulator += "
" - - # Allow < in the text - def unknown_starttag(self, tag, attrs): - self.accumulator += "<%s>" % tag - - # Do not allow unknow end tags - def unknown_endtag(self, tag): - raise NotImplementedError("Unknow end tag `%s'" % tag) + def __init__(self): + HTMLParser.__init__(self) + + self.urls = 0 + self.objects = 0 + + self.charset = None + self.recode = None + + def handle_data(self, data): + if data: + if self.charset and default_encoding: + data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace") + self.accumulator += data + + # Mozilla - get charset + def do_meta(self, attrs): + http_equiv = "" + content = "" + + for attrname, value in attrs: + value = value.strip() + if attrname == 'http-equiv': + http_equiv = value.lower() + elif attrname == 'content': + content = value + + if http_equiv == "content-type": + try: + # extract charset from "text/html; charset=UTF-8" + self.charset = content.split('=')[1] + except IndexError: + pass + + def start_title(self, attrs): + if default_encoding: + self.accumulator += '\n' % default_encoding + self.accumulator += "" + + def end_title(self): + self.accumulator += "" + + # Start root folder + def start_h1(self, attrs): + root_folder = Folder() + self.current_object = root_folder + self.root_folder = root_folder + self.current_folder = root_folder + self.folder_stack = [root_folder] + + self.root_folder.header = self.accumulator.strip() + self.accumulator = '' + + def end_h1(self): + accumulator = self.accumulator + self.accumulator = '' + + debug("Root folder name: `%s'" % accumulator) + self.root_folder.name = accumulator + + # Start a folder + def start_h3(self, attrs): + last_modified = None + for attrname, value in attrs: + value = value.strip() + if attrname == 'add_date': + add_date = value + elif attrname == 'last_modified': + last_modified = value + + debug("New folder...") + folder = Folder(add_date, last_modified=last_modified) + self.current_object = folder + self.current_folder.append(folder) + self.folder_stack.append(folder) # push new folder + self.current_folder = folder + self.objects += 1 + + def end_h3(self): + accumulator = self.accumulator + self.accumulator = '' + + debug("Folder name: `%s'" % accumulator) + self.current_folder.name = accumulator + + # Start a bookmark + def start_a(self, attrs): + add_date = None + last_visit = None + last_modified = None + keyword = '' + icon = None + charset = None + + for attrname, value in attrs: + value = value.strip() + if attrname == "href": + href = value + elif attrname == "add_date": + add_date = value + elif attrname == "last_visit": + last_visit = value + elif attrname == "last_modified": + last_modified = value + elif attrname == "shortcuturl": + keyword = value + elif attrname == "icon": + icon = value + elif attrname == "last_charset": + charset = value + + debug("Bookmark points to: `%s'" % href) + bookmark = Bookmark(href, add_date, last_visit, last_modified, + keyword=keyword, icon=icon, + charset=charset, parser_charset=self.charset or default_encoding) + self.current_object = bookmark + self.current_folder.append(bookmark) + self.urls += 1 + self.objects += 1 + + def end_a(self): + accumulator = self.accumulator + self.accumulator = '' + + debug("Bookmark name: `%s'" % accumulator) + bookmark = self.current_folder[-1] + bookmark.name = accumulator + + def flush(self): + accumulator = self.accumulator + + if accumulator: + self.accumulator = '' + + current_object = self.current_object + if current_object: + current_object.comment += accumulator.strip() + debug("Comment: `%s'" % current_object.comment) + + def start_dl(self, attrs): + self.flush() + + do_dt = start_dl + + # End of folder + def end_dl(self): + self.flush() + debug("End folder") + debug("Folder stack: %s" % dump_names(self.folder_stack)) + if self.folder_stack: + del self.folder_stack[-1] # pop last folder + if self.folder_stack: + self.current_folder = self.folder_stack[-1] + else: + debug("FOLDER STACK is EMPTY!!! (1)") + else: + debug("FOLDER STACK is EMPTY!!! (2)") + self.current_object = None + + def close(self): + HTMLParser.close(self) + if self.folder_stack: + raise ValueError("wrong folder stack: %s" % self.folder_stack) + + def do_dd(self, attrs): + pass + + do_p = do_dd + + # Start ruler + def do_hr(self, attrs): + self.flush() + debug("Ruler") + self.current_folder.append(Ruler()) + self.current_object = None + self.objects += 1 + + # BR in comment + def do_br(self, attrs): + self.accumulator += "
" + + # Allow < in the text + def unknown_starttag(self, tag, attrs): + self.accumulator += "<%s>" % tag + + # Do not allow unknow end tags + def unknown_endtag(self, tag): + raise NotImplementedError("Unknow end tag `%s'" % tag) diff --git a/check_dups.py b/check_dups.py index f635c23..0a267ac 100755 --- a/check_dups.py +++ b/check_dups.py @@ -16,79 +16,79 @@ __license__ = "GNU GPL" log_file = None def report_dup(href, object_no): - s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no) + s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no) - if log_file: - log_file.write("%s\n" % s) - else: - print(s) + if log_file: + log_file.write("%s\n" % s) + else: + print(s) def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "sl:") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "sl:") - report_stats = 1 - global log_file - log_filename = None + report_stats = 1 + global log_file + log_filename = None - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - if _opt == '-l': - log_filename = _arg - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + if _opt == '-l': + log_filename = _arg + try: + del _opt, _arg + except NameError: + pass - if report_stats: - print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design") + if report_stats: + print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design") - if args: - sys.stderr.write("check_urls: too many arguments\n") - sys.stderr.write("Usage: check_urls [-s] [-l logfile]\n") - sys.exit(1) + if args: + sys.stderr.write("check_urls: too many arguments\n") + sys.stderr.write("Usage: check_urls [-s] [-l logfile]\n") + sys.exit(1) - if log_filename: - log_file = open(log_filename, 'w') + if log_filename: + log_file = open(log_filename, 'w') - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() - from bkmk_objects import make_linear - make_linear(root_folder) - objects = len(root_folder.linear) + root_folder = storage.load() + from bkmk_objects import make_linear + make_linear(root_folder) + objects = len(root_folder.linear) - if report_stats: - print("Ok") + if report_stats: + print("Ok") - dup_dict = {} + dup_dict = {} - for object_no in range(objects): - object = root_folder.linear[object_no] + for object_no in range(objects): + object = root_folder.linear[object_no] - if object.isBookmark: - href = object.href - if dup_dict.has_key(href): - report_dup(href, dup_dict[href]) - else: - dup_dict[href] = object_no + if object.isBookmark: + href = object.href + if dup_dict.has_key(href): + report_dup(href, dup_dict[href]) + else: + dup_dict[href] = object_no - if log_filename: - log_file.close() + if log_filename: + log_file.close() - if report_stats: - print("Ok") - print(objects, "objects passed") + if report_stats: + print("Ok") + print(objects, "objects passed") if __name__ == '__main__': - run() + run() diff --git a/check_title.py b/check_title.py index e8215e8..ffab6fa 100755 --- a/check_title.py +++ b/check_title.py @@ -15,68 +15,68 @@ __license__ = "GNU GPL" def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "s") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "s") - report_stats = 1 + report_stats = 1 - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass - if report_stats: - print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design") + if report_stats: + print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design") - if args: - sys.stderr.write("check_title: too many arguments\n") - sys.stderr.write("Usage: check_title [-s]\n") - sys.exit(1) + if args: + sys.stderr.write("check_title: too many arguments\n") + sys.stderr.write("Usage: check_title [-s]\n") + sys.exit(1) - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() - make_linear(root_folder) - objects = len(root_folder.linear) + root_folder = storage.load() + make_linear(root_folder) + objects = len(root_folder.linear) - if report_stats: - print("Ok") + if report_stats: + print("Ok") - for object_no in range(objects): - object = root_folder.linear[object_no] + for object_no in range(objects): + object = root_folder.linear[object_no] - if object.isBookmark: - if hasattr(object, "moved") or hasattr(object, "error") \ - or object.href.startswith('place:'): # Firefox SmartBookmarks - continue + if object.isBookmark: + if hasattr(object, "moved") or hasattr(object, "error") \ + or object.href.startswith('place:'): # Firefox SmartBookmarks + continue - if hasattr(object, "real_title") and (object.real_title is not None): - unquoted_title = unquote_title(quote_title(object.real_title)) - unquoted_name = unquote_title(object.name) - if unquoted_name != unquoted_title: - print(object.href) - print(unquoted_name) - print(unquoted_title) - print() - else: - print(object.href) - print(object.name) - print("NO REAL TITLE!!!") - print() + if hasattr(object, "real_title") and (object.real_title is not None): + unquoted_title = unquote_title(quote_title(object.real_title)) + unquoted_name = unquote_title(object.name) + if unquoted_name != unquoted_title: + print(object.href) + print(unquoted_name) + print(unquoted_title) + print() + else: + print(object.href) + print(object.name) + print("NO REAL TITLE!!!") + print() - if report_stats: - print(objects, "objects passed") + if report_stats: + print(objects, "objects passed") if __name__ == '__main__': - run() + run() diff --git a/check_url.py b/check_url.py index 8a6ec0a..46135c5 100755 --- a/check_url.py +++ b/check_url.py @@ -25,40 +25,40 @@ from Writers.bkmk_wflad import strftime def run(): - print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design") + print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design") - if len(sys.argv) < 2: - sys.stderr.write("Usage: check_url.py url1 [url2...]\n") - sys.exit(1) + if len(sys.argv) < 2: + sys.stderr.write("Usage: check_url.py url1 [url2...]\n") + sys.exit(1) - from m_lib.flog import makelog - log = makelog("check.log") + from m_lib.flog import makelog + log = makelog("check.log") - from robots import robot - robot = robot(log) + from robots import robot + robot = robot(log) - for url in sys.argv[1:]: - bookmark = Bookmark(href=url, add_date=None) - bookmark.parent = None + for url in sys.argv[1:]: + bookmark = Bookmark(href=url, add_date=None) + bookmark.parent = None - rcode = robot.check_url(bookmark) - print("check_url: %s" % rcode) + rcode = robot.check_url(bookmark) + print("check_url: %s" % rcode) - if hasattr(bookmark, 'error'): - print(bookmark.error) + if hasattr(bookmark, 'error'): + print(bookmark.error) - else: - print("""\ -URL: %s -Title: %s -LastModified: %s -IconURI: %s -Icon: %s -""" % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon)) + else: + print("""\ + URL: %s + Title: %s + LastModified: %s + IconURI: %s + Icon: %s + """ % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon)) - robot.stop() - log.close() + robot.stop() + log.close() if __name__ == '__main__': - run() + run() diff --git a/check_urls.py b/check_urls.py index d6d038d..a7314f3 100755 --- a/check_urls.py +++ b/check_urls.py @@ -14,150 +14,150 @@ __license__ = "GNU GPL" def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "ise") - - show_pbar = 1 - report_stats = 1 - only_errors = 0 - - for _opt, _arg in optlist: - if _opt == '-i': - show_pbar = 0 - if _opt == '-s': - report_stats = 0 - if _opt == '-e': - only_errors = 1 - try: - del _opt, _arg - except NameError: - pass - - if report_stats: - print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design") - - if args: - sys.stderr.write("check_urls: too many arguments\n") - sys.stderr.write("Usage: check_urls [-ise]\n") - sys.exit(1) - - if show_pbar: - show_pbar = sys.stderr.isatty() - - if show_pbar: - try: - from m_lib.pbar.tty_pbar import ttyProgressBar - except ImportError: - show_pbar = 0 - - from m_lib.flog import makelog, openlog - if only_errors: - log = openlog("check.log") - log("chk_urls restarted for errors") - if report_stats: - print("chk_urls restarted for errors") - else: - log = makelog("check.log") - log("check_urls started") - if report_stats: - print(" check_urls: normal start") - - from storage import storage - storage = storage() - - from robots import robot - robot = robot(log) - - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() - - root_folder = storage.load() - from bkmk_objects import make_linear, break_tree - make_linear(root_folder) - objects = len(root_folder.linear) - - if report_stats: - print("Ok") - - if report_stats: - if only_errors: - s = "Rechecking errors: " - else: - s = "Checking: " - sys.stdout.write(s) - sys.stdout.flush() - - if show_pbar: - pbar = ttyProgressBar(0, objects) - - urls_no = 0 - object_count = 0 - size = 0 - - checked = {} - rcode = 1 - - for object_no in range(objects): - if show_pbar: - pbar.display(object_no+1) - - object = root_folder.linear[object_no] - object_count = object_count + 1 - - if object.isBookmark: - href = object.href - if (href.startswith('place:') # Firefox SmartBookmarks - or '%s' in href): # Bookmark with keyword - log("Skipped %s" % href) - continue - - if only_errors: - if hasattr(object, "error"): - delattr(object, "error") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "ise") + + show_pbar = 1 + report_stats = 1 + only_errors = 0 + + for _opt, _arg in optlist: + if _opt == '-i': + show_pbar = 0 + if _opt == '-s': + report_stats = 0 + if _opt == '-e': + only_errors = 1 + try: + del _opt, _arg + except NameError: + pass + + if report_stats: + print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design") + + if args: + sys.stderr.write("check_urls: too many arguments\n") + sys.stderr.write("Usage: check_urls [-ise]\n") + sys.exit(1) + + if show_pbar: + show_pbar = sys.stderr.isatty() + + if show_pbar: + try: + from m_lib.pbar.tty_pbar import ttyProgressBar + except ImportError: + show_pbar = 0 + + from m_lib.flog import makelog, openlog + if only_errors: + log = openlog("check.log") + log("chk_urls restarted for errors") + if report_stats: + print("chk_urls restarted for errors") + else: + log = makelog("check.log") + log("check_urls started") + if report_stats: + print(" check_urls: normal start") + + from storage import storage + storage = storage() + + from robots import robot + robot = robot(log) + + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() + + root_folder = storage.load() + from bkmk_objects import make_linear, break_tree + make_linear(root_folder) + objects = len(root_folder.linear) + + if report_stats: + print("Ok") + + if report_stats: + if only_errors: + s = "Rechecking errors: " + else: + s = "Checking: " + sys.stdout.write(s) + sys.stdout.flush() + + if show_pbar: + pbar = ttyProgressBar(0, objects) + + urls_no = 0 + object_count = 0 + size = 0 + + checked = {} + rcode = 1 + + for object_no in range(objects): + if show_pbar: + pbar.display(object_no+1) + + object = root_folder.linear[object_no] + object_count = object_count + 1 + + if object.isBookmark: + href = object.href + if (href.startswith('place:') # Firefox SmartBookmarks + or '%s' in href): # Bookmark with keyword + log("Skipped %s" % href) + continue + + if only_errors: + if hasattr(object, "error"): + delattr(object, "error") + else: + continue + + if checked.has_key(href): + log("Already checked %s" % href) + old_object = root_folder.linear[checked[href]] + for attr_name in ("last_visit", "last_modified", + "error", "no_error", "moved", "size", "md5", "real_title", + "last_tested", "test_time", "icon", "charset"): + if hasattr(old_object, attr_name): + setattr(object, attr_name, getattr(old_object, attr_name)) else: - continue - - if checked.has_key(href): - log("Already checked %s" % href) - old_object = root_folder.linear[checked[href]] - for attr_name in ("last_visit", "last_modified", - "error", "no_error", "moved", "size", "md5", "real_title", - "last_tested", "test_time", "icon", "charset"): - if hasattr(old_object, attr_name): - setattr(object, attr_name, getattr(old_object, attr_name)) - else: - log("Checking %s" % href) - rcode = robot.check_url(object) - - if rcode: - checked[href] = object_no - urls_no = urls_no + 1 - try: - size = size + int(object.size) - except (AttributeError, TypeError, ValueError): - pass # Some object does not have a size :( - else: - log("Interrupted by user (^C)") - break - robot.stop() - - if show_pbar: - del pbar - - if report_stats: - print("Ok") - print(object_count, "objects passed") - print(urls_no, "URLs checked") - print(size, "bytes eaten") - - break_tree(root_folder.linear) - storage.store(root_folder) - - if rcode: - log("check_urls finished ok") - log.close() + log("Checking %s" % href) + rcode = robot.check_url(object) + + if rcode: + checked[href] = object_no + urls_no = urls_no + 1 + try: + size = size + int(object.size) + except (AttributeError, TypeError, ValueError): + pass # Some object does not have a size :( + else: + log("Interrupted by user (^C)") + break + robot.stop() + + if show_pbar: + del pbar + + if report_stats: + print("Ok") + print(object_count, "objects passed") + print(urls_no, "URLs checked") + print(size, "bytes eaten") + + break_tree(root_folder.linear) + storage.store(root_folder) + + if rcode: + log("check_urls finished ok") + log.close() if __name__ == '__main__': - run() + run() diff --git a/convert_st.py b/convert_st.py index 4c0cb15..381239f 100755 --- a/convert_st.py +++ b/convert_st.py @@ -12,50 +12,50 @@ import sys def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "s") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "s") - report_stats = 1 + report_stats = 1 - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass - if len(args) != 1: - sys.stderr.write("convert_st: too many or too few arguments\n") - sys.stderr.write("Usage: convert_st [-s] new_storage\n") - sys.exit(1) + if len(args) != 1: + sys.stderr.write("convert_st: too many or too few arguments\n") + sys.stderr.write("Usage: convert_st [-s] new_storage\n") + sys.exit(1) - from bkmk_objects import parse_params, set_params - from storage import storage, import_storage + from bkmk_objects import parse_params, set_params + from storage import storage, import_storage - storage = storage() + storage = storage() - storage_name, storage_params = parse_params(args[0]) - new_storage = import_storage(storage_name) - set_params(new_storage, storage_params) - new_storage = new_storage() + storage_name, storage_params = parse_params(args[0]) + new_storage = import_storage(storage_name) + set_params(new_storage, storage_params) + new_storage = new_storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() + root_folder = storage.load() - if report_stats: - print("Ok") - sys.stdout.write("Converting to %s: " % new_storage.filename) - sys.stdout.flush() + if report_stats: + print("Ok") + sys.stdout.write("Converting to %s: " % new_storage.filename) + sys.stdout.flush() - new_storage.store(root_folder) + new_storage.store(root_folder) - if report_stats: - print("Ok") + if report_stats: + print("Ok") if __name__ == '__main__': - run() + run() diff --git a/db2bkmk.py b/db2bkmk.py index 28ebc76..e0ac1c1 100755 --- a/db2bkmk.py +++ b/db2bkmk.py @@ -12,97 +12,97 @@ import sys def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "sp:o:t:r") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "sp:o:t:r") - report_stats = 1 - prune = None + report_stats = 1 + prune = None - from writers import writer - output_filename = writer.filename + from writers import writer + output_filename = writer.filename - transl = 0 - transl_name = "" # dictionary translation; default is no translation + transl = 0 + transl_name = "" # dictionary translation; default is no translation - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - if _opt == '-p': - prune = _arg - if _opt == '-o': - output_filename = _arg - if _opt == '-t': - transl = 1 - transl_name = _arg - if _opt == '-r': - transl = 2 - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + if _opt == '-p': + prune = _arg + if _opt == '-o': + output_filename = _arg + if _opt == '-t': + transl = 1 + transl_name = _arg + if _opt == '-r': + transl = 2 + try: + del _opt, _arg + except NameError: + pass - if args: - sys.stderr.write("db2bkmk: too many arguments\n") - sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n") - sys.exit(1) + if args: + sys.stderr.write("db2bkmk: too many arguments\n") + sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n") + sys.exit(1) - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() + root_folder = storage.load() - if report_stats: - print("Ok") - sys.stdout.write("Writing %s: " % output_filename) - sys.stdout.flush() + if report_stats: + print("Ok") + sys.stdout.write("Writing %s: " % output_filename) + sys.stdout.flush() - if transl: - new_ext = str(transl) - transl_d = {} + if transl: + new_ext = str(transl) + transl_d = {} - from m_lib.flad import fladm - transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""]) - # This prevents any other key to appear in transl_db ^ + from m_lib.flad import fladm + transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""]) + # This prevents any other key to appear in transl_db ^ - # Generate translation dictionary (hash table) - if transl == 1: - for record in transl_db: - transl_d[record["URL1"]] = record["URL2"] - elif transl == 2: - for record in transl_db: - transl_d[record["URL2"]] = record["URL1"] - else: - raise ValueError("transl (%d) must be 1 or 2" % transl) + # Generate translation dictionary (hash table) + if transl == 1: + for record in transl_db: + transl_d[record["URL1"]] = record["URL2"] + elif transl == 2: + for record in transl_db: + transl_d[record["URL2"]] = record["URL1"] + else: + raise ValueError("transl (%d) must be 1 or 2" % transl) - del transl_db # Save few bytes of memory + del transl_db # Save few bytes of memory - from bkmk_objects import Walker - class Transl(Walker): - def __init__(self, transl_d): - self.transl_d = transl_d + from bkmk_objects import Walker + class Transl(Walker): + def __init__(self, transl_d): + self.transl_d = transl_d - def bookmark(self, b, level): - href = b.href - transl_d = self.transl_d + def bookmark(self, b, level): + href = b.href + transl_d = self.transl_d - if transl_d.has_key(href): - b.href = transl_d[href] + if transl_d.has_key(href): + b.href = transl_d[href] - root_folder.walk_depth(Transl(transl_d)) + root_folder.walk_depth(Transl(transl_d)) - outfile = open(output_filename, 'w') - root_folder.walk_depth(writer(outfile, prune)) - outfile.close() + outfile = open(output_filename, 'w') + root_folder.walk_depth(writer(outfile, prune)) + outfile.close() - if report_stats: - print("Ok") + if report_stats: + print("Ok") if __name__ == '__main__': - run() + run() diff --git a/parse_html/__main__.py b/parse_html/__main__.py index 34b8ed4..61e04e0 100644 --- a/parse_html/__main__.py +++ b/parse_html/__main__.py @@ -14,22 +14,22 @@ __license__ = "GNU GPL" def main(): - import sys - from .bkmk_parse_html import universal_charset - - l = len(sys.argv) - if l == 3: - filename = sys.argv[1] - charset = sys.argv[2] - elif l == 2: - filename = sys.argv[1] - charset = universal_charset - else: - sys.exit("Usage: main filename [charset]") - - parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n')) - print(" refresh:", parser.refresh) - print(" icon :", parser.icon) + import sys + from .bkmk_parse_html import universal_charset + + l = len(sys.argv) + if l == 3: + filename = sys.argv[1] + charset = sys.argv[2] + elif l == 2: + filename = sys.argv[1] + charset = universal_charset + else: + sys.exit("Usage: main filename [charset]") + + parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n')) + print(" refresh:", parser.refresh) + print(" icon :", parser.icon) if __name__ == '__main__': main() diff --git a/parse_html/bkmk_parse_html.py b/parse_html/bkmk_parse_html.py index 2e412ad..7bc4640 100644 --- a/parse_html/bkmk_parse_html.py +++ b/parse_html/bkmk_parse_html.py @@ -19,31 +19,31 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic parsers = [] try: - from . import bkmk_ph_beautifulsoup + from . import bkmk_ph_beautifulsoup except ImportError: - pass + pass else: - bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET - parsers.append(bkmk_ph_beautifulsoup.parse_html) + bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET + parsers.append(bkmk_ph_beautifulsoup.parse_html) try: - from . import bkmk_ph_html5 + from . import bkmk_ph_html5 except ImportError: - pass + pass else: - parsers.append(bkmk_ph_html5.parse_html) + parsers.append(bkmk_ph_html5.parse_html) try: - from . import bkmk_ph_lxml + from . import bkmk_ph_lxml except ImportError: - pass + pass else: parsers.append(bkmk_ph_lxml.parse_html) try: - from . import bkmk_ph_htmlparser + from . import bkmk_ph_htmlparser except ImportError: - pass + pass else: parsers.append(bkmk_ph_htmlparser.parse_html) @@ -62,128 +62,128 @@ entity_re = re.compile("(&\w+;)") num_entity_re = re.compile("(&#[0-9]+;)") def recode_entities(title, charset): - output = [] - for part in entity_re.split(title): - if part not in ("&", "<", ">", """) and \ - entity_re.match(part): - _part = name2codepoint.get(part[1:-1], None) - if _part is not None: - part = unichr(_part).encode(charset) - output.append(part) - title = ''.join(output) - - output = [] - for part in num_entity_re.split(title): - if num_entity_re.match(part): - try: - part = unichr(int(part[2:-1])).encode(charset) - except UnicodeEncodeError: - pass # Leave the entity as is - output.append(part) - - return ''.join(output) + output = [] + for part in entity_re.split(title): + if part not in ("&", "<", ">", """) and \ + entity_re.match(part): + _part = name2codepoint.get(part[1:-1], None) + if _part is not None: + part = unichr(_part).encode(charset) + output.append(part) + title = ''.join(output) + + output = [] + for part in num_entity_re.split(title): + if num_entity_re.match(part): + try: + part = unichr(int(part[2:-1])).encode(charset) + except UnicodeEncodeError: + pass # Leave the entity as is + output.append(part) + + return ''.join(output) import os BKMK_DEBUG_HTML_PARSERS = os.environ.get("BKMK_DEBUG_HTML_PARSERS") def parse_html(html_text, charset=None, log=None): - if not parsers: - return None - - if charset: - try: - codecs.lookup(charset) # In case of unknown charset... - except (ValueError, LookupError): - charset = None # ...try charset from HTML - - charsets = [universal_charset, DEFAULT_CHARSET] - if charset: - charset = charset.lower().replace("windows-", "cp") - if charset in charsets: - charsets.remove(charset) - charsets.insert(0, charset) - - if BKMK_DEBUG_HTML_PARSERS: - _parsers = [] - for p in parsers: - parser = None - for c in charsets: - try: - parser = p(html_text, c, log) - except UnicodeError: - pass - else: - if parser: - if BKMK_DEBUG_HTML_PARSERS: - if log: log(" Parser %s: ok" % p.__module__) - _parsers.append((p, parser)) - break - else: - if log: log(" Parser %s: fail" % p.__module__) - if not BKMK_DEBUG_HTML_PARSERS and parser: - break - - if BKMK_DEBUG_HTML_PARSERS: - if not _parsers: - if log: log(" All parsers have failed") - return None - elif not parser: - if log: log(" All parsers have failed") - return None - - if BKMK_DEBUG_HTML_PARSERS: - p, parser = _parsers[0] - if log: log(" Using %s" % p.__module__) - - converted_title = title = parser.title - if title and (not parser.charset): - try: - unicode(title, "ascii") - except UnicodeDecodeError: - parser.charset = DEFAULT_CHARSET - - if parser.charset: - parser.charset = parser.charset.lower().replace("windows-", "cp") - - if title and parser.charset and ( - (parser.charset != universal_charset) or - ((not charset) or (charset != parser.charset))): - try: - if parser.meta_charset: - if log: log(" META charset : %s" % parser.charset) - elif (not charset) or (charset != parser.charset): - if log: log(" guessed charset: %s" % parser.charset) - #if log: log(" current charset: %s" % universal_charset) - if log: log(" title : %s" % title) - if parser.charset != universal_charset: + if not parsers: + return None + + if charset: + try: + codecs.lookup(charset) # In case of unknown charset... + except (ValueError, LookupError): + charset = None # ...try charset from HTML + + charsets = [universal_charset, DEFAULT_CHARSET] + if charset: + charset = charset.lower().replace("windows-", "cp") + if charset in charsets: + charsets.remove(charset) + charsets.insert(0, charset) + + if BKMK_DEBUG_HTML_PARSERS: + _parsers = [] + for p in parsers: + parser = None + for c in charsets: try: - converted_title = unicode(title, parser.charset).encode(universal_charset) + parser = p(html_text, c, log) except UnicodeError: - if log: log(" incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET)) - converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace") - parser.charset = DEFAULT_CHARSET - if log and (converted_title != title): log(" converted title: %s" % converted_title) - except LookupError: - if log: log(" unknown charset: '%s'" % parser.charset) - else: - if log: log(" title : %s" % title) - - if title: - final_title = recode_entities(converted_title, universal_charset) - parts = [s.strip() for s in final_title.replace('\r', '').split('\n')] - final_title = ' '.join([s for s in parts if s]) - if log and (final_title != converted_title): log(" final title : %s" % final_title) - parser.title = final_title - - icon = parser.icon - if isinstance(icon, unicode): - try: - parser.icon = icon.encode('ascii') - except UnicodeEncodeError: - if parser.charset: - parser.icon = icon.encode(parser.charset) - return parser + pass + else: + if parser: + if BKMK_DEBUG_HTML_PARSERS: + if log: log(" Parser %s: ok" % p.__module__) + _parsers.append((p, parser)) + break + else: + if log: log(" Parser %s: fail" % p.__module__) + if not BKMK_DEBUG_HTML_PARSERS and parser: + break + + if BKMK_DEBUG_HTML_PARSERS: + if not _parsers: + if log: log(" All parsers have failed") + return None + elif not parser: + if log: log(" All parsers have failed") + return None + + if BKMK_DEBUG_HTML_PARSERS: + p, parser = _parsers[0] + if log: log(" Using %s" % p.__module__) + + converted_title = title = parser.title + if title and (not parser.charset): + try: + unicode(title, "ascii") + except UnicodeDecodeError: + parser.charset = DEFAULT_CHARSET + + if parser.charset: + parser.charset = parser.charset.lower().replace("windows-", "cp") + + if title and parser.charset and ( + (parser.charset != universal_charset) or + ((not charset) or (charset != parser.charset))): + try: + if parser.meta_charset: + if log: log(" META charset : %s" % parser.charset) + elif (not charset) or (charset != parser.charset): + if log: log(" guessed charset: %s" % parser.charset) + #if log: log(" current charset: %s" % universal_charset) + if log: log(" title : %s" % title) + if parser.charset != universal_charset: + try: + converted_title = unicode(title, parser.charset).encode(universal_charset) + except UnicodeError: + if log: log(" incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET)) + converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace") + parser.charset = DEFAULT_CHARSET + if log and (converted_title != title): log(" converted title: %s" % converted_title) + except LookupError: + if log: log(" unknown charset: '%s'" % parser.charset) + else: + if log: log(" title : %s" % title) + + if title: + final_title = recode_entities(converted_title, universal_charset) + parts = [s.strip() for s in final_title.replace('\r', '').split('\n')] + final_title = ' '.join([s for s in parts if s]) + if log and (final_title != converted_title): log(" final title : %s" % final_title) + parser.title = final_title + + icon = parser.icon + if isinstance(icon, unicode): + try: + parser.icon = icon.encode('ascii') + except UnicodeEncodeError: + if parser.charset: + parser.icon = icon.encode(parser.charset) + return parser def parse_filename(filename, charset=None, log=None): fp = open(filename, 'r') diff --git a/parse_html/bkmk_ph_beautifulsoup.py b/parse_html/bkmk_ph_beautifulsoup.py index a2f5715..f796744 100644 --- a/parse_html/bkmk_ph_beautifulsoup.py +++ b/parse_html/bkmk_ph_beautifulsoup.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2007-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2007-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html'] @@ -21,130 +21,130 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic # http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63 class BadDeclParser(BeautifulSoup): def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - # Could not parse the DOCTYPE declaration - # Try to just skip the actual declaration - match = re.search(r']*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE) - if match: - toHandle = self.rawdata[i:match.end()] - else: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + # Could not parse the DOCTYPE declaration + # Try to just skip the actual declaration + match = re.search(r']*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE) + if match: + toHandle = self.rawdata[i:match.end()] + else: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j def _parse_html(html_text, charset): - try: - return BadDeclParser(html_text, fromEncoding=charset) - except TypeError: - return None + try: + return BadDeclParser(html_text, fromEncoding=charset) + except TypeError: + return None def parse_html(html_text, charset=None, log=None): - root = _parse_html(html_text, charset) - if root is None: - return None - - _charset = root.originalEncoding - if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default - _charset = DEFAULT_CHARSET - root = _parse_html(html_text, _charset) - if root is None: - return None - - html = root.html - if html is None: - html = root - - head = html.head - if head is None: - head = html # Some sites put TITLE in HTML without HEAD - - title = head.title - if (title is None) and (html is not head): - # Some sites put TITLE in HTML outside of HEAD - title = html.title - - if title is None: - # Lookup TITLE in the root - title = root.title - - if title is not None: - if title.string: - title = title.string - else: - parts = [] - for part in title: - if not isinstance(part, basestring): - part = unicode(part) - parts.append(part.strip()) - title = ''.join(parts) - - meta = head.find(_find_contenttype, recursive=False) - if meta: - try: - meta_content = meta.get("content") - if meta_content: - __charset = meta_content.lower().split('charset=')[1].split(';')[0] - else: - __charset = False - except IndexError: # No charset in the META Content-Type - meta_charset = False - else: - meta_charset = _charset == __charset - else: - meta_charset = False - - if not meta_charset: - meta = head.find(_find_charset, recursive=False) - if meta: - meta_content = meta.get("charset") - if meta_content: - meta_charset = _charset = meta_content.lower() - - if title and (_charset or meta_charset): - title = title.encode(_charset or meta_charset) - - meta = head.find(_find_refresh, recursive=False) - if meta: - refresh = meta.get("content") - else: - refresh = None - - meta = head.find(_find_icon, recursive=False) - if meta: - icon = meta.get("href") - else: - icon = None - - if (title is None) and (refresh is None) and (icon is None): - return None - return HTMLParser(_charset, meta_charset, title, refresh, icon) + root = _parse_html(html_text, charset) + if root is None: + return None + + _charset = root.originalEncoding + if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default + _charset = DEFAULT_CHARSET + root = _parse_html(html_text, _charset) + if root is None: + return None + + html = root.html + if html is None: + html = root + + head = html.head + if head is None: + head = html # Some sites put TITLE in HTML without HEAD + + title = head.title + if (title is None) and (html is not head): + # Some sites put TITLE in HTML outside of HEAD + title = html.title + + if title is None: + # Lookup TITLE in the root + title = root.title + + if title is not None: + if title.string: + title = title.string + else: + parts = [] + for part in title: + if not isinstance(part, basestring): + part = unicode(part) + parts.append(part.strip()) + title = ''.join(parts) + + meta = head.find(_find_contenttype, recursive=False) + if meta: + try: + meta_content = meta.get("content") + if meta_content: + __charset = meta_content.lower().split('charset=')[1].split(';')[0] + else: + __charset = False + except IndexError: # No charset in the META Content-Type + meta_charset = False + else: + meta_charset = _charset == __charset + else: + meta_charset = False + + if not meta_charset: + meta = head.find(_find_charset, recursive=False) + if meta: + meta_content = meta.get("charset") + if meta_content: + meta_charset = _charset = meta_content.lower() + + if title and (_charset or meta_charset): + title = title.encode(_charset or meta_charset) + + meta = head.find(_find_refresh, recursive=False) + if meta: + refresh = meta.get("content") + else: + refresh = None + + meta = head.find(_find_icon, recursive=False) + if meta: + icon = meta.get("href") + else: + icon = None + + if (title is None) and (refresh is None) and (icon is None): + return None + return HTMLParser(_charset, meta_charset, title, refresh, icon) def _find_contenttype(Tag): - return (Tag.name == "meta") and \ - (Tag.get("http-equiv", '').lower() == "content-type") + return (Tag.name == "meta") and \ + (Tag.get("http-equiv", '').lower() == "content-type") def _find_charset(Tag): - return (Tag.name == "meta") and Tag.get("charset", '') + return (Tag.name == "meta") and Tag.get("charset", '') def _find_refresh(Tag): - return (Tag.name == "meta") and \ - (Tag.get("http-equiv", '').lower() == "refresh") + return (Tag.name == "meta") and \ + (Tag.get("http-equiv", '').lower() == "refresh") def _find_icon(Tag): - return (Tag.name == "link") and \ - (Tag.get("rel", '').lower() in ('icon', 'shortcut icon')) + return (Tag.name == "link") and \ + (Tag.get("rel", '').lower() in ('icon', 'shortcut icon')) diff --git a/parse_html/bkmk_ph_etreetidy.py b/parse_html/bkmk_ph_etreetidy.py index fc596b1..09aa2a3 100644 --- a/parse_html/bkmk_ph_etreetidy.py +++ b/parse_html/bkmk_ph_etreetidy.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html'] @@ -43,8 +43,8 @@ def parse_html(html_text, charset=None, log=None): except IndexError: meta_charset = False elif m.get('charset', ''): - meta_charset = m.get('charset').lower() - break + meta_charset = m.get('charset').lower() + break else: meta_charset = False diff --git a/parse_html/bkmk_ph_htmlparser.py b/parse_html/bkmk_ph_htmlparser.py index 0798467..45e89f5 100644 --- a/parse_html/bkmk_ph_htmlparser.py +++ b/parse_html/bkmk_ph_htmlparser.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1997-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html'] @@ -19,80 +19,80 @@ class HTMLHeadDone(Exception): pass class HTMLParser(_HTMLParser): - def __init__(self, charset=None): - _HTMLParser.__init__(self) - self.charset = charset - self.meta_charset = 0 - self.title = None - self.refresh = None - self.icon = None - - def end_head(self): - raise HTMLHeadDone() - - def do_meta(self, attrs): - http_equiv = "" - content = "" - - for attrname, value in attrs: - if value: - value = value.strip() - if attrname == 'http-equiv': - http_equiv = value.lower() - elif attrname == 'content': - content = value - elif (attrname == 'charset') and (not self.charset): - self.charset = value.lower() - self.meta_charset = 1 - - if (not self.charset) and (http_equiv == "content-type"): - try: - # extract charset from "text/html; foo; charset=UTF-8, bar; baz;" - self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0] - self.meta_charset = 1 # Remember that the charset was retrieved from - # META tag, not from the Content-Type header - except IndexError: - pass - - if http_equiv == "refresh": - self.refresh = content - - def start_title(self, attrs): - self.accumulator = '' - - def end_title(self): - if not self.title: # use only the first title - self.title = self.accumulator - - def do_link(self, attrs): - has_icon = False - href = None - - for attrname, value in attrs: - if value: - value = value.strip() - if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')): - has_icon = True - elif attrname == 'href': - href = value - - if has_icon: - self.icon = href + def __init__(self, charset=None): + _HTMLParser.__init__(self) + self.charset = charset + self.meta_charset = 0 + self.title = None + self.refresh = None + self.icon = None + + def end_head(self): + raise HTMLHeadDone() + + def do_meta(self, attrs): + http_equiv = "" + content = "" + + for attrname, value in attrs: + if value: + value = value.strip() + if attrname == 'http-equiv': + http_equiv = value.lower() + elif attrname == 'content': + content = value + elif (attrname == 'charset') and (not self.charset): + self.charset = value.lower() + self.meta_charset = 1 + + if (not self.charset) and (http_equiv == "content-type"): + try: + # extract charset from "text/html; foo; charset=UTF-8, bar; baz;" + self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0] + self.meta_charset = 1 # Remember that the charset was retrieved from + # META tag, not from the Content-Type header + except IndexError: + pass + + if http_equiv == "refresh": + self.refresh = content + + def start_title(self, attrs): + self.accumulator = '' + + def end_title(self): + if not self.title: # use only the first title + self.title = self.accumulator + + def do_link(self, attrs): + has_icon = False + href = None + + for attrname, value in attrs: + if value: + value = value.strip() + if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')): + has_icon = True + elif attrname == 'href': + href = value + + if has_icon: + self.icon = href def parse_html(html_text, charset=None, log=None): - parser = HTMLParser(charset) + parser = HTMLParser(charset) - try: - parser.feed(html_text) - except (HTMLParseError, HTMLHeadDone): - pass + try: + parser.feed(html_text) + except (HTMLParseError, HTMLHeadDone): + pass - try: - parser.close() - except (HTMLParseError, HTMLHeadDone): - pass + try: + parser.close() + except (HTMLParseError, HTMLHeadDone): + pass - if (parser.title is None) and (parser.refresh is None) and (parser.icon is None): - return None - return parser + if (parser.title is None) and (parser.refresh is None) and (parser.icon is None): + return None + return parser diff --git a/parse_html/bkmk_ph_lxml.py b/parse_html/bkmk_ph_lxml.py index 9cd29e4..1fa4791 100644 --- a/parse_html/bkmk_ph_lxml.py +++ b/parse_html/bkmk_ph_lxml.py @@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design" +__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['parse_html'] @@ -37,8 +37,8 @@ def parse_html(html_text, charset=None, log=None): except IndexError: meta_charset = False elif m.get('charset', ''): - meta_charset = m.get('charset').lower() - break + meta_charset = m.get('charset').lower() + break else: meta_charset = False diff --git a/parse_html/bkmk_ph_util.py b/parse_html/bkmk_ph_util.py index 400c8da..0e2d529 100644 --- a/parse_html/bkmk_ph_util.py +++ b/parse_html/bkmk_ph_util.py @@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot. """ __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2010-2012 PhiloSoft Design" +__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design" __license__ = "GNU GPL" __all__ = ['HTMLParser'] @@ -13,10 +13,10 @@ __all__ = ['HTMLParser'] from HTMLParser import HTMLParser class HTMLParser(HTMLParser): - def __init__(self, charset, meta_charset, title, refresh, icon): - object.__init__(self) - self.charset = charset - self.meta_charset = meta_charset - self.title = title - self.refresh = refresh - self.icon = icon + def __init__(self, charset, meta_charset, title, refresh, icon): + object.__init__(self) + self.charset = charset + self.meta_charset = meta_charset + self.title = title + self.refresh = refresh + self.icon = icon diff --git a/robots.py b/robots.py index 77dc446..543e6ec 100644 --- a/robots.py +++ b/robots.py @@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params robot_name, robot_params = parse_params(environ.get("BKMK_ROBOT", "forking")) def import_robot(robot_name): - exec("from Robots import bkmk_r%s" % robot_name) - exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name)) - return robot + exec("from Robots import bkmk_r%s" % robot_name) + exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name)) + return robot robot = import_robot(robot_name) set_params(robot, robot_params) diff --git a/set-real_title.py b/set-real_title.py index 0029221..a97a421 100755 --- a/set-real_title.py +++ b/set-real_title.py @@ -14,75 +14,75 @@ __license__ = "GNU GPL" def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "s") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "s") - report_stats = 1 + report_stats = 1 - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass - if report_stats: - print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design") + if report_stats: + print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design") - if args: - sys.stderr.write("set-real_title: too many arguments\n") - sys.stderr.write("Usage: set-real_title [-s]\n") - sys.exit(1) + if args: + sys.stderr.write("set-real_title: too many arguments\n") + sys.stderr.write("Usage: set-real_title [-s]\n") + sys.exit(1) - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() - from bkmk_objects import make_linear - make_linear(root_folder) - objects = len(root_folder.linear) + root_folder = storage.load() + from bkmk_objects import make_linear + make_linear(root_folder) + objects = len(root_folder.linear) - if report_stats: - print("Ok") + if report_stats: + print("Ok") - changed = 0 - for object_no in range(objects): - object = root_folder.linear[object_no] + changed = 0 + for object_no in range(objects): + object = root_folder.linear[object_no] - if object.isBookmark: - if not hasattr(object, "real_title"): - continue + if object.isBookmark: + if not hasattr(object, "real_title"): + continue - real_title = object.real_title - if not real_title: - real_title = object.href - if object.name != real_title: - object.name = real_title - changed += 1 + real_title = object.real_title + if not real_title: + real_title = object.href + if object.name != real_title: + object.name = real_title + changed += 1 - if changed and report_stats: - sys.stdout.write("Saving %s: " % storage.filename) - sys.stdout.flush() + if changed and report_stats: + sys.stdout.write("Saving %s: " % storage.filename) + sys.stdout.flush() - if not changed and report_stats: - sys.stdout.write("No need to save data\n") - sys.stdout.flush() + if not changed and report_stats: + sys.stdout.write("No need to save data\n") + sys.stdout.flush() - if changed: - storage.store(root_folder) + if changed: + storage.store(root_folder) - if changed and report_stats: - print("Ok") - print(objects, "objects passed") - print(changed, "objects changed") + if changed and report_stats: + print("Ok") + print(objects, "objects passed") + print(changed, "objects changed") if __name__ == '__main__': - run() + run() diff --git a/set-title-list.py b/set-title-list.py index 74acae4..6a49742 100755 --- a/set-title-list.py +++ b/set-title-list.py @@ -14,106 +14,106 @@ __license__ = "GNU GPL" def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "s") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "s") - report_stats = 1 + report_stats = 1 - for _opt, _arg in optlist: - if _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass + for _opt, _arg in optlist: + if _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass - if report_stats: - print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design") + if report_stats: + print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design") - if len(args) != 1: - sys.stderr.write("Usage: set-title-list [-s] title_list_file\n") - sys.exit(1) + if len(args) != 1: + sys.stderr.write("Usage: set-title-list [-s] title_list_file\n") + sys.exit(1) - # Read the external file with titles and build a mapping (URL => title) - titles_dict = {} + # Read the external file with titles and build a mapping (URL => title) + titles_dict = {} - URL = None - title = None + URL = None + title = None - title_list_file = open(args[0], 'r') - for line in title_list_file: - line = line[:-1] # strip trailing newline - if URL is None: - URL = line + title_list_file = open(args[0], 'r') + for line in title_list_file: + line = line[:-1] # strip trailing newline + if URL is None: + URL = line - elif title is None: - title = line + elif title is None: + title = line - elif line: # the third line in every 3 lines must be empty - raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line)) + elif line: # the third line in every 3 lines must be empty + raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line)) - else: # We've got 3 lines - add new entry to the mapping - if titles_dict.has_key(URL): - if title != titles_dict[URL]: - raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL])) + else: # We've got 3 lines - add new entry to the mapping + if titles_dict.has_key(URL): + if title != titles_dict[URL]: + raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL])) - else: - titles_dict[URL] = title + else: + titles_dict[URL] = title - # reset - URL = None - title = None + # reset + URL = None + title = None - title_list_file.close() + title_list_file.close() - from storage import storage - storage = storage() + from storage import storage + storage = storage() - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() - root_folder = storage.load() - from bkmk_objects import make_linear, break_tree - make_linear(root_folder) - objects = len(root_folder.linear) + root_folder = storage.load() + from bkmk_objects import make_linear, break_tree + make_linear(root_folder) + objects = len(root_folder.linear) - if report_stats: - print("Ok") + if report_stats: + print("Ok") - # Run through the list of objects and check URLs/titles - changed = 0 - for object_no in range(objects): - object = root_folder.linear[object_no] + # Run through the list of objects and check URLs/titles + changed = 0 + for object_no in range(objects): + object = root_folder.linear[object_no] - if object.isBookmark: - URL = object.href - if titles_dict.has_key(URL): - name = titles_dict[URL] - if object.name != name: - object.name = name - changed += 1 + if object.isBookmark: + URL = object.href + if titles_dict.has_key(URL): + name = titles_dict[URL] + if object.name != name: + object.name = name + changed += 1 - if changed and report_stats: - sys.stdout.write("Saving %s: " % storage.filename) - sys.stdout.flush() + if changed and report_stats: + sys.stdout.write("Saving %s: " % storage.filename) + sys.stdout.flush() - if not changed and report_stats: - sys.stdout.write("No need to save data\n") - sys.stdout.flush() + if not changed and report_stats: + sys.stdout.write("No need to save data\n") + sys.stdout.flush() - if changed: - break_tree(root_folder.linear) - storage.store(root_folder) + if changed: + break_tree(root_folder.linear) + storage.store(root_folder) - if changed and report_stats: - print("Ok") - print(objects, "objects passed") - print(changed, "objects changed") + if changed and report_stats: + print("Ok") + print(objects, "objects passed") + print(changed, "objects changed") if __name__ == '__main__': - run() + run() diff --git a/sort_db.py b/sort_db.py index 9a478bf..03026e6 100755 --- a/sort_db.py +++ b/sort_db.py @@ -20,101 +20,101 @@ import sys class SortBy(object): - def __init__(self, sort_by): - self.sort_by = sort_by + def __init__(self, sort_by): + self.sort_by = sort_by - def __call__(self, o1, o2): - try: - attr1 = int(getattr(o1, self.sort_by)) - except (AttributeError, TypeError, ValueError): - return 1 + def __call__(self, o1, o2): + try: + attr1 = int(getattr(o1, self.sort_by)) + except (AttributeError, TypeError, ValueError): + return 1 - try: - attr2 = int(getattr(o2, self.sort_by)) - except (AttributeError, TypeError, ValueError): - return -1 + try: + attr2 = int(getattr(o2, self.sort_by)) + except (AttributeError, TypeError, ValueError): + return -1 - return cmp(attr1, attr2) + return cmp(attr1, attr2) def walk_linear(linear, walker): - for object in linear: - if object.isBookmark: - walker.bookmark(object, 0) + for object in linear: + if object.isBookmark: + walker.bookmark(object, 0) def run(): - from getopt import getopt - optlist, args = getopt(sys.argv[1:], "avmztrs") - - sort_by = "last_modified" - reverse = 0 - report_stats = 1 - - for _opt, _arg in optlist: - if _opt == '-a': - sort_by = "add_date" - elif _opt == '-v': - sort_by = "last_visit" - elif _opt == '-m': - sort_by = "last_modified" - elif _opt == '-z': - sort_by = "size" - elif _opt == '-t': - sort_by = "last_tested" - elif _opt == '-r': - reverse = 1 - elif _opt == '-s': - report_stats = 0 - try: - del _opt, _arg - except NameError: - pass - - from storage import storage - storage = storage() - - if report_stats: - sys.stdout.write("Loading %s: " % storage.filename) - sys.stdout.flush() - - root_folder = storage.load() - - if report_stats: - print("Ok") - sys.stdout.write("Sorting (by %s): " % sort_by) - sys.stdout.flush() - - from bkmk_objects import make_linear - make_linear(root_folder) - - linear = root_folder.linear - del linear[0] # exclude root folder from sorting - - by = SortBy(sort_by) - linear.sort(by) - - from writers import writer - output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by) - - if reverse: - linear.reverse() - output_filename = output_filename + "-reverse" - - if report_stats: - print("done") - sys.stdout.write("Writing %s: " % output_filename) - sys.stdout.flush() - - outfile = open(output_filename, 'w') - writer = writer(outfile) - writer.root_folder(root_folder) - walk_linear(linear, writer) - outfile.close() - - if report_stats: - print("Ok") + from getopt import getopt + optlist, args = getopt(sys.argv[1:], "avmztrs") + + sort_by = "last_modified" + reverse = 0 + report_stats = 1 + + for _opt, _arg in optlist: + if _opt == '-a': + sort_by = "add_date" + elif _opt == '-v': + sort_by = "last_visit" + elif _opt == '-m': + sort_by = "last_modified" + elif _opt == '-z': + sort_by = "size" + elif _opt == '-t': + sort_by = "last_tested" + elif _opt == '-r': + reverse = 1 + elif _opt == '-s': + report_stats = 0 + try: + del _opt, _arg + except NameError: + pass + + from storage import storage + storage = storage() + + if report_stats: + sys.stdout.write("Loading %s: " % storage.filename) + sys.stdout.flush() + + root_folder = storage.load() + + if report_stats: + print("Ok") + sys.stdout.write("Sorting (by %s): " % sort_by) + sys.stdout.flush() + + from bkmk_objects import make_linear + make_linear(root_folder) + + linear = root_folder.linear + del linear[0] # exclude root folder from sorting + + by = SortBy(sort_by) + linear.sort(by) + + from writers import writer + output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by) + + if reverse: + linear.reverse() + output_filename = output_filename + "-reverse" + + if report_stats: + print("done") + sys.stdout.write("Writing %s: " % output_filename) + sys.stdout.flush() + + outfile = open(output_filename, 'w') + writer = writer(outfile) + writer.root_folder(root_folder) + walk_linear(linear, writer) + outfile.close() + + if report_stats: + print("Ok") if __name__ == '__main__': - run() + run() diff --git a/storage.py b/storage.py index 7b6ca06..a71cdeb 100644 --- a/storage.py +++ b/storage.py @@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params storage_name, storage_params = parse_params(environ.get("BKMK_STORAGE", "pickle")) def import_storage(storage_name): - exec("from Storage import bkmk_st%s" % storage_name) - exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name)) - return storage + exec("from Storage import bkmk_st%s" % storage_name) + exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name)) + return storage storage = import_storage(storage_name) set_params(storage, storage_params) diff --git a/subproc.py b/subproc.py index c7fd116..0280aad 100644 --- a/subproc.py +++ b/subproc.py @@ -16,7 +16,7 @@ Subprocess class features: __version__ = "Revision: 1.15 " -# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp +# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp # Originally by ken manheimer, ken.manheimer@nist.gov, jan 1995. # Prior art: Initially based python code examples demonstrating usage of pipes @@ -206,7 +206,7 @@ class Subprocess: got0 = self.readPendingChars(n) got = got + got0 n = n - len(got0) - return got + return got def readPendingChars(self, max=None): """Read all currently pending subprocess output as a single string.""" return self.readbuf.readPendingChars(max) @@ -401,15 +401,15 @@ class ReadBuf: got = "" if self.buf: - if (max > 0) and (len(self.buf) > max): - got = self.buf[0:max] - self.buf = self.buf[max:] - else: - got, self.buf = self.buf, '' - return got + if (max > 0) and (len(self.buf) > max): + got = self.buf[0:max] + self.buf = self.buf[max:] + else: + got, self.buf = self.buf, '' + return got if self.eof: - return '' + return '' sel = select.select([self.fd], [], [self.fd], 0) if sel[2]: @@ -590,7 +590,7 @@ class Ph: line = string.splitfields(line, ':') it[string.strip(line[0])] = ( string.strip(string.join(line[1:]))) - + def getreply(self): """Consume next response from ph, returning list of lines or string err.""" diff --git a/writers.py b/writers.py index 952a232..25c3cdb 100644 --- a/writers.py +++ b/writers.py @@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params writer_name, writer_params = parse_params(environ.get("BKMK_WRITER", "html")) def import_writer(writer_name): - exec("from Writers import bkmk_w%s" % writer_name) - exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name)) - return writer + exec("from Writers import bkmk_w%s" % writer_name) + exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name)) + return writer writer = import_writer(writer_name) set_params(writer, writer_params)