git.phdru.name Git - bookmarks_db.git/commitdiff
Cleanup code: use 4 spaces
author    Oleg Broytman <phd@phdru.name>
Sat, 13 May 2017 19:34:41 +0000 (22:34 +0300)
committer Oleg Broytman <phd@phdru.name>
Sat, 13 May 2017 19:39:57 +0000 (22:39 +0300)
35 files changed:
Robots/bkmk_rforking.py
Robots/bkmk_rforking_sub.py
Robots/bkmk_robot_base.py
Robots/bkmk_rurllib.py
Storage/bkmk_stflad.py
Storage/bkmk_stjson.py
Storage/bkmk_stpickle.py
Writers/bkmk_wflad.py
Writers/bkmk_wflad_err.py
Writers/bkmk_whtml.py
Writers/bkmk_wtxt.py
bkmk-add.py
bkmk2db.py
bkmk_objects.py
bkmk_parser.py
check_dups.py
check_title.py
check_url.py
check_urls.py
convert_st.py
db2bkmk.py
parse_html/__main__.py
parse_html/bkmk_parse_html.py
parse_html/bkmk_ph_beautifulsoup.py
parse_html/bkmk_ph_etreetidy.py
parse_html/bkmk_ph_htmlparser.py
parse_html/bkmk_ph_lxml.py
parse_html/bkmk_ph_util.py
robots.py
set-real_title.py
set-title-list.py
sort_db.py
storage.py
subproc.py
writers.py

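All 35 files below receive the same treatment: indentation levels go from 3 spaces to 4, with no change to program logic. As a rough illustration of the kind of transformation involved (purely hypothetical -- the commit does not say how the re-indentation was actually done, and the helper below is an invented sketch), such a pass might look like this:

#!/usr/bin/env python
"""Hypothetical sketch: convert leading 3-space indentation levels to
4 spaces.  Only the leading whitespace of each line is rewritten; the
rest of the line is left untouched.  Triple-quoted string bodies and
hanging continuation lines still need manual review, which is why a
change like this one is normally checked hunk by hunk."""

import sys


def reindent_line(line, old=3, new=4):
    stripped = line.lstrip(' ')
    indent = len(line) - len(stripped)
    levels, remainder = divmod(indent, old)
    # Keep any leftover spaces so partially aligned lines are not mangled.
    return ' ' * (levels * new + remainder) + stripped


def reindent_file(path, old=3, new=4):
    with open(path) as infile:
        lines = infile.readlines()
    with open(path, 'w') as outfile:
        outfile.writelines(reindent_line(line, old, new) for line in lines)


if __name__ == '__main__':
    for filename in sys.argv[1:]:
        reindent_file(filename)

Again, this is only an illustration; the hunks below are the authoritative record of what changed.
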
diff --git a/Robots/bkmk_rforking.py b/Robots/bkmk_rforking.py
index 4ad998403417398b15643f3de2809c4c7c382f22..cad8b2d0978e9ccb95c6560fa9164e047a991664 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['robot_forking']
@@ -13,9 +13,9 @@ __all__ = ['robot_forking']
 import sys, os
 
 try:
-   import cPickle as pickle
+    import cPickle as pickle
 except ImportError:
-   import pickle
+    import pickle
 
 from subproc import Subprocess, RecordFile
 from bkmk_objects import Robot
@@ -29,74 +29,74 @@ check_subp = None
 subp_pipe = None
 
 def stop_subp(log):
-   global check_subp, subp_pipe
-   if check_subp:
-      if log: log("   restarting hanging subprocess")
-      del check_subp
-   del subp_pipe
+    global check_subp, subp_pipe
+    if check_subp:
+        if log: log("   restarting hanging subprocess")
+        del check_subp
+    del subp_pipe
 
 def restart_subp(log):
-   global check_subp, subp_pipe
-   stop_subp(log)
+    global check_subp, subp_pipe
+    stop_subp(log)
 
-   check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]),
-      control_stderr=True)
-   subp_pipe = RecordFile(check_subp)
+    check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" % os.path.dirname(sys.argv[0]),
+       control_stderr=True)
+    subp_pipe = RecordFile(check_subp)
 
 
 _set_subproc = True
 
 class robot_forking(Robot):
-   subproc = 'urllib2' # Default subprocess
-
-   def check_url(self, bookmark):
-      global _set_subproc
-      if _set_subproc:
-         _set_subproc = False
-
-         subproc = self.subproc
-         subproc_attrs = []
-         for attr in dir(self):
-             if attr.startswith('subproc_'):
-                 subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr)))
-         if subproc_attrs:
-             subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs])
-         os.environ['BKMK_ROBOT'] = subproc
-
-      if not check_subp:
-         restart_subp(self.log) # Not restart, just start afresh
-
-      try:
-         save_parent = bookmark.parent
-         bookmark.parent = None
-         subp_pipe.write_record(pickle.dumps(bookmark))
-
-         if check_subp.waitForPendingChar(60): # wait a minute
-            new_b = pickle.loads(subp_pipe.read_record())
-            for attr in ("error", "no_error",
-                  "moved", "size", "md5", "real_title",
-                  "last_tested", "last_modified", "test_time",
-                  "icon", "icon_href"):
-               if hasattr(new_b, attr):
-                  setattr(bookmark, attr, getattr(new_b, attr))
-         else:
-            bookmark.error = "Subprocess connection timed out"
-            restart_subp(self.log)
-
-         bookmark.parent = save_parent
-
-         while True:
-            error = check_subp.readPendingErrLine()
-            if not error:
-               break
-            sys.stderr.write("(subp) " + error)
-         sys.stderr.flush()
-
-      except KeyboardInterrupt:
-         return 0
-
-      # Tested
-      return 1
-
-   def stop(self):
-      stop_subp(None) # Stop subprocess; do not log restarting
+    subproc = 'urllib2' # Default subprocess
+
+    def check_url(self, bookmark):
+        global _set_subproc
+        if _set_subproc:
+            _set_subproc = False
+
+            subproc = self.subproc
+            subproc_attrs = []
+            for attr in dir(self):
+                if attr.startswith('subproc_'):
+                    subproc_attrs.append((attr[len('subproc_'):], getattr(self, attr)))
+            if subproc_attrs:
+                subproc += ':' + ':'.join(['='.join((k,v)) for k,v in subproc_attrs])
+            os.environ['BKMK_ROBOT'] = subproc
+
+        if not check_subp:
+            restart_subp(self.log) # Not restart, just start afresh
+
+        try:
+            save_parent = bookmark.parent
+            bookmark.parent = None
+            subp_pipe.write_record(pickle.dumps(bookmark))
+
+            if check_subp.waitForPendingChar(60): # wait a minute
+                new_b = pickle.loads(subp_pipe.read_record())
+                for attr in ("error", "no_error",
+                      "moved", "size", "md5", "real_title",
+                      "last_tested", "last_modified", "test_time",
+                      "icon", "icon_href"):
+                    if hasattr(new_b, attr):
+                        setattr(bookmark, attr, getattr(new_b, attr))
+            else:
+                bookmark.error = "Subprocess connection timed out"
+                restart_subp(self.log)
+
+            bookmark.parent = save_parent
+
+            while True:
+                error = check_subp.readPendingErrLine()
+                if not error:
+                    break
+                sys.stderr.write("(subp) " + error)
+            sys.stderr.flush()
+
+        except KeyboardInterrupt:
+            return 0
+
+        # Tested
+        return 1
+
+    def stop(self):
+        stop_subp(None) # Stop subprocess; do not log restarting
diff --git a/Robots/bkmk_rforking_sub.py b/Robots/bkmk_rforking_sub.py
index 7dd1f5fa7b7da797bff902338afe0507665e4335..9b25b5b98e5bd28d514c84c21f735ae5f9d77ae7 100755 (executable)
@@ -6,7 +6,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1999-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1999-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = []
@@ -18,32 +18,32 @@ lib_dir = os.path.normpath(os.path.join(os.path.dirname(sys.argv[0]), os.pardir)
 sys.path.append(lib_dir) # for bkmk_objects.py
 
 try:
-   import cPickle
-   pickle = cPickle
+    import cPickle
+    pickle = cPickle
 except ImportError:
-   import pickle
+    import pickle
 
 from subproc import RecordFile
 
 
 def run():
-   bkmk_in = RecordFile(sys.stdin)
-   bkmk_out = RecordFile(sys.stdout)
+    bkmk_in = RecordFile(sys.stdin)
+    bkmk_out = RecordFile(sys.stdout)
 
-   from m_lib.flog import openlog
-   log = openlog("check2.log")
-   from robots import robot
-   robot = robot(log)
+    from m_lib.flog import openlog
+    log = openlog("check2.log")
+    from robots import robot
+    robot = robot(log)
 
-   while 1:
-      bookmark = pickle.loads(bkmk_in.read_record())
-      log(bookmark.href)
-      robot.check_url(bookmark)
-      bkmk_out.write_record(pickle.dumps(bookmark))
-      log.outfile.flush()
+    while 1:
+        bookmark = pickle.loads(bkmk_in.read_record())
+        log(bookmark.href)
+        robot.check_url(bookmark)
+        bkmk_out.write_record(pickle.dumps(bookmark))
+        log.outfile.flush()
 
-   log.close()
+    log.close()
 
 
 if __name__ == '__main__':
-   run()
+    run()
diff --git a/Robots/bkmk_robot_base.py b/Robots/bkmk_robot_base.py
index 8dd032b714e076a75f4bd38ab8db10d65ed45830..0600e7338d2b71c41492d73858acf72d0818ae84 100644 (file)
@@ -35,204 +35,204 @@ reloc_dict = {
 
 
 def get_error(e):
-   if isinstance(e, str):
-      return e
+    if isinstance(e, str):
+        return e
 
-   else:
-      s = []
-      for i in e:
-         s.append("'%s'" % str(i).replace('\n', "\\n"))
-      return "(%s)" % ' '.join(s)
+    else:
+        s = []
+        for i in e:
+            s.append("'%s'" % str(i).replace('\n', "\\n"))
+        return "(%s)" % ' '.join(s)
 
 
 icons = {} # Icon cache; maps URL to a tuple (content type, data)
            # or None if there is no icon.
 
 class robot_base(Robot):
-   timeout = 60
+    timeout = 60
 
-   def __init__(self, *args, **kw):
+    def __init__(self, *args, **kw):
         Robot.__init__(self, *args, **kw)
         socket.setdefaulttimeout(int(self.timeout))
 
-   def check_url(self, bookmark):
-      try:
-         self.start = int(time.time())
-         bookmark.icon = None
+    def check_url(self, bookmark):
+        try:
+            self.start = int(time.time())
+            bookmark.icon = None
 
-         url_type, url_rest = urllib.splittype(bookmark.href)
-         url_host, url_path = urllib.splithost(url_rest)
-         url_path, url_tag  = urllib.splittag(url_path)
+            url_type, url_rest = urllib.splittype(bookmark.href)
+            url_host, url_path = urllib.splithost(url_rest)
+            url_path, url_tag  = urllib.splittag(url_path)
 
-         url = "%s://%s%s" % (url_type, url_host, url_path)
-         error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True)
+            url = "%s://%s%s" % (url_type, url_host, url_path)
+            error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True)
 
-         if error:
-             bookmark.error = error
-             return 1
+            if error:
+                bookmark.error = error
+                return 1
 
-         if redirect_code:
-             self.set_redirect(bookmark, redirect_code, redirect_to)
-             return 1
+            if redirect_code:
+                self.set_redirect(bookmark, redirect_code, redirect_to)
+                return 1
 
-         size = 0
-         last_modified = None
+            size = 0
+            last_modified = None
 
-         if headers:
-            try:
-               size = headers["Content-Length"]
-            except KeyError:
-               size = len(content)
+            if headers:
+                try:
+                    size = headers["Content-Length"]
+                except KeyError:
+                    size = len(content)
 
-            try:
-               last_modified = headers["Last-Modified"]
-            except KeyError:
-               pass
+                try:
+                    last_modified = headers["Last-Modified"]
+                except KeyError:
+                    pass
+
+                if last_modified:
+                    last_modified = parse_time(last_modified)
+            else:
+                size = len(content)
 
             if last_modified:
-               last_modified = parse_time(last_modified)
-         else:
-            size = len(content)
-
-         if last_modified:
-            last_modified = str(int(last_modified))
-         else:
-            last_modified = bookmark.last_visit
-
-         bookmark.size = size
-         bookmark.last_modified = last_modified
-
-         md5 = md5wrapper()
-         if url_type == "ftp": # Pass welcome message through MD5
-            md5.update(self.get_ftp_welcome())
-
-         md5.update(content)
-         bookmark.md5 = str(md5)
-
-         if headers:
-            try:
-               content_type = headers["Content-Type"]
-               self.log("   Content-Type: %s" % content_type)
-               try:
-                  # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
-                  content_type, charset = content_type.split(';', 1)
-                  content_type = content_type.strip()
-                  charset = charset.split('=')[1].strip().split(',')[0]
-                  self.log("   HTTP charset   : %s" % charset)
-               except (ValueError, IndexError):
-                  charset = None
-                  self.log("   no charset in Content-Type header")
-               for ctype in ("text/html", "application/xhtml+xml"):
-                  if content_type.startswith(ctype):
-                      html = True
-                      break
-               else:
-                  html = False
-               if html:
-                  parser = parse_html(content, charset, self.log)
-                  if parser:
-                      bookmark.real_title = parser.title
-                      icon = parser.icon
-                  else:
-                     icon = None
-                  if not icon:
-                     icon = "/favicon.ico"
-                  icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
-                  self.log("   looking for icon at: %s" % icon_url)
-                  if icon_url in icons:
-                     if icons[icon_url]:
-                        bookmark.icon_href = icon_url
-                        content_type, bookmark.icon = icons[icon_url]
-                        self.log("   cached icon: %s" % content_type)
-                     else:
-                        self.log("   cached icon: no icon")
-                  else:
-                     try:
-                        _icon_url = icon_url
-                        for i in range(8):
-                           error, icon_redirect_code, icon_redirect_to, \
-                              icon_headers, icon_data = \
-                                 self.get(bookmark, _icon_url)
-                           if icon_redirect_code:
-                              _icon_url = icon_redirect_to
-                              self.log("   redirect to : %s" % _icon_url)
-                           else:
-                              if icon_data is None:
-                                   raise IOError("No icon")
-                              break
+                last_modified = str(int(last_modified))
+            else:
+                last_modified = bookmark.last_visit
+
+            bookmark.size = size
+            bookmark.last_modified = last_modified
+
+            md5 = md5wrapper()
+            if url_type == "ftp": # Pass welcome message through MD5
+                md5.update(self.get_ftp_welcome())
+
+            md5.update(content)
+            bookmark.md5 = str(md5)
+
+            if headers:
+                try:
+                    content_type = headers["Content-Type"]
+                    self.log("   Content-Type: %s" % content_type)
+                    try:
+                        # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+                        content_type, charset = content_type.split(';', 1)
+                        content_type = content_type.strip()
+                        charset = charset.split('=')[1].strip().split(',')[0]
+                        self.log("   HTTP charset   : %s" % charset)
+                    except (ValueError, IndexError):
+                        charset = None
+                        self.log("   no charset in Content-Type header")
+                    for ctype in ("text/html", "application/xhtml+xml"):
+                        if content_type.startswith(ctype):
+                            html = True
+                            break
+                    else:
+                        html = False
+                    if html:
+                        parser = parse_html(content, charset, self.log)
+                        if parser:
+                            bookmark.real_title = parser.title
+                            icon = parser.icon
                         else:
-                           raise IOError("Too many redirects")
-                     except:
-                        etype, emsg, tb = sys.exc_info()
-                        self.log("   no icon        : %s %s" % (etype, emsg))
-                        etype = emsg = tb = None
-                        icons[icon_url] = None
-                     else:
-                        content_type = icon_headers["Content-Type"]
-                        if content_type.startswith("application/") \
-                              or content_type.startswith("image/") \
-                              or content_type.startswith("text/plain"):
-                           bookmark.icon_href = icon_url
-                           self.log("   got icon       : %s" % content_type)
-                           if content_type.startswith("application/") \
-                                 or content_type.startswith("text/plain"):
-                              self.log("   non-image content type, assume x-icon")
-                              content_type = 'image/x-icon'
-                           bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
-                           icons[icon_url] = (content_type, bookmark.icon)
+                            icon = None
+                        if not icon:
+                            icon = "/favicon.ico"
+                        icon_url = urljoin("%s://%s%s" % (url_type, url_host, url_path), icon)
+                        self.log("   looking for icon at: %s" % icon_url)
+                        if icon_url in icons:
+                            if icons[icon_url]:
+                                bookmark.icon_href = icon_url
+                                content_type, bookmark.icon = icons[icon_url]
+                                self.log("   cached icon: %s" % content_type)
+                            else:
+                                self.log("   cached icon: no icon")
                         else:
-                           self.log("   no icon        : bad content type '%s'" % content_type)
-                           icons[icon_url] = None
-                  if parser and parser.refresh:
-                     refresh = parser.refresh
-                     try:
-                        url = refresh.split('=', 1)[1]
-                     except IndexError:
-                        url = "self"
-                     try:
-                        timeout = float(refresh.split(';')[0])
-                     except (IndexError, ValueError):
-                        self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh))
-                     else:
-                        try:
-                           timeout = int(refresh.split(';')[0])
-                        except ValueError:
-                           pass # float timeout
-                        self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
-
-            except KeyError as key:
-               self.log("   no header: %s" % key)
-
-      except EOFError:
-         bookmark.error = "Unexpected EOF (FTP server closed connection)"
-         self.log('   EOF: %s' % bookmark.error)
-
-      except KeyboardInterrupt:
-         self.log("Keyboard interrupt (^C)")
-         return 0
-
-      except socket.error as e:
-         bookmark.error = get_error(e)
-         self.log(bookmark.error)
-
-      except:
-         import traceback
-         traceback.print_exc()
-         bookmark.error = "Exception!"
-         self.log('   Exception: %s' % bookmark.error)
-
-      finally:
-         self.finish_check_url(bookmark)
-
-      # Tested
-      return 1
-
-   def set_redirect(self, bookmark, errcode, newurl):
+                            try:
+                                _icon_url = icon_url
+                                for i in range(8):
+                                    error, icon_redirect_code, icon_redirect_to, \
+                                       icon_headers, icon_data = \
+                                          self.get(bookmark, _icon_url)
+                                    if icon_redirect_code:
+                                        _icon_url = icon_redirect_to
+                                        self.log("   redirect to : %s" % _icon_url)
+                                    else:
+                                        if icon_data is None:
+                                            raise IOError("No icon")
+                                        break
+                                else:
+                                    raise IOError("Too many redirects")
+                            except:
+                                etype, emsg, tb = sys.exc_info()
+                                self.log("   no icon        : %s %s" % (etype, emsg))
+                                etype = emsg = tb = None
+                                icons[icon_url] = None
+                            else:
+                                content_type = icon_headers["Content-Type"]
+                                if content_type.startswith("application/") \
+                                      or content_type.startswith("image/") \
+                                      or content_type.startswith("text/plain"):
+                                    bookmark.icon_href = icon_url
+                                    self.log("   got icon       : %s" % content_type)
+                                    if content_type.startswith("application/") \
+                                          or content_type.startswith("text/plain"):
+                                        self.log("   non-image content type, assume x-icon")
+                                        content_type = 'image/x-icon'
+                                    bookmark.icon = "data:%s;base64,%s" % (content_type, b64encode(icon_data))
+                                    icons[icon_url] = (content_type, bookmark.icon)
+                                else:
+                                    self.log("   no icon        : bad content type '%s'" % content_type)
+                                    icons[icon_url] = None
+                        if parser and parser.refresh:
+                            refresh = parser.refresh
+                            try:
+                                url = refresh.split('=', 1)[1]
+                            except IndexError:
+                                url = "self"
+                            try:
+                                timeout = float(refresh.split(';')[0])
+                            except (IndexError, ValueError):
+                                self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh))
+                            else:
+                                try:
+                                    timeout = int(refresh.split(';')[0])
+                                except ValueError:
+                                    pass # float timeout
+                                self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
+
+                except KeyError as key:
+                    self.log("   no header: %s" % key)
+
+        except EOFError:
+            bookmark.error = "Unexpected EOF (FTP server closed connection)"
+            self.log('   EOF: %s' % bookmark.error)
+
+        except KeyboardInterrupt:
+            self.log("Keyboard interrupt (^C)")
+            return 0
+
+        except socket.error as e:
+            bookmark.error = get_error(e)
+            self.log(bookmark.error)
+
+        except:
+            import traceback
+            traceback.print_exc()
+            bookmark.error = "Exception!"
+            self.log('   Exception: %s' % bookmark.error)
+
+        finally:
+            self.finish_check_url(bookmark)
+
+        # Tested
+        return 1
+
+    def set_redirect(self, bookmark, errcode, newurl):
         bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl)
         self.log('   Moved: %s' % bookmark.moved)
 
-   def finish_check_url(self, bookmark):
-      start = self.start
-      bookmark.last_tested = str(start)
-      now = int(time.time())
-      bookmark.test_time = str(now - start)
+    def finish_check_url(self, bookmark):
+        start = self.start
+        bookmark.last_tested = str(start)
+        now = int(time.time())
+        bookmark.test_time = str(now - start)
diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py
index 6f15ad5cf193aecd5832dd5ed7837ca2f936e232..c7324e315bcd7ad5a5c1c1b785bcf2c139a29f91 100644 (file)
@@ -17,38 +17,38 @@ from Robots.bkmk_robot_base import robot_base, get_error
 
 
 class RedirectException(Exception):
-   def __init__(self, errcode, newurl):
-      Exception.__init__(self)
-      self.errcode = errcode
-      self.newurl = newurl
+    def __init__(self, errcode, newurl):
+        Exception.__init__(self)
+        self.errcode = errcode
+        self.newurl = newurl
 
 class MyURLopener(urllib.URLopener):
-   # Error 301 -- relocated (permanently)
-   def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 
-      if headers.has_key('location'):
-         newurl = headers['location']
-      elif headers.has_key('uri'):
-         newurl = headers['uri']
-      else:
-         newurl = "Nowhere"
-      raise RedirectException(errcode, newurl)
-
-   # Error 302 -- relocated (temporarily)
-   http_error_302 = http_error_301
-   # Error 303 -- relocated (see other)
-   http_error_303 = http_error_301
-   # Error 307 -- relocated (temporarily)
-   http_error_307 = http_error_301
-
-   # Error 401 -- authentication required
-   def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 
-      raise IOError(('http error', errcode, "Authentication required ", headers))
-
-   def http_error_default(self, url, fp, errcode, errmsg, headers):
-      if fp:
-         void = fp.read()
-         fp.close()
-      raise IOError(('http error', errcode, errmsg, headers))
+    # Error 301 -- relocated (permanently)
+    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
+        if headers.has_key('location'):
+            newurl = headers['location']
+        elif headers.has_key('uri'):
+            newurl = headers['uri']
+        else:
+            newurl = "Nowhere"
+        raise RedirectException(errcode, newurl)
+
+    # Error 302 -- relocated (temporarily)
+    http_error_302 = http_error_301
+    # Error 303 -- relocated (see other)
+    http_error_303 = http_error_301
+    # Error 307 -- relocated (temporarily)
+    http_error_307 = http_error_301
+
+    # Error 401 -- authentication required
+    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
+        raise IOError(('http error', errcode, "Authentication required ", headers))
+
+    def http_error_default(self, url, fp, errcode, errmsg, headers):
+        if fp:
+            void = fp.read()
+            fp.close()
+        raise IOError(('http error', errcode, errmsg, headers))
 
 
 urllib._urlopener = MyURLopener()
@@ -71,55 +71,55 @@ urllib_ftpwrapper = urllib.ftpwrapper
 ftpcache_key = None
 
 class myftpwrapper(urllib_ftpwrapper):
-   def __init__(self, user, passwd, host, port, dirs):
-      urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
-      global ftpcache_key
-      ftpcache_key = (user, host, port, '/'.join(dirs))
+    def __init__(self, user, passwd, host, port, dirs):
+        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
+        global ftpcache_key
+        ftpcache_key = (user, host, port, '/'.join(dirs))
 
 urllib.ftpwrapper = myftpwrapper
 
 
 class robot_urllib(robot_base):
-   def get(self, bookmark, url, accept_charset=False):
-      try:
-         # Set fake referer to the base URL
-         urllib._urlopener.addheaders[2] = ('Referer', url)
-
-         if accept_charset and bookmark.charset:
-            urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
-         try:
-            fname, headers = urllib.urlretrieve(url)
-         finally:
+    def get(self, bookmark, url, accept_charset=False):
+        try:
+            # Set fake referer to the base URL
+            urllib._urlopener.addheaders[2] = ('Referer', url)
+
             if accept_charset and bookmark.charset:
-               del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
-
-         infile = open(fname, 'rb')
-         content = infile.read()
-         infile.close()
-
-         return None, None, None, headers, content
-
-      except RedirectException as e:
-         return None, e.errcode, e.newurl, None, None
-
-      except IOError as e:
-         if (e[0] == "http error") and (e[1] == -1):
-            error = None
-            bookmark.no_error = "The server did not return any header - it is not an error, actually"
-            self.log('   no headers: %s' % bookmark.no_error)
-         else:
-            error = get_error(e)
-            self.log('   Error: %s' % error)
-
-         return error, None, None, None, None
-
-   def get_ftp_welcome(self):
-      global ftpcache_key
-      _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
-      ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db.
-                          # If there are - ftpcache_key in prev line is invalid.
-      return _welcome
-
-   def finish_check_url(self, bookmark):
-      robot_base.finish_check_url(self, bookmark)
-      urllib.urlcleanup()
+                urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
+            try:
+                fname, headers = urllib.urlretrieve(url)
+            finally:
+                if accept_charset and bookmark.charset:
+                    del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
+
+            infile = open(fname, 'rb')
+            content = infile.read()
+            infile.close()
+
+            return None, None, None, headers, content
+
+        except RedirectException as e:
+            return None, e.errcode, e.newurl, None, None
+
+        except IOError as e:
+            if (e[0] == "http error") and (e[1] == -1):
+                error = None
+                bookmark.no_error = "The server did not return any header - it is not an error, actually"
+                self.log('   no headers: %s' % bookmark.no_error)
+            else:
+                error = get_error(e)
+                self.log('   Error: %s' % error)
+
+            return error, None, None, None, None
+
+    def get_ftp_welcome(self):
+        global ftpcache_key
+        _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
+        ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db.
+                            # If there are - ftpcache_key in prev line is invalid.
+        return _welcome
+
+    def finish_check_url(self, bookmark):
+        robot_base.finish_check_url(self, bookmark)
+        urllib.urlcleanup()
diff --git a/Storage/bkmk_stflad.py b/Storage/bkmk_stflad.py
index b936c4a723b5485295ace7ed958706d23fabaa84..63ee0400b5b55f9d47339abef630859ecbbc9853 100644 (file)
@@ -18,123 +18,123 @@ from bkmk_objects import Folder, Bookmark, Ruler, Walker
 
 
 class storage_flad(Walker):
-   filename = "bookmarks_db.flad"
-
-   def __init__(self):
-      self.first_object = 1
-
-   def root_folder(self, f):
-      header = string.replace(f.header, ".", ".\n")
-      header = string.replace(header, "<", "\n<", 3)[1:]
-      header_file = open("header", 'w')
-      header_file.write(header + "\n")
-      header_file.write('<H1>%s</H1>\n\n' % f.name)
-      if f.comment: header_file.write('<DD>%s\n' % f.comment)
-      header_file.close()
-
-   def start_folder(self, f, level):
-      self.outfile.write("""
-Level: %d
-Folder: %s
-AddDate: %s
-Comment: %s
-LastModified: %s
-""" % (level, f.name, f.add_date, f.comment, f.last_modified or ''))
-
-   def bookmark(self, b, level):
-      self.outfile.write("""
-Level: %d
-Title: %s
-URL: %s
-AddDate: %s
-LastVisit: %s
-LastModified: %s
-Keyword: %s
-Comment: %s
-IconURI: %s
-Icon: %s
-Charset: %s
-""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified,
-         b.keyword, b.comment.replace('\n', "\\n"),
-         b.icon_href or '', b.icon or '', b.charset or ''))
-
-   def ruler(self, r, level):
-      self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
-
-   def store(self, root_folder):
-      self.outfile = open(self.filename, 'w')
-      root_folder.walk_depth(self)
-      self.outfile.close()
-
-   def unindent(self, old_level, new_level):
-      while old_level > new_level:
-         old_level = old_level - 1
-         del self.folder_stack[-1]
-
-      if self.folder_stack:
-         self.current_folder = self.folder_stack[-1]
-      else:
-         self.current_folder = None
-
-   def load(self):
-      bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"])
-
-      root_folder = Folder()
-      self.folder_stack = [root_folder]
-      self.current_folder = root_folder
-
-      header_file = open("header", 'r')
-      header = header_file.read()
-      header_file.close()
-
-      header = string.split(header, "\n")
-      root_folder.header = string.join(header[:5], '')
-      root_folder.name = header[5][4:-5]
-      root_folder.comment = string.join(header[7:], '')[4:]
-
-      save_level = 0
-      got_folder = 1 # Start as if we already have one folder
-
-      for record in bookmarks_db:
-         level = int(record["Level"])
-
-         if level == save_level:
-            pass
-         elif level == save_level + 1:
-            if not got_folder:
-               raise ValueError("indent without folder")
-         elif level <= save_level - 1:
-            self.unindent(save_level, level)
-         else:
-            raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))
-
-         save_level = level
-         got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
-
-         if record.has_key("URL"):
-            comment = record["Comment"].replace("\\n", '\n')
-            bookmark = Bookmark(record["URL"], record["AddDate"],
-               record["LastVisit"], record["LastModified"],
-               record["Keyword"], comment)
-            bookmark.name = record["Title"]
-            self.current_folder.append(bookmark)
-
-         elif record.has_key("Folder"):
-            folder = Folder(record["AddDate"], record["Comment"], record["LastModified"])
-            folder.name = record["Folder"]
-            self.current_folder.append(folder)
-            self.folder_stack.append(folder)
-            self.current_folder = folder
-
-         elif record.has_key("Ruler"):
-            self.current_folder.append(Ruler())
-
-         else:
-            raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record))
-
-      if save_level >= 0:
-         self.unindent(save_level, 0)
-      else:
-         raise ValueError("new level (%d) too little - must be >= 0" % save_level)
-
-      return root_folder
+    filename = "bookmarks_db.flad"
+
+    def __init__(self):
+        self.first_object = 1
+
+    def root_folder(self, f):
+        header = string.replace(f.header, ".", ".\n")
+        header = string.replace(header, "<", "\n<", 3)[1:]
+        header_file = open("header", 'w')
+        header_file.write(header + "\n")
+        header_file.write('<H1>%s</H1>\n\n' % f.name)
+        if f.comment: header_file.write('<DD>%s\n' % f.comment)
+        header_file.close()
+
+    def start_folder(self, f, level):
+        self.outfile.write("""
+Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+LastModified: %s
+""" % (level, f.name, f.add_date, f.comment, f.last_modified or ''))
+
+    def bookmark(self, b, level):
+        self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Keyword: %s
+Comment: %s
+IconURI: %s
+Icon: %s
+Charset: %s
+""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified,
+         b.keyword, b.comment.replace('\n', "\\n"),
+         b.icon_href or '', b.icon or '', b.charset or ''))
+
+    def ruler(self, r, level):
+        self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
+
+    def store(self, root_folder):
+        self.outfile = open(self.filename, 'w')
+        root_folder.walk_depth(self)
+        self.outfile.close()
+
+    def unindent(self, old_level, new_level):
+        while old_level > new_level:
+            old_level = old_level - 1
+            del self.folder_stack[-1]
+
+        if self.folder_stack:
+            self.current_folder = self.folder_stack[-1]
+        else:
+            self.current_folder = None
+
+    def load(self):
+        bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"])
+
+        root_folder = Folder()
+        self.folder_stack = [root_folder]
+        self.current_folder = root_folder
+
+        header_file = open("header", 'r')
+        header = header_file.read()
+        header_file.close()
+
+        header = string.split(header, "\n")
+        root_folder.header = string.join(header[:5], '')
+        root_folder.name = header[5][4:-5]
+        root_folder.comment = string.join(header[7:], '')[4:]
+
+        save_level = 0
+        got_folder = 1 # Start as if we already have one folder
+
+        for record in bookmarks_db:
+            level = int(record["Level"])
+
+            if level == save_level:
+                pass
+            elif level == save_level + 1:
+                if not got_folder:
+                    raise ValueError("indent without folder")
+            elif level <= save_level - 1:
+                self.unindent(save_level, level)
+            else:
+                raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))
+
+            save_level = level
+            got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
+
+            if record.has_key("URL"):
+                comment = record["Comment"].replace("\\n", '\n')
+                bookmark = Bookmark(record["URL"], record["AddDate"],
+                   record["LastVisit"], record["LastModified"],
+                   record["Keyword"], comment)
+                bookmark.name = record["Title"]
+                self.current_folder.append(bookmark)
+
+            elif record.has_key("Folder"):
+                folder = Folder(record["AddDate"], record["Comment"], record["LastModified"])
+                folder.name = record["Folder"]
+                self.current_folder.append(folder)
+                self.folder_stack.append(folder)
+                self.current_folder = folder
+
+            elif record.has_key("Ruler"):
+                self.current_folder.append(Ruler())
+
+            else:
+                raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record))
+
+        if save_level >= 0:
+            self.unindent(save_level, 0)
+        else:
+            raise ValueError("new level (%d) too little - must be >= 0" % save_level)
+
+        return root_folder
diff --git a/Storage/bkmk_stjson.py b/Storage/bkmk_stjson.py
index 86e0270fd55b034fd6b91d651e8f7d0feb037375..5fbeda88ea110d2098c30594b00b86aba9df24b6 100644 (file)
@@ -5,16 +5,16 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['storage_json']
 
 
 try:
-   import json
+    import json
 except ImportError:
-   import simplejson as json
+    import simplejson as json
 
 from bkmk_objects import Folder, Bookmark, Ruler, Walker
 
@@ -115,9 +115,9 @@ class storage_json(Walker):
         self.current_folder = root_folder
 
         if "type" not in bookmarks_dict:
-           bookmarks_dict["id"] = "0"
-           bookmarks_dict["title"] = ""
-           bookmarks_dict["type"] = "text/x-moz-place-container"
+            bookmarks_dict["id"] = "0"
+            bookmarks_dict["title"] = ""
+            bookmarks_dict["type"] = "text/x-moz-place-container"
         self.load_folder(root_folder, bookmarks_dict)
         if self.folder_stack:
             raise RuntimeError('Excessive folder stack: %s' % self.folder_stack)
diff --git a/Storage/bkmk_stpickle.py b/Storage/bkmk_stpickle.py
index 444cf35c3395d9a851c09284d4e63fe85d90fcc6..2598cf6a7453e20c7b5f1c38b91e116436abd196 100644 (file)
@@ -5,31 +5,31 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['storage_pickle']
 
 
 try:
-   import cPickle
-   pickle = cPickle
+    import cPickle
+    pickle = cPickle
 
 except ImportError:
-   import pickle
+    import pickle
 
 
 class storage_pickle(object):
-   filename = "bookmarks_db.pickle"
+    filename = "bookmarks_db.pickle"
 
-   def store(self, root_folder):
-      outfile = open(self.filename, 'wb')
-      pickle.dump(root_folder, outfile, 1)
-      outfile.close()
+    def store(self, root_folder):
+        outfile = open(self.filename, 'wb')
+        pickle.dump(root_folder, outfile, 1)
+        outfile.close()
 
-   def load(self):
-      infile = open(self.filename, 'rb')
-      root_folder = pickle.load(infile)
-      infile.close()
+    def load(self):
+        infile = open(self.filename, 'rb')
+        root_folder = pickle.load(infile)
+        infile.close()
 
-      return root_folder
+        return root_folder
diff --git a/Writers/bkmk_wflad.py b/Writers/bkmk_wflad.py
index 999872b1e488a93c062a3caa3514f0281da33439..11cd1436b398a18fbf77d664a59c30d51b01de87 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['writer_flad']
@@ -15,53 +15,53 @@ from bkmk_objects import Writer
 
 
 def strftime(s):
-   try:
-      return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
-   except (TypeError, ValueError): # s is None or is already formatted
-      return s
+    try:
+        return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
+    except (TypeError, ValueError): # s is None or is already formatted
+        return s
 
 
 class writer_flad(Writer):
-   filename = "bookmarks_db.flad"
-
-   def __init__(self, outfile, prune=None):
-      Writer.__init__(self, outfile, prune)
-      self.first_object = 1
-
-   def start_folder(self, f, level):
-      self.outfile.write("""
-Level: %d
-Folder: %s
-AddDate: %s
-Comment: %s
-LastModified: %s
-""" % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified)))
-
-   def bookmark(self, b, level):
-      self.outfile.write("""
-Level: %d
-Title: %s
-URL: %s
-AddDate: %s
-LastVisit: %s
-LastModified: %s
-Keyword: %s
-Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment))
-
-      for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
-            ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
-            ("real_title", "RealTitle"), ("test_time", "TestTime"),
-            ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")):
-         if hasattr(b, attr_name):
-            value = getattr(b, attr_name)
-            if isinstance(value, unicode):
-               value = value.encode('utf-8')
-            self.outfile.write("\n%s: %s" % (attr_out, value))
-
-      if hasattr(b, "last_tested"):
-         self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
-
-      self.outfile.write("\n")
-
-   def ruler(self, r, level):
-      self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
+    filename = "bookmarks_db.flad"
+
+    def __init__(self, outfile, prune=None):
+        Writer.__init__(self, outfile, prune)
+        self.first_object = 1
+
+    def start_folder(self, f, level):
+        self.outfile.write("""
+Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+LastModified: %s
+""" % (level, f.name, strftime(f.add_date), f.comment, strftime(f.last_modified)))
+
+    def bookmark(self, b, level):
+        self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Keyword: %s
+Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.keyword, b.comment))
+
+        for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
+              ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
+              ("real_title", "RealTitle"), ("test_time", "TestTime"),
+              ("icon_href", "IconURI"), ("icon", "Icon"), ("charset", "Charset")):
+            if hasattr(b, attr_name):
+                value = getattr(b, attr_name)
+                if isinstance(value, unicode):
+                    value = value.encode('utf-8')
+                self.outfile.write("\n%s: %s" % (attr_out, value))
+
+        if hasattr(b, "last_tested"):
+            self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
+
+        self.outfile.write("\n")
+
+    def ruler(self, r, level):
+        self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
diff --git a/Writers/bkmk_wflad_err.py b/Writers/bkmk_wflad_err.py
index 418a3e05c001f760e9e1368bbe0634b6eb511de9..a48484c4aea599d835b012fdbd13c2cab73c950e 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['writer_flad_err']
@@ -15,8 +15,8 @@ from bkmk_wflad import writer_flad
 
 
 class writer_flad_err(writer_flad):
-   filename = "bookmarks_db.errors"
+    filename = "bookmarks_db.errors"
 
-   def bookmark(self, b, level):
-      if hasattr(b, "error"):
-         writer_flad.bookmark(self, b, level)
+    def bookmark(self, b, level):
+        if hasattr(b, "error"):
+            writer_flad.bookmark(self, b, level)
diff --git a/Writers/bkmk_whtml.py b/Writers/bkmk_whtml.py
index 707b52a68becf5bb00db6181fa3645bf2f247aa9..f7a1d654a2274780cb2284cf64040926e10f8a27 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['writer_html']
@@ -15,51 +15,51 @@ from bkmk_objects import Writer, BKMK_FORMAT, quote_title
 
 
 def dump_comment(comment):
-   comment = comment.replace("<BR>\n", "\n")
-   if BKMK_FORMAT == "NETSCAPE":
-      comment = comment.replace("\n", "<BR>\n")
-   return comment
+    comment = comment.replace("<BR>\n", "\n")
+    if BKMK_FORMAT == "NETSCAPE":
+        comment = comment.replace("\n", "<BR>\n")
+    return comment
 
 ind_s = " "*4
 
 class writer_html(Writer):
-   filename = "bookmarks.html"
+    filename = "bookmarks.html"
 
-   def _folder(self, f, level):
-      if f.comment: self.outfile.write('<DD>%s\n' % dump_comment(f.comment))
-      self.outfile.write(ind_s*level + "<DL><p>\n")
+    def _folder(self, f, level):
+        if f.comment: self.outfile.write('<DD>%s\n' % dump_comment(f.comment))
+        self.outfile.write(ind_s*level + "<DL><p>\n")
 
-   def root_folder(self, f):
-      self.outfile.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
-      self.outfile.write(f.header + "\n")
-      self.outfile.write('<H1>%s</H1>\n\n' % quote_title(f.name))
-      self._folder(f, 0)
+    def root_folder(self, f):
+        self.outfile.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
+        self.outfile.write(f.header + "\n")
+        self.outfile.write('<H1>%s</H1>\n\n' % quote_title(f.name))
+        self._folder(f, 0)
 
-   def start_folder(self, f, level):
-      self.outfile.write(ind_s*level + '<DT><H3 ADD_DATE="%s"' % f.add_date)
-      if (BKMK_FORMAT == "MOZILLA") and f.last_modified: self.outfile.write(' LAST_MODIFIED="%s"' % f.last_modified)
-      self.outfile.write('>%s</H3>\n' % quote_title(f.name))
-      self._folder(f, level)
+    def start_folder(self, f, level):
+        self.outfile.write(ind_s*level + '<DT><H3 ADD_DATE="%s"' % f.add_date)
+        if (BKMK_FORMAT == "MOZILLA") and f.last_modified: self.outfile.write(' LAST_MODIFIED="%s"' % f.last_modified)
+        self.outfile.write('>%s</H3>\n' % quote_title(f.name))
+        self._folder(f, level)
 
-   def end_folder(self, f, level):
-      self.outfile.write(ind_s*level + "</DL><p>\n")
+    def end_folder(self, f, level):
+        self.outfile.write(ind_s*level + "</DL><p>\n")
 
-   def bookmark(self, b, level):
-      self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href, b.add_date))
-      if b.last_visit: self.outfile.write(' LAST_VISIT="%s"' % b.last_visit)
-      if b.last_modified:
-         self.outfile.write(' LAST_MODIFIED="%s"' % b.last_modified)
-      if BKMK_FORMAT == "MOZILLA":
-         if b.keyword: self.outfile.write(' SHORTCUTURL="%s"' % b.keyword)
-         if b.icon_href:
-            value = b.icon_href
-            if isinstance(value, unicode):
-               value = value.encode('utf-8')
-            self.outfile.write(' ICON_URI="%s"' % value)
-         if b.icon: self.outfile.write(' ICON="%s"' % b.icon)
-         if b.charset: self.outfile.write(' LAST_CHARSET="%s"' % b.charset)
-      self.outfile.write('>%s</A>\n' % quote_title(b.name))
-      if b.comment: self.outfile.write('<DD>%s\n' % dump_comment(b.comment))
+    def bookmark(self, b, level):
+        self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s"' % (b.href, b.add_date))
+        if b.last_visit: self.outfile.write(' LAST_VISIT="%s"' % b.last_visit)
+        if b.last_modified:
+            self.outfile.write(' LAST_MODIFIED="%s"' % b.last_modified)
+        if BKMK_FORMAT == "MOZILLA":
+            if b.keyword: self.outfile.write(' SHORTCUTURL="%s"' % b.keyword)
+            if b.icon_href:
+                value = b.icon_href
+                if isinstance(value, unicode):
+                    value = value.encode('utf-8')
+                self.outfile.write(' ICON_URI="%s"' % value)
+            if b.icon: self.outfile.write(' ICON="%s"' % b.icon)
+            if b.charset: self.outfile.write(' LAST_CHARSET="%s"' % b.charset)
+        self.outfile.write('>%s</A>\n' % quote_title(b.name))
+        if b.comment: self.outfile.write('<DD>%s\n' % dump_comment(b.comment))
 
-   def ruler(self, r, level):
-      self.outfile.write(ind_s*(level+1) + "<HR>\n")
+    def ruler(self, r, level):
+        self.outfile.write(ind_s*(level+1) + "<HR>\n")
diff --git a/Writers/bkmk_wtxt.py b/Writers/bkmk_wtxt.py
index ba2196db138b9ea050ffaa082bef72dffbb64d4e..77b76d34e671adf57c444da27a288dbc8eab76bf 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['writer_txt']
@@ -16,19 +16,19 @@ ind_s = " "*4
 
 
 class writer_txt(Writer):
-   filename = "dump.txt"
+    filename = "dump.txt"
 
-   def root_folder(self, f):
-      self.outfile.write("Folder: %s\n" % f.name)
+    def root_folder(self, f):
+        self.outfile.write("Folder: %s\n" % f.name)
 
-   def start_folder(self, f, level):
-      self.outfile.write(ind_s*level + "Folder: %s\n" % f.name)
+    def start_folder(self, f, level):
+        self.outfile.write(ind_s*level + "Folder: %s\n" % f.name)
 
-   def end_folder(self, f, level):
-      self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name)
+    def end_folder(self, f, level):
+        self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name)
 
-   def bookmark(self, b, level):
-      self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name)
+    def bookmark(self, b, level):
+        self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name)
 
-   def ruler(self, r, level):
-      self.outfile.write(ind_s*(level+1) + "-----\n")
+    def ruler(self, r, level):
+        self.outfile.write(ind_s*(level+1) + "-----\n")
diff --git a/bkmk-add.py b/bkmk-add.py
index 860688e50acdfb520fed0c0ad848494f572f52ac..df12452433c27f4d9c5539cc28f7d5186aeb0541 100755 (executable)
@@ -15,67 +15,67 @@ from robots import robot
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "t:")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "t:")
 
-   report_stats = 1
-   title = ''
+    report_stats = 1
+    title = ''
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-      elif _opt == '-t':
-         title = _arg
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+        elif _opt == '-t':
+            title = _arg
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if len(args) != 1:
-      sys.stderr.write("bkmk-add: too many or too few arguments\n")
-      sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n")
-      sys.exit(1)
+    if len(args) != 1:
+        sys.stderr.write("bkmk-add: too many or too few arguments\n")
+        sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n")
+        sys.exit(1)
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
+    root_folder = storage.load()
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
-   href = args[0]
-   now = int(time.time())
-   bookmark = Bookmark(href, str(now), '0', '0')
-   bookmark.name = ''
-   bookmark.parent = None
+    href = args[0]
+    now = int(time.time())
+    bookmark = Bookmark(href, str(now), '0', '0')
+    bookmark.name = ''
+    bookmark.parent = None
 
-   global robot
-   robot = robot(None)
+    global robot
+    robot = robot(None)
 
-   if robot.check_url(bookmark): # get real title and last modified date
-      if title: # forced title
-         bookmark.name = title
-      elif hasattr(bookmark, "real_title"):
-         bookmark.name = bookmark.real_title
-      if report_stats:
-         sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name))
-      del bookmark.parent
-      root_folder.append(bookmark)
+    if robot.check_url(bookmark): # get real title and last modified date
+        if title: # forced title
+            bookmark.name = title
+        elif hasattr(bookmark, "real_title"):
+            bookmark.name = bookmark.real_title
+        if report_stats:
+            sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name))
+        del bookmark.parent
+        root_folder.append(bookmark)
 
-      if report_stats:
-         sys.stdout.write("Storing %s: " % storage.filename)
-         sys.stdout.flush()
+        if report_stats:
+            sys.stdout.write("Storing %s: " % storage.filename)
+            sys.stdout.flush()
 
-      storage.store(root_folder)
+        storage.store(root_folder)
 
-      if report_stats:
-         print("Ok")
+        if report_stats:
+            print("Ok")
 
 
 if __name__ == '__main__':
-   run()
+    run()
diff --git a/bkmk2db.py b/bkmk2db.py
index b8905324ab2d620b04acb03b57dfc920390d54f0..5cf29aed53d4abfe607fb48b56aacdf4329b75cd 100755 (executable)
@@ -16,113 +16,113 @@ __license__ = "GNU GPL"
 
 
 def run():
-   optlist, args = getopt(sys.argv[1:], "is")
+    optlist, args = getopt(sys.argv[1:], "is")
 
-   show_pbar = True
-   report_stats = 1
-
-   for _opt, _arg in optlist:
-      if _opt == '-i':
-         show_pbar = 0
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    show_pbar = True
+    report_stats = 1
+
+    for _opt, _arg in optlist:
+        if _opt == '-i':
+            show_pbar = 0
+        if _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if args:
-      if len(args) > 1:
-         sys.stderr.write("bkmk2db: too many arguments\n")
-         sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n")
-         sys.exit(1)
+    if args:
+        if len(args) > 1:
+            sys.stderr.write("bkmk2db: too many arguments\n")
+            sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n")
+            sys.exit(1)
 
-      filename = args[0]
+        filename = args[0]
 
-   else:
-      filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+    else:
+        filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
 
 
-   if report_stats:
-      from storage import storage_name
-      sys.stdout.write("Converting %s to %s: " % (filename, storage_name))
-      sys.stdout.flush()
+    if report_stats:
+        from storage import storage_name
+        sys.stdout.write("Converting %s to %s: " % (filename, storage_name))
+        sys.stdout.flush()
 
-   if show_pbar:
-      show_pbar = sys.stderr.isatty()
+    if show_pbar:
+        show_pbar = sys.stderr.isatty()
 
-   if show_pbar:
-      try:
-         from m_lib.pbar.tty_pbar import ttyProgressBar
-      except ImportError:
-         show_pbar = 0
+    if show_pbar:
+        try:
+            from m_lib.pbar.tty_pbar import ttyProgressBar
+        except ImportError:
+            show_pbar = 0
 
-   if show_pbar:
-      try:
-         size = os.path.getsize(filename)
-      except:
-         print(filename, ": no such file")
-         sys.exit(1)
+    if show_pbar:
+        try:
+            size = os.path.getsize(filename)
+        except:
+            print(filename, ": no such file")
+            sys.exit(1)
 
 
-   if show_pbar:
-      pbar = ttyProgressBar(0, size)
-      lng = 0
+    if show_pbar:
+        pbar = ttyProgressBar(0, size)
+        lng = 0
 
-      # This is for DOS - it counts CRLF, which len() counts as 1 char!
-      if os.name == 'dos' or os.name == 'nt' :
-         dos_add = 1
-      else:
-         dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
+        # This is for DOS - it counts CRLF, which len() counts as 1 char!
+        if os.name == 'dos' or os.name == 'nt' :
+            dos_add = 1
+        else:
+            dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
 
 
-   infile = open(filename, 'r')
-   parser = BkmkParser()
+    infile = open(filename, 'r')
+    parser = BkmkParser()
 
-   line_no = 0
-   lng = 0
-   ok = 1
+    line_no = 0
+    lng = 0
+    ok = 1
 
-   for line in infile:
-      if show_pbar:
-         lng = lng + len(line) + dos_add
-         pbar.display(lng)
+    for line in infile:
+        if show_pbar:
+            lng = lng + len(line) + dos_add
+            pbar.display(lng)
 
-      #line = line.strip()
-      line_no = line_no + 1
+        #line = line.strip()
+        line_no = line_no + 1
 
-      try:
-         parser.feed(line)
-      except:
-         ok = 0
-         break
+        try:
+            parser.feed(line)
+        except:
+            ok = 0
+            break
 
-   try:
-      parser.close()
-   except:
-      ok = 0
+    try:
+        parser.close()
+    except:
+        ok = 0
 
-   infile.close()
+    infile.close()
 
-   if show_pbar:
-      del pbar
+    if show_pbar:
+        del pbar
 
-   if report_stats:
-      print("Ok")
-      print(line_no, "lines proceed")
-      print(parser.urls, "urls found")
-      print(parser.objects, "objects created")
+    if report_stats:
+        print("Ok")
+        print(line_no, "lines proceed")
+        print(parser.urls, "urls found")
+        print(parser.objects, "objects created")
 
-   if ok:
-      from storage import storage
-      storage = storage()
-      storage.store(parser.root_folder)
+    if ok:
+        from storage import storage
+        storage = storage()
+        storage.store(parser.root_folder)
 
-   else:
-      import traceback
-      traceback.print_exc()
-      sys.exit(1)
+    else:
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
 
 
 if __name__ == '__main__':
-   run()
+    run()
index dce981190c217b7602bcd600b53055dd47c13488..6897721da802123064222a0ebac2c81b65081b14 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['Folder', 'Bookmark', 'Ruler', 'Walker', 'Writer', 'Robot',
@@ -19,205 +19,205 @@ import os, urllib
 BKMK_FORMAT = os.environ.get("BKMK_FORMAT", "MOZILLA")
 
 class Folder(list):
-   isFolder = 1
-   isBookmark = 0
-
-   def __init__(self, add_date=None, comment='', last_modified=None):
-      super(Folder, self).__init__()
-      self.comment = comment
-      self.add_date = add_date
-      self.last_modified = last_modified
-
-   def walk_depth(self, walker, level=0):
-      if hasattr(self, "header"): # root folder
-         prune = 0
-         walker.root_folder(self)
-      else:
-         prune = walker.prune_folder(self)
-         if not prune:
-            walker.start_folder(self, level)
-
-      if not prune:
-         for object in self:
-            if object.isFolder:
-               object.walk_depth(walker, level+1)
-            elif object.isBookmark:
-               walker.bookmark(object, level)
-            else:
-               walker.ruler(object, level)
-
-         walker.end_folder(self, level)
+    isFolder = 1
+    isBookmark = 0
+
+    def __init__(self, add_date=None, comment='', last_modified=None):
+        super(Folder, self).__init__()
+        self.comment = comment
+        self.add_date = add_date
+        self.last_modified = last_modified
+
+    def walk_depth(self, walker, level=0):
+        if hasattr(self, "header"): # root folder
+            prune = 0
+            walker.root_folder(self)
+        else:
+            prune = walker.prune_folder(self)
+            if not prune:
+                walker.start_folder(self, level)
+
+        if not prune:
+            for object in self:
+                if object.isFolder:
+                    object.walk_depth(walker, level+1)
+                elif object.isBookmark:
+                    walker.bookmark(object, level)
+                else:
+                    walker.ruler(object, level)
+
+            walker.end_folder(self, level)
 
 
 class Bookmark(object):
-   isFolder = 0
-   isBookmark = 1
-
-   def __init__(self, href, add_date, last_visit=None, last_modified=None,
-         keyword=None, comment='', icon_href=None, icon=None,
-         charset=None, parser_charset=None):
-      protocol, request = urllib.splittype(href)
-      user, password, port = None, None, None
-      host, path = urllib.splithost(request)
-      if host:
-         user, host = urllib.splituser(host)
-         if user:
-            user, password = urllib.splitpasswd(user)
-         host, port = urllib.splitport(host)
-         if port: port = int(port)
-
-      if protocol == 'place':
-         href = protocol + ":"
-      else:
-         href = protocol + "://"
-      if user:
-         href += urllib.quote(user)
-         if password:
-            href += ':' + urllib.quote(password)
-         href += '@'
-      if host:
-         href += host.decode(parser_charset or 'utf-8').encode('idna')
-         if port:
-            href += ':%d' % port
-      if path:
-         href += path
-
-      self.href = href
-      self.add_date = add_date
-      self.last_visit = last_visit
-      self.last_modified = last_modified
-      self.keyword = keyword
-      self.comment = comment
-      self.icon_href = icon_href
-      self.icon = icon
-      self.charset = charset
+    isFolder = 0
+    isBookmark = 1
+
+    def __init__(self, href, add_date, last_visit=None, last_modified=None,
+          keyword=None, comment='', icon_href=None, icon=None,
+          charset=None, parser_charset=None):
+        protocol, request = urllib.splittype(href)
+        user, password, port = None, None, None
+        host, path = urllib.splithost(request)
+        if host:
+            user, host = urllib.splituser(host)
+            if user:
+                user, password = urllib.splitpasswd(user)
+            host, port = urllib.splitport(host)
+            if port: port = int(port)
+
+        if protocol == 'place':
+            href = protocol + ":"
+        else:
+            href = protocol + "://"
+        if user:
+            href += urllib.quote(user)
+            if password:
+                href += ':' + urllib.quote(password)
+            href += '@'
+        if host:
+            href += host.decode(parser_charset or 'utf-8').encode('idna')
+            if port:
+                href += ':%d' % port
+        if path:
+            href += path
+
+        self.href = href
+        self.add_date = add_date
+        self.last_visit = last_visit
+        self.last_modified = last_modified
+        self.keyword = keyword
+        self.comment = comment
+        self.icon_href = icon_href
+        self.icon = icon
+        self.charset = charset
 
 
 class Ruler(object):
-   isFolder = 0
-   isBookmark = 0
+    isFolder = 0
+    isBookmark = 0
 
 
 class Walker(object):
-   """
-      Interface class. Any instance that will be passed to Folder.walk_depth
-      may be derived from this class. It is not mandatory - unlike Java
-      Python does not require interface classes; but it is convenient to have
-      some methods predefined to no-op, in case you do not want to
-      provide end_folder etc.
-   """
+    """
+       Interface class. Any instance that will be passed to Folder.walk_depth
+       may be derived from this class. It is not mandatory - unlike Java
+       Python does not require interface classes; but it is convenient to have
+       some methods predefined to no-op, in case you do not want to
+       provide end_folder etc.
+    """
 
-   def root_folder(self, r):
-      pass
+    def root_folder(self, r):
+        pass
 
-   def start_folder(self, f, level):
-      pass
+    def start_folder(self, f, level):
+        pass
 
-   def end_folder(self, f, level):
-      pass
+    def end_folder(self, f, level):
+        pass
 
-   def bookmark(self, b, level):
-      pass
+    def bookmark(self, b, level):
+        pass
 
-   def ruler(self, r, level):
-      pass
+    def ruler(self, r, level):
+        pass
 
-   def prune_folder(self, folder):
-      return 0
+    def prune_folder(self, folder):
+        return 0
 
 
 class Writer(Walker):
-   def __init__(self, outfile, prune=None):
-      self.outfile = outfile
-      self.prune = prune
+    def __init__(self, outfile, prune=None):
+        self.outfile = outfile
+        self.prune = prune
 
-   def prune_folder(self, folder):
-      return self.prune == folder.name
+    def prune_folder(self, folder):
+        return self.prune == folder.name
 
 
 class Robot(object):
-   def __init__(self, log):
-      self.log = log
+    def __init__(self, log):
+        self.log = log
 
-   def stop(self):
-      pass # Nothing to do on cleanup
+    def stop(self):
+        pass # Nothing to do on cleanup
 
 
 # Helper class to make inverese links (nodes linked to their parent)
 class InverseLinker(Walker):
-   def root_folder(self, r):
-      self.parent_stack = [r]
+    def root_folder(self, r):
+        self.parent_stack = [r]
 
-   def start_folder(self, f, level):
-      f.parent = self.parent_stack[-1]
-      self.parent_stack.append(f) # Push the folder onto the stack of parents
+    def start_folder(self, f, level):
+        f.parent = self.parent_stack[-1]
+        self.parent_stack.append(f) # Push the folder onto the stack of parents
 
-   def end_folder(self, f, level):
-      del self.parent_stack[-1]   # Pop off the stack
+    def end_folder(self, f, level):
+        del self.parent_stack[-1]   # Pop off the stack
 
-   def bookmark(self, b, level):
-      b.parent = self.parent_stack[-1]
+    def bookmark(self, b, level):
+        b.parent = self.parent_stack[-1]
 
-   def ruler(self, r, level):
-      r.parent = self.parent_stack[-1]
+    def ruler(self, r, level):
+        r.parent = self.parent_stack[-1]
 
 
 # Helper class to make linear represenatation of the tree
 class Linear(Walker):
-   def root_folder(self, r):
-      r.linear = [r]
-      self.linear = r.linear
+    def root_folder(self, r):
+        r.linear = [r]
+        self.linear = r.linear
 
-   def add_object(self, object):
-      self.linear.append(object)
+    def add_object(self, object):
+        self.linear.append(object)
 
-   def start_folder(self, f, level):
-      self.add_object(f)
+    def start_folder(self, f, level):
+        self.add_object(f)
 
-   def bookmark(self, b, level):
-      self.add_object(b)
+    def bookmark(self, b, level):
+        self.add_object(b)
 
-   def ruler(self, r, level):
-      self.add_object(r)
+    def ruler(self, r, level):
+        self.add_object(r)
 
 
 # Helper - make linked linear represenatation of the tree, suitable to be stored in sequential storage
 def make_linear(root_folder):
-   linker = InverseLinker()
-   root_folder.walk_depth(linker)
+    linker = InverseLinker()
+    root_folder.walk_depth(linker)
 
-   linear = Linear()
-   root_folder.walk_depth(linear)
+    linear = Linear()
+    root_folder.walk_depth(linear)
 
 
 # Helper, opposite of make_linear - make a tree from the linked linear representation
 def make_tree(linear):
-   root_folder = linear[0]
-   del linear[0]
+    root_folder = linear[0]
+    del linear[0]
 
-   for object in linear:
-      object.parent.append(object)
+    for object in linear:
+        object.parent.append(object)
 
-   return root_folder
+    return root_folder
 
 def break_tree(linear):
-   del linear[0]
+    del linear[0]
 
-   for object in linear:
-      del object.parent
+    for object in linear:
+        del object.parent
 
 
 def quote_title(title):
-   if BKMK_FORMAT == "MOZILLA":
-      title = title.replace("'", "&#39;")
-   return title
+    if BKMK_FORMAT == "MOZILLA":
+        title = title.replace("'", "&#39;")
+    return title
 
 def unquote_title(title):
-   if BKMK_FORMAT == "MOZILLA":
-      from HTMLParser import HTMLParser
-      title = HTMLParser().unescape(title.replace("&amp;", '&').decode('utf-8'))
-      title = title.encode('utf-8').replace("&#39;", "'")
-   return title
+    if BKMK_FORMAT == "MOZILLA":
+        from HTMLParser import HTMLParser
+        title = HTMLParser().unescape(title.replace("&amp;", '&').decode('utf-8'))
+        title = title.encode('utf-8').replace("&#39;", "'")
+    return title
 
 
 def parse_params(param_str):
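
The Folder/Bookmark/Walker API re-indented above lends itself to small ad-hoc walkers. A minimal sketch, not taken from the repository (BookmarkCounter is a hypothetical name; Python 2 is assumed, as in the module itself):

    from bkmk_objects import Folder, Bookmark, Walker

    class BookmarkCounter(Walker):
        # Count bookmarks while walk_depth() traverses the tree.
        def __init__(self):
            self.count = 0

        def bookmark(self, b, level):
            self.count += 1

    root = Folder()
    root.header = "<H1>Bookmarks</H1>"  # walk_depth() treats a folder with a header as the root
    root.append(Bookmark("http://example.com/", add_date="0"))

    counter = BookmarkCounter()
    root.walk_depth(counter)
    print(counter.count)  # -> 1
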
index 8f44f47f13bd1459a0f910dfbfd517e997ba6f46..6a69433ddf9ca3286cd3410284435b199c73252c 100644 (file)
@@ -19,208 +19,208 @@ from bkmk_objects import Folder, Bookmark, Ruler
 DEBUG = os.environ.has_key("BKMK_DEBUG")
 
 if DEBUG:
-   def debug(note):
-      print(note)
+    def debug(note):
+        print(note)
 
-   def dump_names(folder_stack):
-      l = []
-      for object in folder_stack:
-         if object.isFolder:
-            l.append(object.name)
-      return "'%s'" % "' '".join(l)
+    def dump_names(folder_stack):
+        l = []
+        for object in folder_stack:
+            if object.isFolder:
+                l.append(object.name)
+        return "'%s'" % "' '".join(l)
 
 else:
-   def debug(note):
-      pass
-   dump_names = debug
+    def debug(note):
+        pass
+    dump_names = debug
 
 
 class BkmkParser(HTMLParser):
-   def __init__(self):
-      HTMLParser.__init__(self)
-
-      self.urls = 0
-      self.objects = 0
-
-      self.charset = None
-      self.recode = None
-
-   def handle_data(self, data):
-      if data:
-         if self.charset and default_encoding:
-            data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
-         self.accumulator += data
-
-   # Mozilla - get charset
-   def do_meta(self, attrs):
-      http_equiv = ""
-      content = ""
-
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == 'http-equiv':
-            http_equiv = value.lower()
-         elif attrname == 'content':
-            content = value
-
-      if http_equiv == "content-type":
-         try:
-            # extract charset from "text/html; charset=UTF-8"
-            self.charset = content.split('=')[1]
-         except IndexError:
-            pass
-
-   def start_title(self, attrs):
-      if default_encoding:
-         self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
-      self.accumulator += "<TITLE>"
-
-   def end_title(self):
-      self.accumulator += "</TITLE>"
-
-   # Start root folder
-   def start_h1(self, attrs):
-      root_folder = Folder()
-      self.current_object = root_folder
-      self.root_folder = root_folder
-      self.current_folder = root_folder
-      self.folder_stack = [root_folder]
-
-      self.root_folder.header = self.accumulator.strip()
-      self.accumulator = ''
-
-   def end_h1(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Root folder name: `%s'" % accumulator)
-      self.root_folder.name = accumulator
-
-   # Start a folder
-   def start_h3(self, attrs):
-      last_modified = None
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == 'add_date':
-            add_date = value
-         elif attrname == 'last_modified':
-            last_modified = value
-
-      debug("New folder...")
-      folder = Folder(add_date, last_modified=last_modified)
-      self.current_object = folder
-      self.current_folder.append(folder)
-      self.folder_stack.append(folder) # push new folder
-      self.current_folder = folder
-      self.objects += 1
-
-   def end_h3(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Folder name: `%s'" % accumulator)
-      self.current_folder.name = accumulator
-
-   # Start a bookmark
-   def start_a(self, attrs):
-      add_date = None
-      last_visit = None
-      last_modified = None
-      keyword = ''
-      icon = None
-      charset = None
-
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == "href":
-            href = value
-         elif attrname == "add_date":
-            add_date = value
-         elif attrname == "last_visit":
-            last_visit = value
-         elif attrname == "last_modified":
-            last_modified = value
-         elif attrname == "shortcuturl":
-            keyword = value
-         elif attrname == "icon":
-            icon = value
-         elif attrname == "last_charset":
-            charset = value
-
-      debug("Bookmark points to: `%s'" % href)
-      bookmark = Bookmark(href, add_date, last_visit, last_modified,
-         keyword=keyword, icon=icon,
-         charset=charset, parser_charset=self.charset or default_encoding)
-      self.current_object = bookmark
-      self.current_folder.append(bookmark)
-      self.urls += 1
-      self.objects += 1
-
-   def end_a(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Bookmark name: `%s'" % accumulator)
-      bookmark = self.current_folder[-1]
-      bookmark.name = accumulator
-
-   def flush(self):
-      accumulator = self.accumulator
-
-      if accumulator:
-         self.accumulator = ''
-
-         current_object = self.current_object
-         if current_object:
-            current_object.comment += accumulator.strip()
-            debug("Comment: `%s'" % current_object.comment)
-
-   def start_dl(self, attrs):
-      self.flush()
-
-   do_dt = start_dl
-
-   # End of folder
-   def end_dl(self):
-      self.flush()
-      debug("End folder")
-      debug("Folder stack: %s" % dump_names(self.folder_stack))
-      if self.folder_stack:
-         del self.folder_stack[-1] # pop last folder
-         if self.folder_stack:
-            self.current_folder = self.folder_stack[-1]
-         else:
-            debug("FOLDER STACK is EMPTY!!! (1)")
-      else:
-         debug("FOLDER STACK is EMPTY!!! (2)")
-      self.current_object = None
-
-   def close(self):
-      HTMLParser.close(self)
-      if self.folder_stack:
-         raise ValueError("wrong folder stack: %s" % self.folder_stack)
-
-   def do_dd(self, attrs):
-      pass
-
-   do_p = do_dd
-
-   # Start ruler
-   def do_hr(self, attrs):
-      self.flush()
-      debug("Ruler")
-      self.current_folder.append(Ruler())
-      self.current_object = None
-      self.objects += 1
-
-   # BR in comment
-   def do_br(self, attrs):
-      self.accumulator += "<BR>"
-
-   # Allow < in the text
-   def unknown_starttag(self, tag, attrs):
-      self.accumulator += "<%s>" % tag
-
-   # Do not allow unknow end tags
-   def unknown_endtag(self, tag):
-      raise NotImplementedError("Unknow end tag `%s'" % tag)
+    def __init__(self):
+        HTMLParser.__init__(self)
+
+        self.urls = 0
+        self.objects = 0
+
+        self.charset = None
+        self.recode = None
+
+    def handle_data(self, data):
+        if data:
+            if self.charset and default_encoding:
+                data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
+            self.accumulator += data
+
+    # Mozilla - get charset
+    def do_meta(self, attrs):
+        http_equiv = ""
+        content = ""
+
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == 'http-equiv':
+                http_equiv = value.lower()
+            elif attrname == 'content':
+                content = value
+
+        if http_equiv == "content-type":
+            try:
+                # extract charset from "text/html; charset=UTF-8"
+                self.charset = content.split('=')[1]
+            except IndexError:
+                pass
+
+    def start_title(self, attrs):
+        if default_encoding:
+            self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
+        self.accumulator += "<TITLE>"
+
+    def end_title(self):
+        self.accumulator += "</TITLE>"
+
+    # Start root folder
+    def start_h1(self, attrs):
+        root_folder = Folder()
+        self.current_object = root_folder
+        self.root_folder = root_folder
+        self.current_folder = root_folder
+        self.folder_stack = [root_folder]
+
+        self.root_folder.header = self.accumulator.strip()
+        self.accumulator = ''
+
+    def end_h1(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Root folder name: `%s'" % accumulator)
+        self.root_folder.name = accumulator
+
+    # Start a folder
+    def start_h3(self, attrs):
+        last_modified = None
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == 'add_date':
+                add_date = value
+            elif attrname == 'last_modified':
+                last_modified = value
+
+        debug("New folder...")
+        folder = Folder(add_date, last_modified=last_modified)
+        self.current_object = folder
+        self.current_folder.append(folder)
+        self.folder_stack.append(folder) # push new folder
+        self.current_folder = folder
+        self.objects += 1
+
+    def end_h3(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Folder name: `%s'" % accumulator)
+        self.current_folder.name = accumulator
+
+    # Start a bookmark
+    def start_a(self, attrs):
+        add_date = None
+        last_visit = None
+        last_modified = None
+        keyword = ''
+        icon = None
+        charset = None
+
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == "href":
+                href = value
+            elif attrname == "add_date":
+                add_date = value
+            elif attrname == "last_visit":
+                last_visit = value
+            elif attrname == "last_modified":
+                last_modified = value
+            elif attrname == "shortcuturl":
+                keyword = value
+            elif attrname == "icon":
+                icon = value
+            elif attrname == "last_charset":
+                charset = value
+
+        debug("Bookmark points to: `%s'" % href)
+        bookmark = Bookmark(href, add_date, last_visit, last_modified,
+           keyword=keyword, icon=icon,
+           charset=charset, parser_charset=self.charset or default_encoding)
+        self.current_object = bookmark
+        self.current_folder.append(bookmark)
+        self.urls += 1
+        self.objects += 1
+
+    def end_a(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Bookmark name: `%s'" % accumulator)
+        bookmark = self.current_folder[-1]
+        bookmark.name = accumulator
+
+    def flush(self):
+        accumulator = self.accumulator
+
+        if accumulator:
+            self.accumulator = ''
+
+            current_object = self.current_object
+            if current_object:
+                current_object.comment += accumulator.strip()
+                debug("Comment: `%s'" % current_object.comment)
+
+    def start_dl(self, attrs):
+        self.flush()
+
+    do_dt = start_dl
+
+    # End of folder
+    def end_dl(self):
+        self.flush()
+        debug("End folder")
+        debug("Folder stack: %s" % dump_names(self.folder_stack))
+        if self.folder_stack:
+            del self.folder_stack[-1] # pop last folder
+            if self.folder_stack:
+                self.current_folder = self.folder_stack[-1]
+            else:
+                debug("FOLDER STACK is EMPTY!!! (1)")
+        else:
+            debug("FOLDER STACK is EMPTY!!! (2)")
+        self.current_object = None
+
+    def close(self):
+        HTMLParser.close(self)
+        if self.folder_stack:
+            raise ValueError("wrong folder stack: %s" % self.folder_stack)
+
+    def do_dd(self, attrs):
+        pass
+
+    do_p = do_dd
+
+    # Start ruler
+    def do_hr(self, attrs):
+        self.flush()
+        debug("Ruler")
+        self.current_folder.append(Ruler())
+        self.current_object = None
+        self.objects += 1
+
+    # BR in comment
+    def do_br(self, attrs):
+        self.accumulator += "<BR>"
+
+    # Allow < in the text
+    def unknown_starttag(self, tag, attrs):
+        self.accumulator += "<%s>" % tag
+
+    # Do not allow unknow end tags
+    def unknown_endtag(self, tag):
+        raise NotImplementedError("Unknow end tag `%s'" % tag)
index f635c23ad9c48011c619a5ec0fcfaa4dcef3c818..0a267ac60544a28322657df7ba7227aa26e7deaa 100755 (executable)
@@ -16,79 +16,79 @@ __license__ = "GNU GPL"
 log_file = None
 
 def report_dup(href, object_no):
-   s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
+    s = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
 
-   if log_file:
-      log_file.write("%s\n" % s)
-   else:
-      print(s)
+    if log_file:
+        log_file.write("%s\n" % s)
+    else:
+        print(s)
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "sl:")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "sl:")
 
-   report_stats = 1
-   global log_file
-   log_filename = None
+    report_stats = 1
+    global log_file
+    log_filename = None
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-      if _opt == '-l':
-         log_filename = _arg
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+        if _opt == '-l':
+            log_filename = _arg
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if report_stats:
-      print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design")
+    if report_stats:
+        print("Broytman check_dups, Copyright (C) 2000-2017 PhiloSoft Design")
 
-   if args:
-      sys.stderr.write("check_urls: too many arguments\n")
-      sys.stderr.write("Usage: check_urls [-s] [-l logfile]\n")
-      sys.exit(1)
+    if args:
+        sys.stderr.write("check_urls: too many arguments\n")
+        sys.stderr.write("Usage: check_urls [-s] [-l logfile]\n")
+        sys.exit(1)
 
-   if log_filename:
-      log_file = open(log_filename, 'w')
+    if log_filename:
+        log_file = open(log_filename, 'w')
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
-   from bkmk_objects import make_linear
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
+    root_folder = storage.load()
+    from bkmk_objects import make_linear
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
-   dup_dict = {}
+    dup_dict = {}
 
-   for object_no in range(objects):
-      object = root_folder.linear[object_no]
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
 
-      if object.isBookmark:
-         href = object.href
-         if dup_dict.has_key(href):
-            report_dup(href, dup_dict[href])
-         else:
-            dup_dict[href] = object_no
+        if object.isBookmark:
+            href = object.href
+            if dup_dict.has_key(href):
+                report_dup(href, dup_dict[href])
+            else:
+                dup_dict[href] = object_no
 
 
-   if log_filename:
-      log_file.close()
+    if log_filename:
+        log_file.close()
 
-   if report_stats:
-      print("Ok")
-      print(objects, "objects passed")
+    if report_stats:
+        print("Ok")
+        print(objects, "objects passed")
 
 
 if __name__ == '__main__':
-   run()
+    run()
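
The duplicate check in check_dups.py above reduces to remembering the record number of the first occurrence of each URL. A standalone sketch of the same idea (find_duplicates is a hypothetical helper, not from the repository):

    def find_duplicates(hrefs):
        first_seen = {}   # href -> record number of its first occurrence
        duplicates = []
        for no, href in enumerate(hrefs):
            if href in first_seen:
                duplicates.append((href, first_seen[href]))
            else:
                first_seen[href] = no
        return duplicates

    print(find_duplicates(["http://a/", "http://b/", "http://a/"]))
    # -> [('http://a/', 0)]
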
index e8215e8e7815c9a74111d3bbc459d1440b7b72ca..ffab6fa7d325b69cd787b15e5f28fde7681bcaeb 100755 (executable)
@@ -15,68 +15,68 @@ __license__ = "GNU GPL"
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "s")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
 
-   report_stats = 1
+    report_stats = 1
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if report_stats:
-      print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design")
+    if report_stats:
+        print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design")
 
-   if args:
-      sys.stderr.write("check_title: too many arguments\n")
-      sys.stderr.write("Usage: check_title [-s]\n")
-      sys.exit(1)
+    if args:
+        sys.stderr.write("check_title: too many arguments\n")
+        sys.stderr.write("Usage: check_title [-s]\n")
+        sys.exit(1)
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
+    root_folder = storage.load()
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
-   for object_no in range(objects):
-      object = root_folder.linear[object_no]
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
 
-      if object.isBookmark:
-         if hasattr(object, "moved") or hasattr(object, "error") \
-               or object.href.startswith('place:'): # Firefox SmartBookmarks
-            continue
+        if object.isBookmark:
+            if hasattr(object, "moved") or hasattr(object, "error") \
+                  or object.href.startswith('place:'): # Firefox SmartBookmarks
+                continue
 
-         if hasattr(object, "real_title") and (object.real_title is not None):
-            unquoted_title = unquote_title(quote_title(object.real_title))
-            unquoted_name = unquote_title(object.name)
-            if unquoted_name != unquoted_title:
-               print(object.href)
-               print(unquoted_name)
-               print(unquoted_title)
-               print()
-         else:
-            print(object.href)
-            print(object.name)
-            print("NO REAL TITLE!!!")
-            print()
+            if hasattr(object, "real_title") and (object.real_title is not None):
+                unquoted_title = unquote_title(quote_title(object.real_title))
+                unquoted_name = unquote_title(object.name)
+                if unquoted_name != unquoted_title:
+                    print(object.href)
+                    print(unquoted_name)
+                    print(unquoted_title)
+                    print()
+            else:
+                print(object.href)
+                print(object.name)
+                print("NO REAL TITLE!!!")
+                print()
 
 
-   if report_stats:
-      print(objects, "objects passed")
+    if report_stats:
+        print(objects, "objects passed")
 
 
 if __name__ == '__main__':
-   run()
+    run()
index 8a6ec0aa5d2b11d6177d659422b0f4568856126c..46135c53e6877a3e186c8a48f7ab4a6f764ed963 100755 (executable)
@@ -25,40 +25,40 @@ from Writers.bkmk_wflad import strftime
 
 
 def run():
-   print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design")
+    print("Broytman check_url, Copyright (C) 2010-2017 PhiloSoft Design")
 
-   if len(sys.argv) < 2:
-      sys.stderr.write("Usage: check_url.py url1 [url2...]\n")
-      sys.exit(1)
+    if len(sys.argv) < 2:
+        sys.stderr.write("Usage: check_url.py url1 [url2...]\n")
+        sys.exit(1)
 
-   from m_lib.flog import makelog
-   log = makelog("check.log")
+    from m_lib.flog import makelog
+    log = makelog("check.log")
 
-   from robots import robot
-   robot = robot(log)
+    from robots import robot
+    robot = robot(log)
 
-   for url in sys.argv[1:]:
-       bookmark = Bookmark(href=url, add_date=None)
-       bookmark.parent = None
+    for url in sys.argv[1:]:
+        bookmark = Bookmark(href=url, add_date=None)
+        bookmark.parent = None
 
-       rcode = robot.check_url(bookmark)
-       print("check_url: %s" % rcode)
+        rcode = robot.check_url(bookmark)
+        print("check_url: %s" % rcode)
 
-       if hasattr(bookmark, 'error'):
-          print(bookmark.error)
+        if hasattr(bookmark, 'error'):
+            print(bookmark.error)
 
-       else:
-          print("""\
-URL: %s
-Title: %s
-LastModified: %s
-IconURI: %s
-Icon: %s
-""" % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon))
+        else:
+            print("""\
+  URL: %s
+  Title: %s
+  LastModified: %s
+  IconURI: %s
+  Icon: %s
+  """ % (bookmark.href, getattr(bookmark, 'real_title', ''), strftime(bookmark.last_modified), bookmark.icon_href, bookmark.icon))
 
-   robot.stop()
-   log.close()
+    robot.stop()
+    log.close()
 
 
 if __name__ == '__main__':
-   run()
+    run()
index d6d038d2c624a116c22b60f772260095661909a1..a7314f3b789ee120cfb9d239fd05d1e14d3268a5 100755 (executable)
@@ -14,150 +14,150 @@ __license__ = "GNU GPL"
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "ise")
-
-   show_pbar = 1
-   report_stats = 1
-   only_errors = 0
-
-   for _opt, _arg in optlist:
-      if _opt == '-i':
-         show_pbar = 0
-      if _opt == '-s':
-         report_stats = 0
-      if _opt == '-e':
-         only_errors = 1
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
-
-   if report_stats:
-      print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")
-
-   if args:
-      sys.stderr.write("check_urls: too many arguments\n")
-      sys.stderr.write("Usage: check_urls [-ise]\n")
-      sys.exit(1)
-
-   if show_pbar:
-      show_pbar = sys.stderr.isatty()
-
-   if show_pbar:
-      try:
-         from m_lib.pbar.tty_pbar import ttyProgressBar
-      except ImportError:
-         show_pbar = 0
-
-   from m_lib.flog import makelog, openlog
-   if only_errors:
-      log = openlog("check.log")
-      log("chk_urls restarted for errors")
-      if report_stats:
-         print("chk_urls restarted for errors")
-   else:
-      log = makelog("check.log")
-      log("check_urls started")
-      if report_stats:
-         print("   check_urls: normal start")
-
-   from storage import storage
-   storage = storage()
-
-   from robots import robot
-   robot = robot(log)
-
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
-
-   root_folder = storage.load()
-   from bkmk_objects import make_linear, break_tree
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
-
-   if report_stats:
-      print("Ok")
-
-   if report_stats:
-      if only_errors:
-         s = "Rechecking errors: "
-      else:
-         s = "Checking: "
-      sys.stdout.write(s)
-      sys.stdout.flush()
-
-   if show_pbar:
-      pbar = ttyProgressBar(0, objects)
-
-   urls_no = 0
-   object_count = 0
-   size = 0
-
-   checked = {}
-   rcode = 1
-
-   for object_no in range(objects):
-      if show_pbar:
-         pbar.display(object_no+1)
-
-      object = root_folder.linear[object_no]
-      object_count = object_count + 1
-
-      if object.isBookmark:
-         href = object.href
-         if (href.startswith('place:') # Firefox SmartBookmarks
-               or '%s' in href): # Bookmark with keyword
-            log("Skipped %s" % href)
-            continue
-
-         if only_errors:
-            if hasattr(object, "error"):
-               delattr(object, "error")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "ise")
+
+    show_pbar = 1
+    report_stats = 1
+    only_errors = 0
+
+    for _opt, _arg in optlist:
+        if _opt == '-i':
+            show_pbar = 0
+        if _opt == '-s':
+            report_stats = 0
+        if _opt == '-e':
+            only_errors = 1
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
+
+    if report_stats:
+        print("Broytman check_urls, Copyright (C) 1997-2017 PhiloSoft Design")
+
+    if args:
+        sys.stderr.write("check_urls: too many arguments\n")
+        sys.stderr.write("Usage: check_urls [-ise]\n")
+        sys.exit(1)
+
+    if show_pbar:
+        show_pbar = sys.stderr.isatty()
+
+    if show_pbar:
+        try:
+            from m_lib.pbar.tty_pbar import ttyProgressBar
+        except ImportError:
+            show_pbar = 0
+
+    from m_lib.flog import makelog, openlog
+    if only_errors:
+        log = openlog("check.log")
+        log("chk_urls restarted for errors")
+        if report_stats:
+            print("chk_urls restarted for errors")
+    else:
+        log = makelog("check.log")
+        log("check_urls started")
+        if report_stats:
+            print("   check_urls: normal start")
+
+    from storage import storage
+    storage = storage()
+
+    from robots import robot
+    robot = robot(log)
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
+
+    root_folder = storage.load()
+    from bkmk_objects import make_linear, break_tree
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print("Ok")
+
+    if report_stats:
+        if only_errors:
+            s = "Rechecking errors: "
+        else:
+            s = "Checking: "
+        sys.stdout.write(s)
+        sys.stdout.flush()
+
+    if show_pbar:
+        pbar = ttyProgressBar(0, objects)
+
+    urls_no = 0
+    object_count = 0
+    size = 0
+
+    checked = {}
+    rcode = 1
+
+    for object_no in range(objects):
+        if show_pbar:
+            pbar.display(object_no+1)
+
+        object = root_folder.linear[object_no]
+        object_count = object_count + 1
+
+        if object.isBookmark:
+            href = object.href
+            if (href.startswith('place:') # Firefox SmartBookmarks
+                  or '%s' in href): # Bookmark with keyword
+                log("Skipped %s" % href)
+                continue
+
+            if only_errors:
+                if hasattr(object, "error"):
+                    delattr(object, "error")
+                else:
+                    continue
+
+            if checked.has_key(href):
+                log("Already checked %s" % href)
+                old_object = root_folder.linear[checked[href]]
+                for attr_name in ("last_visit", "last_modified",
+                      "error", "no_error", "moved", "size", "md5", "real_title",
+                      "last_tested", "test_time", "icon", "charset"):
+                    if hasattr(old_object, attr_name):
+                        setattr(object, attr_name, getattr(old_object, attr_name))
             else:
-               continue
-
-         if checked.has_key(href):
-            log("Already checked %s" % href)
-            old_object = root_folder.linear[checked[href]]
-            for attr_name in ("last_visit", "last_modified",
-                  "error", "no_error", "moved", "size", "md5", "real_title",
-                  "last_tested", "test_time", "icon", "charset"):
-               if hasattr(old_object, attr_name):
-                  setattr(object, attr_name, getattr(old_object, attr_name))
-         else:
-            log("Checking %s" % href)
-            rcode = robot.check_url(object)
-
-            if rcode:
-               checked[href] = object_no
-               urls_no = urls_no + 1
-               try:
-                  size = size + int(object.size)
-               except (AttributeError, TypeError, ValueError):
-                  pass # Some object does not have a size :(
-            else:
-               log("Interrupted by user (^C)")
-               break
-   robot.stop()
-
-   if show_pbar:
-      del pbar
-
-   if report_stats:
-      print("Ok")
-      print(object_count, "objects passed")
-      print(urls_no, "URLs checked")
-      print(size, "bytes eaten")
-
-   break_tree(root_folder.linear)
-   storage.store(root_folder)
-
-   if rcode:
-      log("check_urls finished ok")
-   log.close()
+                log("Checking %s" % href)
+                rcode = robot.check_url(object)
+
+                if rcode:
+                    checked[href] = object_no
+                    urls_no = urls_no + 1
+                    try:
+                        size = size + int(object.size)
+                    except (AttributeError, TypeError, ValueError):
+                        pass # Some object does not have a size :(
+                else:
+                    log("Interrupted by user (^C)")
+                    break
+    robot.stop()
+
+    if show_pbar:
+        del pbar
+
+    if report_stats:
+        print("Ok")
+        print(object_count, "objects passed")
+        print(urls_no, "URLs checked")
+        print(size, "bytes eaten")
+
+    break_tree(root_folder.linear)
+    storage.store(root_folder)
+
+    if rcode:
+        log("check_urls finished ok")
+    log.close()
 
 
 if __name__ == '__main__':
-   run()
+    run()
index 4c0cb15d206968c4045d78cd930ad9fc4a03c437..381239fc3ab45ab38913d3fb46233764a6a87ea2 100755 (executable)
@@ -12,50 +12,50 @@ import sys
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "s")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
 
-   report_stats = 1
+    report_stats = 1
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if len(args) != 1:
-      sys.stderr.write("convert_st: too many or too few arguments\n")
-      sys.stderr.write("Usage: convert_st [-s] new_storage\n")
-      sys.exit(1)
+    if len(args) != 1:
+        sys.stderr.write("convert_st: too many or too few arguments\n")
+        sys.stderr.write("Usage: convert_st [-s] new_storage\n")
+        sys.exit(1)
 
-   from bkmk_objects import parse_params, set_params
-   from storage import storage, import_storage
+    from bkmk_objects import parse_params, set_params
+    from storage import storage, import_storage
 
-   storage = storage()
+    storage = storage()
 
-   storage_name, storage_params = parse_params(args[0])
-   new_storage = import_storage(storage_name)
-   set_params(new_storage, storage_params)
-   new_storage = new_storage()
+    storage_name, storage_params = parse_params(args[0])
+    new_storage = import_storage(storage_name)
+    set_params(new_storage, storage_params)
+    new_storage = new_storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
+    root_folder = storage.load()
 
-   if report_stats:
-      print("Ok")
-      sys.stdout.write("Converting to %s: " % new_storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        print("Ok")
+        sys.stdout.write("Converting to %s: " % new_storage.filename)
+        sys.stdout.flush()
 
-   new_storage.store(root_folder)
+    new_storage.store(root_folder)
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
 if __name__ == '__main__':
-   run()
+    run()
index 28ebc76959e5fb6d470e4a1d3fb92be1fee4a7bf..e0ac1c1bcb6b76c6247f7680421a2e66765b34a4 100755 (executable)
@@ -12,97 +12,97 @@ import sys
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "sp:o:t:r")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "sp:o:t:r")
 
-   report_stats = 1
-   prune = None
+    report_stats = 1
+    prune = None
 
-   from writers import writer
-   output_filename = writer.filename
+    from writers import writer
+    output_filename = writer.filename
 
-   transl = 0
-   transl_name = "" # dictionary translation; default is no translation
+    transl = 0
+    transl_name = "" # dictionary translation; default is no translation
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-      if _opt == '-p':
-         prune = _arg
-      if _opt == '-o':
-         output_filename = _arg
-      if _opt == '-t':
-         transl = 1
-         transl_name = _arg
-      if _opt == '-r':
-         transl = 2
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+        if _opt == '-p':
+            prune = _arg
+        if _opt == '-o':
+            output_filename = _arg
+        if _opt == '-t':
+            transl = 1
+            transl_name = _arg
+        if _opt == '-r':
+            transl = 2
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if args:
-      sys.stderr.write("db2bkmk: too many arguments\n")
-      sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n")
-      sys.exit(1)
+    if args:
+        sys.stderr.write("db2bkmk: too many arguments\n")
+        sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n")
+        sys.exit(1)
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
+    root_folder = storage.load()
 
-   if report_stats:
-      print("Ok")
-      sys.stdout.write("Writing %s: " % output_filename)
-      sys.stdout.flush()
+    if report_stats:
+        print("Ok")
+        sys.stdout.write("Writing %s: " % output_filename)
+        sys.stdout.flush()
 
 
-   if transl:
-      new_ext = str(transl)
-      transl_d = {}
+    if transl:
+        new_ext = str(transl)
+        transl_d = {}
 
-      from m_lib.flad import fladm
-      transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""])
-                                      # This prevents any other key to appear in transl_db ^
+        from m_lib.flad import fladm
+        transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""])
+                                        # This prevents any other key to appear in transl_db ^
 
-      # Generate translation dictionary (hash table)
-      if transl == 1:
-         for record in transl_db:
-            transl_d[record["URL1"]] = record["URL2"]
-      elif transl == 2:
-         for record in transl_db:
-            transl_d[record["URL2"]] = record["URL1"]
-      else:
-         raise ValueError("transl (%d) must be 1 or 2" % transl)
+        # Generate translation dictionary (hash table)
+        if transl == 1:
+            for record in transl_db:
+                transl_d[record["URL1"]] = record["URL2"]
+        elif transl == 2:
+            for record in transl_db:
+                transl_d[record["URL2"]] = record["URL1"]
+        else:
+            raise ValueError("transl (%d) must be 1 or 2" % transl)
 
-      del transl_db # Save few bytes of memory
+        del transl_db # Save few bytes of memory
 
-      from bkmk_objects import Walker
-      class Transl(Walker):
-         def __init__(self, transl_d):
-            self.transl_d = transl_d
+        from bkmk_objects import Walker
+        class Transl(Walker):
+            def __init__(self, transl_d):
+                self.transl_d = transl_d
 
-         def bookmark(self, b, level):
-            href = b.href
-            transl_d = self.transl_d
+            def bookmark(self, b, level):
+                href = b.href
+                transl_d = self.transl_d
 
-            if transl_d.has_key(href):
-               b.href = transl_d[href]
+                if transl_d.has_key(href):
+                    b.href = transl_d[href]
 
-      root_folder.walk_depth(Transl(transl_d))
+        root_folder.walk_depth(Transl(transl_d))
 
 
-   outfile = open(output_filename, 'w')
-   root_folder.walk_depth(writer(outfile, prune))
-   outfile.close()
+    outfile = open(output_filename, 'w')
+    root_folder.walk_depth(writer(outfile, prune))
+    outfile.close()
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
 if __name__ == '__main__':
-   run()
+    run()
index 34b8ed44cf90838d17f3458f460129ea80101f1f..61e04e06c527196a13c139e3f8637e813ab13af8 100644 (file)
@@ -14,22 +14,22 @@ __license__ = "GNU GPL"
 
 
 def main():
-   import sys
-   from .bkmk_parse_html import universal_charset
-
-   l = len(sys.argv)
-   if l == 3:
-      filename = sys.argv[1]
-      charset = sys.argv[2]
-   elif l == 2:
-      filename = sys.argv[1]
-      charset = universal_charset
-   else:
-      sys.exit("Usage: main filename [charset]")
-
-   parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n'))
-   print("   refresh:", parser.refresh)
-   print("   icon   :", parser.icon)
+    import sys
+    from .bkmk_parse_html import universal_charset
+
+    l = len(sys.argv)
+    if l == 3:
+        filename = sys.argv[1]
+        charset = sys.argv[2]
+    elif l == 2:
+        filename = sys.argv[1]
+        charset = universal_charset
+    else:
+        sys.exit("Usage: main filename [charset]")
+
+    parser = parse_filename(filename, charset, log=lambda s: sys.stdout.write(s + '\n'))
+    print("   refresh:", parser.refresh)
+    print("   icon   :", parser.icon)
 
 if __name__ == '__main__':
     main()
index 2e412ad84dc69756662c215e287e120dad66f72a..7bc4640d31ccad2a902335c8a2aad978b02bb09c 100644 (file)
@@ -19,31 +19,31 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
 parsers = []
 
 try:
-   from . import bkmk_ph_beautifulsoup
+    from . import bkmk_ph_beautifulsoup
 except ImportError:
-   pass
+    pass
 else:
-   bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
-   parsers.append(bkmk_ph_beautifulsoup.parse_html)
+    bkmk_ph_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+    parsers.append(bkmk_ph_beautifulsoup.parse_html)
 
 try:
-   from . import bkmk_ph_html5
+    from . import bkmk_ph_html5
 except ImportError:
-   pass
+    pass
 else:
-   parsers.append(bkmk_ph_html5.parse_html)
+    parsers.append(bkmk_ph_html5.parse_html)
 
 try:
-   from . import bkmk_ph_lxml
+    from . import bkmk_ph_lxml
 except ImportError:
-   pass
+    pass
 else:
     parsers.append(bkmk_ph_lxml.parse_html)
 
 try:
-   from . import bkmk_ph_htmlparser
+    from . import bkmk_ph_htmlparser
 except ImportError:
-   pass
+    pass
 else:
     parsers.append(bkmk_ph_htmlparser.parse_html)
 
@@ -62,128 +62,128 @@ entity_re = re.compile("(&\w+;)")
 num_entity_re = re.compile("(&#[0-9]+;)")
 
 def recode_entities(title, charset):
-   output = []
-   for part in entity_re.split(title):
-      if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \
-            entity_re.match(part):
-         _part = name2codepoint.get(part[1:-1], None)
-         if _part is not None:
-             part = unichr(_part).encode(charset)
-      output.append(part)
-   title = ''.join(output)
-
-   output = []
-   for part in num_entity_re.split(title):
-      if num_entity_re.match(part):
-         try:
-            part = unichr(int(part[2:-1])).encode(charset)
-         except UnicodeEncodeError:
-            pass # Leave the entity as is
-      output.append(part)
-
-   return ''.join(output)
+    output = []
+    for part in entity_re.split(title):
+        if part not in ("&amp;", "&lt;", "&gt;", "&quot;") and \
+              entity_re.match(part):
+            _part = name2codepoint.get(part[1:-1], None)
+            if _part is not None:
+                part = unichr(_part).encode(charset)
+        output.append(part)
+    title = ''.join(output)
+
+    output = []
+    for part in num_entity_re.split(title):
+        if num_entity_re.match(part):
+            try:
+                part = unichr(int(part[2:-1])).encode(charset)
+            except UnicodeEncodeError:
+                pass # Leave the entity as is
+        output.append(part)
+
+    return ''.join(output)
 
 
 import os
 BKMK_DEBUG_HTML_PARSERS = os.environ.get("BKMK_DEBUG_HTML_PARSERS")
 
 def parse_html(html_text, charset=None, log=None):
-   if not parsers:
-       return None
-
-   if charset:
-      try:
-         codecs.lookup(charset) # In case of unknown charset...
-      except (ValueError, LookupError):
-         charset = None         # ...try charset from HTML
-
-   charsets = [universal_charset, DEFAULT_CHARSET]
-   if charset:
-      charset = charset.lower().replace("windows-", "cp")
-      if charset in charsets:
-         charsets.remove(charset)
-      charsets.insert(0, charset)
-
-   if BKMK_DEBUG_HTML_PARSERS:
-      _parsers = []
-   for p in parsers:
-      parser = None
-      for c in charsets:
-         try:
-            parser = p(html_text, c, log)
-         except UnicodeError:
-            pass
-         else:
-            if parser:
-               if BKMK_DEBUG_HTML_PARSERS:
-                  if log: log("   Parser %s: ok" % p.__module__)
-                  _parsers.append((p, parser))
-               break
-      else:
-         if log: log("   Parser %s: fail" % p.__module__)
-      if not BKMK_DEBUG_HTML_PARSERS and parser:
-         break
-
-   if BKMK_DEBUG_HTML_PARSERS:
-      if not _parsers:
-         if log: log("   All parsers have failed")
-         return None
-   elif not parser:
-       if log: log("   All parsers have failed")
-       return None
-
-   if BKMK_DEBUG_HTML_PARSERS:
-      p, parser = _parsers[0]
-   if log: log("   Using %s" % p.__module__)
-
-   converted_title = title = parser.title
-   if title and (not parser.charset):
-      try:
-         unicode(title, "ascii")
-      except UnicodeDecodeError:
-         parser.charset = DEFAULT_CHARSET
-
-   if parser.charset:
-      parser.charset = parser.charset.lower().replace("windows-", "cp")
-
-   if title and parser.charset and (
-         (parser.charset != universal_charset) or
-         ((not charset) or (charset != parser.charset))):
-      try:
-         if parser.meta_charset:
-            if log: log("   META charset   : %s" % parser.charset)
-         elif (not charset) or (charset != parser.charset):
-            if log: log("   guessed charset: %s" % parser.charset)
-         #if log: log("   current charset: %s" % universal_charset)
-         if log: log("   title          : %s" % title)
-         if parser.charset != universal_charset:
+    if not parsers:
+        return None
+
+    if charset:
+        try:
+            codecs.lookup(charset) # In case of unknown charset...
+        except (ValueError, LookupError):
+            charset = None         # ...try charset from HTML
+
+    charsets = [universal_charset, DEFAULT_CHARSET]
+    if charset:
+        charset = charset.lower().replace("windows-", "cp")
+        if charset in charsets:
+            charsets.remove(charset)
+        charsets.insert(0, charset)
+
+    if BKMK_DEBUG_HTML_PARSERS:
+        _parsers = []
+    for p in parsers:
+        parser = None
+        for c in charsets:
             try:
-               converted_title = unicode(title, parser.charset).encode(universal_charset)
+                parser = p(html_text, c, log)
             except UnicodeError:
-               if log: log("   incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET))
-               converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace")
-               parser.charset = DEFAULT_CHARSET
-         if log and (converted_title != title): log("   converted title: %s" % converted_title)
-      except LookupError:
-         if log: log("   unknown charset: '%s'" % parser.charset)
-   else:
-      if log: log("   title          : %s" % title)
-
-   if title:
-      final_title = recode_entities(converted_title, universal_charset)
-      parts = [s.strip() for s in final_title.replace('\r', '').split('\n')]
-      final_title = ' '.join([s for s in parts if s])
-      if log and (final_title != converted_title): log("   final title    : %s" % final_title)
-      parser.title = final_title
-
-   icon = parser.icon
-   if isinstance(icon, unicode):
-       try:
-           parser.icon = icon.encode('ascii')
-       except UnicodeEncodeError:
-           if parser.charset:
-               parser.icon = icon.encode(parser.charset)
-   return parser
+                pass
+            else:
+                if parser:
+                    if BKMK_DEBUG_HTML_PARSERS:
+                        if log: log("   Parser %s: ok" % p.__module__)
+                        _parsers.append((p, parser))
+                    break
+        else:
+            if log: log("   Parser %s: fail" % p.__module__)
+        if not BKMK_DEBUG_HTML_PARSERS and parser:
+            break
+
+    if BKMK_DEBUG_HTML_PARSERS:
+        if not _parsers:
+            if log: log("   All parsers have failed")
+            return None
+    elif not parser:
+        if log: log("   All parsers have failed")
+        return None
+
+    if BKMK_DEBUG_HTML_PARSERS:
+        p, parser = _parsers[0]
+    if log: log("   Using %s" % p.__module__)
+
+    converted_title = title = parser.title
+    if title and (not parser.charset):
+        try:
+            unicode(title, "ascii")
+        except UnicodeDecodeError:
+            parser.charset = DEFAULT_CHARSET
+
+    if parser.charset:
+        parser.charset = parser.charset.lower().replace("windows-", "cp")
+
+    if title and parser.charset and (
+          (parser.charset != universal_charset) or
+          ((not charset) or (charset != parser.charset))):
+        try:
+            if parser.meta_charset:
+                if log: log("   META charset   : %s" % parser.charset)
+            elif (not charset) or (charset != parser.charset):
+                if log: log("   guessed charset: %s" % parser.charset)
+            #if log: log("   current charset: %s" % universal_charset)
+            if log: log("   title          : %s" % title)
+            if parser.charset != universal_charset:
+                try:
+                    converted_title = unicode(title, parser.charset).encode(universal_charset)
+                except UnicodeError:
+                    if log: log("   incorrect conversion from %s, converting from %s" % (parser.charset, DEFAULT_CHARSET))
+                    converted_title = unicode(title, DEFAULT_CHARSET, "replace").encode(universal_charset, "replace")
+                    parser.charset = DEFAULT_CHARSET
+            if log and (converted_title != title): log("   converted title: %s" % converted_title)
+        except LookupError:
+            if log: log("   unknown charset: '%s'" % parser.charset)
+    else:
+        if log: log("   title          : %s" % title)
+
+    if title:
+        final_title = recode_entities(converted_title, universal_charset)
+        parts = [s.strip() for s in final_title.replace('\r', '').split('\n')]
+        final_title = ' '.join([s for s in parts if s])
+        if log and (final_title != converted_title): log("   final title    : %s" % final_title)
+        parser.title = final_title
+
+    icon = parser.icon
+    if isinstance(icon, unicode):
+        try:
+            parser.icon = icon.encode('ascii')
+        except UnicodeEncodeError:
+            if parser.charset:
+                parser.icon = icon.encode(parser.charset)
+    return parser
 
 def parse_filename(filename, charset=None, log=None):
     fp = open(filename, 'r')
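The parse_html dispatcher re-indented above tries every registered parser against a priority-ordered list of charsets and keeps the first one that succeeds. A minimal standalone sketch of that fallback pattern follows; the function and argument names here are illustrative only, not part of the commit:

def first_successful_parse(html_text, parsers, charsets, log=None):
    # Try each parser with each candidate charset; the first
    # parser/charset pair that yields a result wins.
    for parse in parsers:
        for charset in charsets:
            try:
                result = parse(html_text, charset)
            except UnicodeError:
                continue  # wrong charset guess, try the next one
            if result:
                return result
        if log:
            log("   Parser %s: fail" % parse.__module__)
    return None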
index a2f57157db0347d71592d6732259b380972e8001..f796744a406812e92cbd208b8b132adf88f47643 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2007-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2007-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['parse_html']
@@ -21,130 +21,130 @@ DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
 # http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
 class BadDeclParser(BeautifulSoup):
     def parse_declaration(self, i):
-         """Treat a bogus SGML declaration as raw data. Treat a CDATA
-         declaration as a CData object."""
-         j = None
-         if self.rawdata[i:i+9] == '<![CDATA[':
-              k = self.rawdata.find(']]>', i)
-              if k == -1:
-                  k = len(self.rawdata)
-              data = self.rawdata[i+9:k]
-              j = k+3
-              self._toStringSubclass(data, CData)
-         else:
-             try:
-                 j = SGMLParser.parse_declaration(self, i)
-             except SGMLParseError:
-                 # Could not parse the DOCTYPE declaration
-                 # Try to just skip the actual declaration
-                 match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE)
-                 if match:
-                     toHandle = self.rawdata[i:match.end()]
-                 else:
-                     toHandle = self.rawdata[i:]
-                 self.handle_data(toHandle)
-                 j = i + len(toHandle)
-         return j
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1:
+                k = len(self.rawdata)
+            data = self.rawdata[i+9:k]
+            j = k+3
+            self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                # Could not parse the DOCTYPE declaration
+                # Try to just skip the actual declaration
+                match = re.search(r'<!DOCTYPE([^>]*?)>', self.rawdata[i:], re.MULTILINE|re.IGNORECASE)
+                if match:
+                    toHandle = self.rawdata[i:match.end()]
+                else:
+                    toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
 
 
 def _parse_html(html_text, charset):
-   try:
-      return BadDeclParser(html_text, fromEncoding=charset)
-   except TypeError:
-      return None
+    try:
+        return BadDeclParser(html_text, fromEncoding=charset)
+    except TypeError:
+        return None
 
 def parse_html(html_text, charset=None, log=None):
-   root = _parse_html(html_text, charset)
-   if root is None:
-      return None
-
-   _charset = root.originalEncoding
-   if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
-      _charset = DEFAULT_CHARSET
-      root = _parse_html(html_text, _charset)
-      if root is None:
-         return None
-
-   html = root.html
-   if html is None:
-      html = root
-
-   head = html.head
-   if head is None:
-      head = html # Some sites put TITLE in HTML without HEAD
-
-   title = head.title
-   if (title is None) and (html is not head):
-      # Some sites put TITLE in HTML outside of HEAD
-      title = html.title
-
-   if title is None:
-      # Lookup TITLE in the root
-      title = root.title
-
-   if title is not None:
-       if title.string:
-          title = title.string
-       else:
-          parts = []
-          for part in title:
-             if not isinstance(part, basestring):
-                part = unicode(part)
-             parts.append(part.strip())
-          title = ''.join(parts)
-
-   meta = head.find(_find_contenttype, recursive=False)
-   if meta:
-      try:
-         meta_content = meta.get("content")
-         if meta_content:
-             __charset = meta_content.lower().split('charset=')[1].split(';')[0]
-         else:
-             __charset = False
-      except IndexError: # No charset in the META Content-Type
-         meta_charset = False
-      else:
-         meta_charset = _charset == __charset
-   else:
-      meta_charset = False
-
-   if not meta_charset:
-      meta = head.find(_find_charset, recursive=False)
-      if meta:
-         meta_content = meta.get("charset")
-         if meta_content:
-            meta_charset = _charset = meta_content.lower()
-
-   if title and (_charset or meta_charset):
-      title = title.encode(_charset or meta_charset)
-
-   meta = head.find(_find_refresh, recursive=False)
-   if meta:
-      refresh = meta.get("content")
-   else:
-      refresh = None
-
-   meta = head.find(_find_icon, recursive=False)
-   if meta:
-      icon = meta.get("href")
-   else:
-      icon = None
-
-   if (title is None) and (refresh is None) and (icon is None):
-      return None
-   return HTMLParser(_charset, meta_charset, title, refresh, icon)
+    root = _parse_html(html_text, charset)
+    if root is None:
+        return None
+
+    _charset = root.originalEncoding
+    if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
+        _charset = DEFAULT_CHARSET
+        root = _parse_html(html_text, _charset)
+        if root is None:
+            return None
+
+    html = root.html
+    if html is None:
+        html = root
+
+    head = html.head
+    if head is None:
+        head = html # Some sites put TITLE in HTML without HEAD
+
+    title = head.title
+    if (title is None) and (html is not head):
+        # Some sites put TITLE in HTML outside of HEAD
+        title = html.title
+
+    if title is None:
+        # Lookup TITLE in the root
+        title = root.title
+
+    if title is not None:
+        if title.string:
+            title = title.string
+        else:
+            parts = []
+            for part in title:
+                if not isinstance(part, basestring):
+                    part = unicode(part)
+                parts.append(part.strip())
+            title = ''.join(parts)
+
+    meta = head.find(_find_contenttype, recursive=False)
+    if meta:
+        try:
+            meta_content = meta.get("content")
+            if meta_content:
+                __charset = meta_content.lower().split('charset=')[1].split(';')[0]
+            else:
+                __charset = False
+        except IndexError: # No charset in the META Content-Type
+            meta_charset = False
+        else:
+            meta_charset = _charset == __charset
+    else:
+        meta_charset = False
+
+    if not meta_charset:
+        meta = head.find(_find_charset, recursive=False)
+        if meta:
+            meta_content = meta.get("charset")
+            if meta_content:
+                meta_charset = _charset = meta_content.lower()
+
+    if title and (_charset or meta_charset):
+        title = title.encode(_charset or meta_charset)
+
+    meta = head.find(_find_refresh, recursive=False)
+    if meta:
+        refresh = meta.get("content")
+    else:
+        refresh = None
+
+    meta = head.find(_find_icon, recursive=False)
+    if meta:
+        icon = meta.get("href")
+    else:
+        icon = None
+
+    if (title is None) and (refresh is None) and (icon is None):
+        return None
+    return HTMLParser(_charset, meta_charset, title, refresh, icon)
 
 def _find_contenttype(Tag):
-   return (Tag.name == "meta") and \
-      (Tag.get("http-equiv", '').lower() == "content-type")
+    return (Tag.name == "meta") and \
+       (Tag.get("http-equiv", '').lower() == "content-type")
 
 def _find_charset(Tag):
-   return (Tag.name == "meta") and Tag.get("charset", '')
+    return (Tag.name == "meta") and Tag.get("charset", '')
 
 def _find_refresh(Tag):
-   return (Tag.name == "meta") and \
-      (Tag.get("http-equiv", '').lower() == "refresh")
+    return (Tag.name == "meta") and \
+       (Tag.get("http-equiv", '').lower() == "refresh")
 
 def _find_icon(Tag):
-   return (Tag.name == "link") and \
-      (Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
+    return (Tag.name == "link") and \
+       (Tag.get("rel", '').lower() in ('icon', 'shortcut icon'))
index fc596b18712b460085603e09fcf3ca9a381837ff..09aa2a3773642cf08df73e100206d782602e8685 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['parse_html']
@@ -43,8 +43,8 @@ def parse_html(html_text, charset=None, log=None):
                 except IndexError:
                     meta_charset = False
         elif m.get('charset', ''):
-           meta_charset = m.get('charset').lower()
-           break
+            meta_charset = m.get('charset').lower()
+            break
     else:
         meta_charset = False
 
index 0798467386bda9ab993df160f1d4937b1e1802d9..45e89f5119817787b0b926c7f43627a7dc22e7dd 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['parse_html']
@@ -19,80 +19,80 @@ class HTMLHeadDone(Exception): pass
 
 
 class HTMLParser(_HTMLParser):
-   def __init__(self, charset=None):
-      _HTMLParser.__init__(self)
-      self.charset = charset
-      self.meta_charset = 0
-      self.title = None
-      self.refresh = None
-      self.icon = None
-
-   def end_head(self):
-      raise HTMLHeadDone()
-
-   def do_meta(self, attrs):
-      http_equiv = ""
-      content = ""
-
-      for attrname, value in attrs:
-         if value:
-            value = value.strip()
-            if attrname == 'http-equiv':
-               http_equiv = value.lower()
-            elif attrname == 'content':
-               content = value
-            elif (attrname == 'charset') and (not self.charset):
-               self.charset = value.lower()
-               self.meta_charset = 1
-
-      if (not self.charset) and (http_equiv == "content-type"):
-         try:
-            # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
-            self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
-            self.meta_charset = 1 # Remember that the charset was retrieved from
-                                  # META tag, not from the Content-Type header
-         except IndexError:
-            pass
-
-      if http_equiv == "refresh":
-         self.refresh = content
-
-   def start_title(self, attrs):
-      self.accumulator = ''
-
-   def end_title(self):
-      if not self.title: # use only the first title
-         self.title = self.accumulator
-
-   def do_link(self, attrs):
-      has_icon = False
-      href = None
-
-      for attrname, value in attrs:
-         if value:
-            value = value.strip()
-            if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
-               has_icon = True
-            elif attrname == 'href':
-               href = value
-
-      if has_icon:
-         self.icon = href
+    def __init__(self, charset=None):
+        _HTMLParser.__init__(self)
+        self.charset = charset
+        self.meta_charset = 0
+        self.title = None
+        self.refresh = None
+        self.icon = None
+
+    def end_head(self):
+        raise HTMLHeadDone()
+
+    def do_meta(self, attrs):
+        http_equiv = ""
+        content = ""
+
+        for attrname, value in attrs:
+            if value:
+                value = value.strip()
+                if attrname == 'http-equiv':
+                    http_equiv = value.lower()
+                elif attrname == 'content':
+                    content = value
+                elif (attrname == 'charset') and (not self.charset):
+                    self.charset = value.lower()
+                    self.meta_charset = 1
+
+        if (not self.charset) and (http_equiv == "content-type"):
+            try:
+                # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+                self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
+                self.meta_charset = 1 # Remember that the charset was retrieved from
+                                      # META tag, not from the Content-Type header
+            except IndexError:
+                pass
+
+        if http_equiv == "refresh":
+            self.refresh = content
+
+    def start_title(self, attrs):
+        self.accumulator = ''
+
+    def end_title(self):
+        if not self.title: # use only the first title
+            self.title = self.accumulator
+
+    def do_link(self, attrs):
+        has_icon = False
+        href = None
+
+        for attrname, value in attrs:
+            if value:
+                value = value.strip()
+                if (attrname == 'rel') and (value.lower() in ('icon', 'shortcut icon')):
+                    has_icon = True
+                elif attrname == 'href':
+                    href = value
+
+        if has_icon:
+            self.icon = href
 
 
 def parse_html(html_text, charset=None, log=None):
-   parser = HTMLParser(charset)
+    parser = HTMLParser(charset)
 
-   try:
-      parser.feed(html_text)
-   except (HTMLParseError, HTMLHeadDone):
-      pass
+    try:
+        parser.feed(html_text)
+    except (HTMLParseError, HTMLHeadDone):
+        pass
 
-   try:
-      parser.close()
-   except (HTMLParseError, HTMLHeadDone):
-      pass
+    try:
+        parser.close()
+    except (HTMLParseError, HTMLHeadDone):
+        pass
 
-   if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
-      return None
-   return parser
+    if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
+        return None
+    return parser
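The class above stops work early by raising HTMLHeadDone from end_head, so nothing after </head> is ever parsed; parse_html then swallows that exception. A minimal sketch of the same early-exit trick, spelled with the Python 3 html.parser module rather than the repository's Python 2 parser (class and function names are illustrative):

from html.parser import HTMLParser

class HeadDone(Exception):
    pass

class TitleParser(HTMLParser):
    # Collect the first <title> and bail out as soon as </head> is seen.
    def __init__(self):
        HTMLParser.__init__(self)
        self.title = None
        self._in_title = False

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self._in_title = True

    def handle_data(self, data):
        if self._in_title and self.title is None:
            self.title = data.strip()

    def handle_endtag(self, tag):
        if tag == 'title':
            self._in_title = False
        elif tag == 'head':
            raise HeadDone()  # the rest of the page is irrelevant

def extract_title(html_text):
    parser = TitleParser()
    try:
        parser.feed(html_text)
    except HeadDone:
        pass
    return parser.title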
index 9cd29e484068ed90c45eac906ed2c74f15bbd1bf..1fa47917deaf81fb6b40dbb413498e45dec7eadc 100644 (file)
@@ -5,7 +5,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2014 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['parse_html']
@@ -37,8 +37,8 @@ def parse_html(html_text, charset=None, log=None):
                 except IndexError:
                     meta_charset = False
         elif m.get('charset', ''):
-           meta_charset = m.get('charset').lower()
-           break
+            meta_charset = m.get('charset').lower()
+            break
     else:
         meta_charset = False
 
index 400c8dabc4a34ce48bb36766e8b3c3d0910a1879..0e2d529a7ca2e4671f6599c5bd9bd2b8b4086468 100644 (file)
@@ -4,7 +4,7 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2012 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2017 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['HTMLParser']
@@ -13,10 +13,10 @@ __all__ = ['HTMLParser']
 from HTMLParser import HTMLParser
 
 class HTMLParser(HTMLParser):
-   def __init__(self, charset, meta_charset, title, refresh, icon):
-      object.__init__(self)
-      self.charset = charset
-      self.meta_charset = meta_charset
-      self.title = title
-      self.refresh = refresh
-      self.icon = icon
+    def __init__(self, charset, meta_charset, title, refresh, icon):
+        object.__init__(self)
+        self.charset = charset
+        self.meta_charset = meta_charset
+        self.title = title
+        self.refresh = refresh
+        self.icon = icon
index 77dc446d33463a499b222b469b97777db90ab743..543e6ec783152c4caaeaaddba4f55bb71e78da76 100644 (file)
--- a/robots.py
+++ b/robots.py
@@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params
 robot_name, robot_params = parse_params(environ.get("BKMK_ROBOT", "forking"))
 
 def import_robot(robot_name):
-   exec("from Robots import bkmk_r%s" % robot_name)
-   exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name))
-   return robot
+    exec("from Robots import bkmk_r%s" % robot_name)
+    exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name))
+    return robot
 
 robot = import_robot(robot_name)
 set_params(robot, robot_params)
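import_robot above (and the identical helpers in storage.py and writers.py below) selects a plugin module by name with a pair of exec statements. For comparison only, a hedged sketch of the same lookup written with importlib and getattr; the commit itself keeps exec:

import importlib

def import_robot(robot_name):
    # "forking" -> Robots.bkmk_rforking.robot_forking
    module = importlib.import_module('Robots.bkmk_r%s' % robot_name)
    return getattr(module, 'robot_%s' % robot_name)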
index 00292217f52cb0de125aa2ec822992c2ea297cc9..a97a421ce84fcf44b20d93d089f14da19f2c59d6 100755 (executable)
@@ -14,75 +14,75 @@ __license__ = "GNU GPL"
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "s")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
 
-   report_stats = 1
+    report_stats = 1
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if report_stats:
-      print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design")
+    if report_stats:
+        print("Broytman set-real_title, Copyright (C) 2003-2017 PhiloSoft Design")
 
-   if args:
-      sys.stderr.write("set-real_title: too many arguments\n")
-      sys.stderr.write("Usage: set-real_title [-s]\n")
-      sys.exit(1)
+    if args:
+        sys.stderr.write("set-real_title: too many arguments\n")
+        sys.stderr.write("Usage: set-real_title [-s]\n")
+        sys.exit(1)
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
-   from bkmk_objects import make_linear
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
+    root_folder = storage.load()
+    from bkmk_objects import make_linear
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
-   changed = 0
-   for object_no in range(objects):
-      object = root_folder.linear[object_no]
+    changed = 0
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
 
-      if object.isBookmark:
-         if not hasattr(object, "real_title"):
-            continue
+        if object.isBookmark:
+            if not hasattr(object, "real_title"):
+                continue
 
-         real_title = object.real_title
-         if not real_title:
-            real_title = object.href
-         if object.name != real_title:
-            object.name = real_title
-            changed += 1
+            real_title = object.real_title
+            if not real_title:
+                real_title = object.href
+            if object.name != real_title:
+                object.name = real_title
+                changed += 1
 
 
-   if changed and report_stats:
-      sys.stdout.write("Saving %s: " % storage.filename)
-      sys.stdout.flush()
+    if changed and report_stats:
+        sys.stdout.write("Saving %s: " % storage.filename)
+        sys.stdout.flush()
 
-   if not changed and report_stats:
-      sys.stdout.write("No need to save data\n")
-      sys.stdout.flush()
+    if not changed and report_stats:
+        sys.stdout.write("No need to save data\n")
+        sys.stdout.flush()
 
-   if changed:
-      storage.store(root_folder)
+    if changed:
+        storage.store(root_folder)
 
-   if changed and report_stats:
-      print("Ok")
-      print(objects, "objects passed")
-      print(changed, "objects changed")
+    if changed and report_stats:
+        print("Ok")
+        print(objects, "objects passed")
+        print(changed, "objects changed")
 
 
 if __name__ == '__main__':
-   run()
+    run()
index 74acae49f3af128eb2c8ed5b0852c284df468a57..6a4974238b4ea6b7de2856b30255782c376c7def 100755 (executable)
@@ -14,106 +14,106 @@ __license__ = "GNU GPL"
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "s")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
 
-   report_stats = 1
+    report_stats = 1
 
-   for _opt, _arg in optlist:
-      if _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
 
-   if report_stats:
-      print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design")
+    if report_stats:
+        print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design")
 
-   if len(args) != 1:
-      sys.stderr.write("Usage: set-title-list [-s] title_list_file\n")
-      sys.exit(1)
+    if len(args) != 1:
+        sys.stderr.write("Usage: set-title-list [-s] title_list_file\n")
+        sys.exit(1)
 
-   # Read the external file with titles and build a mapping (URL => title)
-   titles_dict = {}
+    # Read the external file with titles and build a mapping (URL => title)
+    titles_dict = {}
 
-   URL = None
-   title = None
+    URL = None
+    title = None
 
-   title_list_file = open(args[0], 'r')
-   for line in title_list_file:
-      line = line[:-1] # strip trailing newline
-      if URL is None:
-         URL = line
+    title_list_file = open(args[0], 'r')
+    for line in title_list_file:
+        line = line[:-1] # strip trailing newline
+        if URL is None:
+            URL = line
 
-      elif title is None:
-         title = line
+        elif title is None:
+            title = line
 
-      elif line: # the third line in every 3 lines must be empty
-         raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line))
+        elif line: # the third line in every 3 lines must be empty
+            raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line))
 
-      else: # We've got 3 lines - add new entry to the mapping
-         if titles_dict.has_key(URL):
-            if title != titles_dict[URL]:
-               raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL]))
+        else: # We've got 3 lines - add new entry to the mapping
+            if titles_dict.has_key(URL):
+                if title != titles_dict[URL]:
+                    raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL]))
 
-         else:
-            titles_dict[URL] = title
+            else:
+                titles_dict[URL] = title
 
-         # reset
-         URL = None
-         title = None
+            # reset
+            URL = None
+            title = None
 
-   title_list_file.close()
+    title_list_file.close()
 
 
-   from storage import storage
-   storage = storage()
+    from storage import storage
+    storage = storage()
 
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
 
-   root_folder = storage.load()
-   from bkmk_objects import make_linear, break_tree
-   make_linear(root_folder)
-   objects = len(root_folder.linear)
+    root_folder = storage.load()
+    from bkmk_objects import make_linear, break_tree
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
 
-   if report_stats:
-      print("Ok")
+    if report_stats:
+        print("Ok")
 
 
-   # Run through the list of objects and check URLs/titles
-   changed = 0
-   for object_no in range(objects):
-      object = root_folder.linear[object_no]
+    # Run through the list of objects and check URLs/titles
+    changed = 0
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
 
-      if object.isBookmark:
-         URL = object.href
-         if titles_dict.has_key(URL):
-            name = titles_dict[URL]
-            if object.name != name:
-               object.name = name
-               changed += 1
+        if object.isBookmark:
+            URL = object.href
+            if titles_dict.has_key(URL):
+                name = titles_dict[URL]
+                if object.name != name:
+                    object.name = name
+                    changed += 1
 
 
-   if changed and report_stats:
-      sys.stdout.write("Saving %s: " % storage.filename)
-      sys.stdout.flush()
+    if changed and report_stats:
+        sys.stdout.write("Saving %s: " % storage.filename)
+        sys.stdout.flush()
 
-   if not changed and report_stats:
-      sys.stdout.write("No need to save data\n")
-      sys.stdout.flush()
+    if not changed and report_stats:
+        sys.stdout.write("No need to save data\n")
+        sys.stdout.flush()
 
-   if changed:
-      break_tree(root_folder.linear)
-      storage.store(root_folder)
+    if changed:
+        break_tree(root_folder.linear)
+        storage.store(root_folder)
 
-   if changed and report_stats:
-      print("Ok")
-      print(objects, "objects passed")
-      print(changed, "objects changed")
+    if changed and report_stats:
+        print("Ok")
+        print(objects, "objects passed")
+        print(changed, "objects changed")
 
 
 if __name__ == '__main__':
-   run()
+    run()
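As the reader loop above shows, set-title-list expects its input file to consist of three-line records: URL, new title, then an empty separator line; a record is only committed to the mapping when its blank third line is read. A hypothetical example of that format (URLs invented for illustration):

http://example.com/
Example Domain

http://example.org/page
Some Other Title
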
index 9a478bf75c394513411e762c2ccc2cdb9d4a30cf..03026e6db3501a05d7f0de92734881b0229f9d5c 100755 (executable)
@@ -20,101 +20,101 @@ import sys
 
 
 class SortBy(object):
-   def __init__(self, sort_by):
-      self.sort_by = sort_by
+    def __init__(self, sort_by):
+        self.sort_by = sort_by
 
-   def __call__(self, o1, o2):
-      try:
-         attr1 = int(getattr(o1, self.sort_by))
-      except (AttributeError, TypeError, ValueError):
-         return 1
+    def __call__(self, o1, o2):
+        try:
+            attr1 = int(getattr(o1, self.sort_by))
+        except (AttributeError, TypeError, ValueError):
+            return 1
 
-      try:
-         attr2 = int(getattr(o2, self.sort_by))
-      except (AttributeError, TypeError, ValueError):
-         return -1
+        try:
+            attr2 = int(getattr(o2, self.sort_by))
+        except (AttributeError, TypeError, ValueError):
+            return -1
 
-      return cmp(attr1, attr2)
+        return cmp(attr1, attr2)
 
 
 def walk_linear(linear, walker):
-   for object in linear:
-      if object.isBookmark:
-         walker.bookmark(object, 0)
+    for object in linear:
+        if object.isBookmark:
+            walker.bookmark(object, 0)
 
 
 def run():
-   from getopt import getopt
-   optlist, args = getopt(sys.argv[1:], "avmztrs")
-
-   sort_by = "last_modified"
-   reverse = 0
-   report_stats = 1
-
-   for _opt, _arg in optlist:
-      if _opt == '-a':
-         sort_by = "add_date"
-      elif _opt == '-v':
-         sort_by = "last_visit"
-      elif _opt == '-m':
-         sort_by = "last_modified"
-      elif _opt == '-z':
-         sort_by = "size"
-      elif _opt == '-t':
-         sort_by = "last_tested"
-      elif _opt == '-r':
-         reverse = 1
-      elif _opt == '-s':
-         report_stats = 0
-   try:
-      del _opt, _arg
-   except NameError:
-      pass
-
-   from storage import storage
-   storage = storage()
-
-   if report_stats:
-      sys.stdout.write("Loading %s: " % storage.filename)
-      sys.stdout.flush()
-
-   root_folder = storage.load()
-
-   if report_stats:
-      print("Ok")
-      sys.stdout.write("Sorting (by %s): " % sort_by)
-      sys.stdout.flush()
-
-   from bkmk_objects import make_linear
-   make_linear(root_folder)
-
-   linear = root_folder.linear
-   del linear[0] # exclude root folder from sorting
-
-   by = SortBy(sort_by)
-   linear.sort(by)
-
-   from writers import writer
-   output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by)
-
-   if reverse:
-      linear.reverse()
-      output_filename = output_filename + "-reverse"
-
-   if report_stats:
-      print("done")
-      sys.stdout.write("Writing %s: " % output_filename)
-      sys.stdout.flush()
-
-   outfile = open(output_filename, 'w')
-   writer = writer(outfile)
-   writer.root_folder(root_folder)
-   walk_linear(linear, writer)
-   outfile.close()
-
-   if report_stats:
-      print("Ok")
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "avmztrs")
+
+    sort_by = "last_modified"
+    reverse = 0
+    report_stats = 1
+
+    for _opt, _arg in optlist:
+        if _opt == '-a':
+            sort_by = "add_date"
+        elif _opt == '-v':
+            sort_by = "last_visit"
+        elif _opt == '-m':
+            sort_by = "last_modified"
+        elif _opt == '-z':
+            sort_by = "size"
+        elif _opt == '-t':
+            sort_by = "last_tested"
+        elif _opt == '-r':
+            reverse = 1
+        elif _opt == '-s':
+            report_stats = 0
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
+
+    from storage import storage
+    storage = storage()
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
+
+    root_folder = storage.load()
+
+    if report_stats:
+        print("Ok")
+        sys.stdout.write("Sorting (by %s): " % sort_by)
+        sys.stdout.flush()
+
+    from bkmk_objects import make_linear
+    make_linear(root_folder)
+
+    linear = root_folder.linear
+    del linear[0] # exclude root folder from sorting
+
+    by = SortBy(sort_by)
+    linear.sort(by)
+
+    from writers import writer
+    output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by)
+
+    if reverse:
+        linear.reverse()
+        output_filename = output_filename + "-reverse"
+
+    if report_stats:
+        print("done")
+        sys.stdout.write("Writing %s: " % output_filename)
+        sys.stdout.flush()
+
+    outfile = open(output_filename, 'w')
+    writer = writer(outfile)
+    writer.root_folder(root_folder)
+    walk_linear(linear, writer)
+    outfile.close()
+
+    if report_stats:
+        print("Ok")
 
 
 if __name__ == '__main__':
-   run()
+    run()
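SortBy above is an old-style comparison function: objects whose sort attribute is missing or non-numeric are pushed towards the end of the list. On Python 3, where list.sort() no longer accepts a comparator and cmp is gone, the same ordering would be expressed as a key function; a hedged sketch with illustrative names:

def sort_key(sort_by):
    def key(obj):
        try:
            return (0, int(getattr(obj, sort_by)))
        except (AttributeError, TypeError, ValueError):
            return (1, 0)  # missing or bad attribute sorts after real values
    return key

# linear.sort(key=sort_key(sort_by)) instead of linear.sort(SortBy(sort_by))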
index 7b6ca06ed31328b93425540718bda75716646e93..a71cdeb75af78896399d5746557c5a41840044a8 100644 (file)
@@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params
 storage_name, storage_params = parse_params(environ.get("BKMK_STORAGE", "pickle"))
 
 def import_storage(storage_name):
-   exec("from Storage import bkmk_st%s" % storage_name)
-   exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name))
-   return storage
+    exec("from Storage import bkmk_st%s" % storage_name)
+    exec("storage = bkmk_st%s.storage_%s" % (storage_name, storage_name))
+    return storage
 
 storage = import_storage(storage_name)
 set_params(storage, storage_params)
index c7fd11629c52a526d2cef7b1e1eb63ede7565fac..0280aad6c9fcc10fa0d87c562f44fb6b08eacf3a 100644 (file)
@@ -16,7 +16,7 @@ Subprocess class features:
 
 __version__ = "Revision: 1.15 "
 
-# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp 
+# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp
 # Originally by ken manheimer, ken.manheimer@nist.gov, jan 1995.
 
 # Prior art: Initially based python code examples demonstrating usage of pipes
@@ -206,7 +206,7 @@ class Subprocess:
             got0 = self.readPendingChars(n)
             got = got + got0
             n = n - len(got0)
-        return got      
+        return got
     def readPendingChars(self, max=None):
         """Read all currently pending subprocess output as a single string."""
         return self.readbuf.readPendingChars(max)
@@ -401,15 +401,15 @@ class ReadBuf:
 
         got = ""
         if self.buf:
-             if (max > 0) and (len(self.buf) > max):
-                 got = self.buf[0:max]
-                 self.buf = self.buf[max:]
-             else:
-                 got, self.buf = self.buf, ''
-             return got                                         
+            if (max > 0) and (len(self.buf) > max):
+                got = self.buf[0:max]
+                self.buf = self.buf[max:]
+            else:
+                got, self.buf = self.buf, ''
+            return got
 
         if self.eof:
-             return ''
+            return ''
 
         sel = select.select([self.fd], [], [self.fd], 0)
         if sel[2]:
@@ -590,7 +590,7 @@ class Ph:
                 line = string.splitfields(line, ':')
                 it[string.strip(line[0])] = (
                     string.strip(string.join(line[1:])))
-        
+
     def getreply(self):
         """Consume next response from ph, returning list of lines or string
         err."""
index 952a23227bb9fb6da66e0abb408c841717256064..25c3cdb5afac49525f0051c383353ed937c240df 100644 (file)
@@ -17,9 +17,9 @@ from bkmk_objects import parse_params, set_params
 writer_name, writer_params = parse_params(environ.get("BKMK_WRITER", "html"))
 
 def import_writer(writer_name):
-   exec("from Writers import bkmk_w%s" % writer_name)
-   exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name))
-   return writer
+    exec("from Writers import bkmk_w%s" % writer_name)
+    exec("writer = bkmk_w%s.writer_%s" % (writer_name, writer_name))
+    return writer
 
 writer = import_writer(writer_name)
 set_params(writer, writer_params)