]> git.phdru.name Git - bookmarks_db.git/blobdiff - bkmk_parser.py
Cleanup code: use 4 spaces
[bookmarks_db.git] / bkmk_parser.py
index 8f44f47f13bd1459a0f910dfbfd517e997ba6f46..6a69433ddf9ca3286cd3410284435b199c73252c 100644 (file)
@@ -19,208 +19,208 @@ from bkmk_objects import Folder, Bookmark, Ruler
 DEBUG = os.environ.has_key("BKMK_DEBUG")
 
 if DEBUG:
-   def debug(note):
-      print(note)
+    def debug(note):
+        print(note)
 
-   def dump_names(folder_stack):
-      l = []
-      for object in folder_stack:
-         if object.isFolder:
-            l.append(object.name)
-      return "'%s'" % "' '".join(l)
+    def dump_names(folder_stack):
+        l = []
+        for object in folder_stack:
+            if object.isFolder:
+                l.append(object.name)
+        return "'%s'" % "' '".join(l)
 
 else:
-   def debug(note):
-      pass
-   dump_names = debug
+    def debug(note):
+        pass
+    dump_names = debug
 
 
 class BkmkParser(HTMLParser):
-   def __init__(self):
-      HTMLParser.__init__(self)
-
-      self.urls = 0
-      self.objects = 0
-
-      self.charset = None
-      self.recode = None
-
-   def handle_data(self, data):
-      if data:
-         if self.charset and default_encoding:
-            data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
-         self.accumulator += data
-
-   # Mozilla - get charset
-   def do_meta(self, attrs):
-      http_equiv = ""
-      content = ""
-
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == 'http-equiv':
-            http_equiv = value.lower()
-         elif attrname == 'content':
-            content = value
-
-      if http_equiv == "content-type":
-         try:
-            # extract charset from "text/html; charset=UTF-8"
-            self.charset = content.split('=')[1]
-         except IndexError:
-            pass
-
-   def start_title(self, attrs):
-      if default_encoding:
-         self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
-      self.accumulator += "<TITLE>"
-
-   def end_title(self):
-      self.accumulator += "</TITLE>"
-
-   # Start root folder
-   def start_h1(self, attrs):
-      root_folder = Folder()
-      self.current_object = root_folder
-      self.root_folder = root_folder
-      self.current_folder = root_folder
-      self.folder_stack = [root_folder]
-
-      self.root_folder.header = self.accumulator.strip()
-      self.accumulator = ''
-
-   def end_h1(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Root folder name: `%s'" % accumulator)
-      self.root_folder.name = accumulator
-
-   # Start a folder
-   def start_h3(self, attrs):
-      last_modified = None
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == 'add_date':
-            add_date = value
-         elif attrname == 'last_modified':
-            last_modified = value
-
-      debug("New folder...")
-      folder = Folder(add_date, last_modified=last_modified)
-      self.current_object = folder
-      self.current_folder.append(folder)
-      self.folder_stack.append(folder) # push new folder
-      self.current_folder = folder
-      self.objects += 1
-
-   def end_h3(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Folder name: `%s'" % accumulator)
-      self.current_folder.name = accumulator
-
-   # Start a bookmark
-   def start_a(self, attrs):
-      add_date = None
-      last_visit = None
-      last_modified = None
-      keyword = ''
-      icon = None
-      charset = None
-
-      for attrname, value in attrs:
-         value = value.strip()
-         if attrname == "href":
-            href = value
-         elif attrname == "add_date":
-            add_date = value
-         elif attrname == "last_visit":
-            last_visit = value
-         elif attrname == "last_modified":
-            last_modified = value
-         elif attrname == "shortcuturl":
-            keyword = value
-         elif attrname == "icon":
-            icon = value
-         elif attrname == "last_charset":
-            charset = value
-
-      debug("Bookmark points to: `%s'" % href)
-      bookmark = Bookmark(href, add_date, last_visit, last_modified,
-         keyword=keyword, icon=icon,
-         charset=charset, parser_charset=self.charset or default_encoding)
-      self.current_object = bookmark
-      self.current_folder.append(bookmark)
-      self.urls += 1
-      self.objects += 1
-
-   def end_a(self):
-      accumulator = self.accumulator
-      self.accumulator = ''
-
-      debug("Bookmark name: `%s'" % accumulator)
-      bookmark = self.current_folder[-1]
-      bookmark.name = accumulator
-
-   def flush(self):
-      accumulator = self.accumulator
-
-      if accumulator:
-         self.accumulator = ''
-
-         current_object = self.current_object
-         if current_object:
-            current_object.comment += accumulator.strip()
-            debug("Comment: `%s'" % current_object.comment)
-
-   def start_dl(self, attrs):
-      self.flush()
-
-   do_dt = start_dl
-
-   # End of folder
-   def end_dl(self):
-      self.flush()
-      debug("End folder")
-      debug("Folder stack: %s" % dump_names(self.folder_stack))
-      if self.folder_stack:
-         del self.folder_stack[-1] # pop last folder
-         if self.folder_stack:
-            self.current_folder = self.folder_stack[-1]
-         else:
-            debug("FOLDER STACK is EMPTY!!! (1)")
-      else:
-         debug("FOLDER STACK is EMPTY!!! (2)")
-      self.current_object = None
-
-   def close(self):
-      HTMLParser.close(self)
-      if self.folder_stack:
-         raise ValueError("wrong folder stack: %s" % self.folder_stack)
-
-   def do_dd(self, attrs):
-      pass
-
-   do_p = do_dd
-
-   # Start ruler
-   def do_hr(self, attrs):
-      self.flush()
-      debug("Ruler")
-      self.current_folder.append(Ruler())
-      self.current_object = None
-      self.objects += 1
-
-   # BR in comment
-   def do_br(self, attrs):
-      self.accumulator += "<BR>"
-
-   # Allow < in the text
-   def unknown_starttag(self, tag, attrs):
-      self.accumulator += "<%s>" % tag
-
-   # Do not allow unknow end tags
-   def unknown_endtag(self, tag):
-      raise NotImplementedError("Unknow end tag `%s'" % tag)
+    def __init__(self):
+        HTMLParser.__init__(self)
+
+        self.urls = 0
+        self.objects = 0
+
+        self.charset = None
+        self.recode = None
+
+    def handle_data(self, data):
+        if data:
+            if self.charset and default_encoding:
+                data = unicode(data, self.charset, "replace").encode(default_encoding, "xmlcharrefreplace")
+            self.accumulator += data
+
+    # Mozilla - get charset
+    def do_meta(self, attrs):
+        http_equiv = ""
+        content = ""
+
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == 'http-equiv':
+                http_equiv = value.lower()
+            elif attrname == 'content':
+                content = value
+
+        if http_equiv == "content-type":
+            try:
+                # extract charset from "text/html; charset=UTF-8"
+                self.charset = content.split('=')[1]
+            except IndexError:
+                pass
+
+    def start_title(self, attrs):
+        if default_encoding:
+            self.accumulator += '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%s">\n' % default_encoding
+        self.accumulator += "<TITLE>"
+
+    def end_title(self):
+        self.accumulator += "</TITLE>"
+
+    # Start root folder
+    def start_h1(self, attrs):
+        root_folder = Folder()
+        self.current_object = root_folder
+        self.root_folder = root_folder
+        self.current_folder = root_folder
+        self.folder_stack = [root_folder]
+
+        self.root_folder.header = self.accumulator.strip()
+        self.accumulator = ''
+
+    def end_h1(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Root folder name: `%s'" % accumulator)
+        self.root_folder.name = accumulator
+
+    # Start a folder
+    def start_h3(self, attrs):
+        last_modified = None
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == 'add_date':
+                add_date = value
+            elif attrname == 'last_modified':
+                last_modified = value
+
+        debug("New folder...")
+        folder = Folder(add_date, last_modified=last_modified)
+        self.current_object = folder
+        self.current_folder.append(folder)
+        self.folder_stack.append(folder) # push new folder
+        self.current_folder = folder
+        self.objects += 1
+
+    def end_h3(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Folder name: `%s'" % accumulator)
+        self.current_folder.name = accumulator
+
+    # Start a bookmark
+    def start_a(self, attrs):
+        add_date = None
+        last_visit = None
+        last_modified = None
+        keyword = ''
+        icon = None
+        charset = None
+
+        for attrname, value in attrs:
+            value = value.strip()
+            if attrname == "href":
+                href = value
+            elif attrname == "add_date":
+                add_date = value
+            elif attrname == "last_visit":
+                last_visit = value
+            elif attrname == "last_modified":
+                last_modified = value
+            elif attrname == "shortcuturl":
+                keyword = value
+            elif attrname == "icon":
+                icon = value
+            elif attrname == "last_charset":
+                charset = value
+
+        debug("Bookmark points to: `%s'" % href)
+        bookmark = Bookmark(href, add_date, last_visit, last_modified,
+           keyword=keyword, icon=icon,
+           charset=charset, parser_charset=self.charset or default_encoding)
+        self.current_object = bookmark
+        self.current_folder.append(bookmark)
+        self.urls += 1
+        self.objects += 1
+
+    def end_a(self):
+        accumulator = self.accumulator
+        self.accumulator = ''
+
+        debug("Bookmark name: `%s'" % accumulator)
+        bookmark = self.current_folder[-1]
+        bookmark.name = accumulator
+
+    def flush(self):
+        accumulator = self.accumulator
+
+        if accumulator:
+            self.accumulator = ''
+
+            current_object = self.current_object
+            if current_object:
+                current_object.comment += accumulator.strip()
+                debug("Comment: `%s'" % current_object.comment)
+
+    def start_dl(self, attrs):
+        self.flush()
+
+    do_dt = start_dl
+
+    # End of folder
+    def end_dl(self):
+        self.flush()
+        debug("End folder")
+        debug("Folder stack: %s" % dump_names(self.folder_stack))
+        if self.folder_stack:
+            del self.folder_stack[-1] # pop last folder
+            if self.folder_stack:
+                self.current_folder = self.folder_stack[-1]
+            else:
+                debug("FOLDER STACK is EMPTY!!! (1)")
+        else:
+            debug("FOLDER STACK is EMPTY!!! (2)")
+        self.current_object = None
+
+    def close(self):
+        HTMLParser.close(self)
+        if self.folder_stack:
+            raise ValueError("wrong folder stack: %s" % self.folder_stack)
+
+    def do_dd(self, attrs):
+        pass
+
+    do_p = do_dd
+
+    # Start ruler
+    def do_hr(self, attrs):
+        self.flush()
+        debug("Ruler")
+        self.current_folder.append(Ruler())
+        self.current_object = None
+        self.objects += 1
+
+    # BR in comment
+    def do_br(self, attrs):
+        self.accumulator += "<BR>"
+
+    # Allow < in the text
+    def unknown_starttag(self, tag, attrs):
+        self.accumulator += "<%s>" % tag
+
+    # Do not allow unknow end tags
+    def unknown_endtag(self, tag):
+        raise NotImplementedError("Unknow end tag `%s'" % tag)