Collect text and comments

author Oleg Broytman <phd@phdru.name>

Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)

committer Oleg Broytman <phd@phdru.name>

Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)
author Oleg Broytman <phd@phdru.name>
Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)
committer Oleg Broytman <phd@phdru.name>
Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)
diff --git a/xml b/xml

index 618c65bb99a4b3a1d2be035ac85cf371d41006fd..9ae64229fe196d91df60b81b8760409b275c01d5 100755 (executable)
--- a/xml
+++ b/xml
@@ -18,15 +18,16 @@ distinguish tags with the same name; also numbering helps to sort tags by their
  order in XML instead of sorting them by name. Attributes, text nodes and
  comments are represented as text files; attributes are shown in a file named
  "attributes", attributes are listed in the file as name=value lines (I
-deliberately ignore a small chance there is a newline character in values).
-Text nodes and comments are collected in a file named "text". The filesystem is
+deliberately ignore a small chance of newline characters in values); names and
+values are reencoded to the console encoding. Text nodes and comments are
+collected in a file named "text", stripped and reencoded. The filesystem is
  read-only.
  
  The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
  
  """
  
-__version__ = "0.2.0"
+__version__ = "0.3.0"
  __author__ = "Oleg Broytman <phd@phdru.name>"
  __copyright__ = "Copyright (C) 2013 PhiloSoft Design"
  __license__ = "GPL"
@@ -88,6 +89,20 @@ def _attrs2text(attrs):
          a.value.encode(default_encoding, "replace"))
          for a in attrs])
  
+def _collect_text(node):
+    """Return node's child comments and stripped text, joined and encoded."""
+    parts = []
+    for child in node.childNodes:
+        if child.localName:
+            continue  # skip children that have a local name
+        elif child.nodeType == child.COMMENT_NODE:
+            parts.append(u"<!--%s-->" % child.nodeValue)
+        elif child.nodeType == child.TEXT_NODE:
+            parts.append(child.nodeValue.strip())  # empty ones dropped below
+        else:  # append nothing, so no stale/unbound value is ever reused
+            xml_error("Unknown node type %d" % child.nodeType)
+    return '\n'.join(filter(None, parts)).encode(default_encoding, "replace")
+
  def _list(node, path=''):
      childNodes = node.childNodes
      n = 0
@@ -115,6 +130,11 @@ def _list(node, path=''):
                  print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
                      len(attr_text), subpath_encoded)
              _list(element, subpath)
+    if path:
+        text = _collect_text(node)
+        if text:
+            print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
+                len(text), path.encode(default_encoding, "replace"))
  
  def mcxml_list():
      """List the entire VFS"""
@@ -143,7 +163,7 @@ def mcxml_copyout():
          if ' ' in path_comp:
              i = int(path_comp.split(' ', 1)[0])
              node = _get_child_node(node, i)
-        elif path_comp == 'attributes':
+        elif path_comp in ('attributes', 'text'):
              break
          else:
              xml_error('Unknown file')
@@ -155,6 +175,9 @@ def mcxml_copyout():
          else:
              xml_error('There are no attributes')
  
+    if path_comp == 'text':
+        text = _collect_text(node)
+
      outfile = open(real_filename, 'w')
      outfile.write(text)
      outfile.close()
author	Oleg Broytman <phd@phdru.name>
	Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)
committer	Oleg Broytman <phd@phdru.name>
	Sat, 16 Nov 2013 10:35:41 +0000 (14:35 +0400)