git.phdru.name Git - extfs.d.git/blobdiff - xml
xml version 1.1.3: fix a minor bug in handling namespaces
[extfs.d.git] / xml
diff --git a/xml b/xml
index 7c7ffd5181c38d2cc8299284b8fe2150998e0786..4822ab8c6005edc39c7995afe1fd856adcc62883 100755 (executable)
--- a/xml
+++ b/xml
@@ -13,8 +13,11 @@ For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
 file the command is "%cd"): cd file/xml://; in older versions it is
 cd file#xml, where "file" is the name of your XML file.
 
+See detailed installation instructions at
+http://phdru.name/Software/mc/xml_INSTALL.html.
+
 The VFS represents tags as directories; the directories are numbered to
-distinguish tags with the same name; also numbering helps to sort tags by their
+distinguish tags with the same name; numbering also helps to sort tags by their
 order in XML instead of sorting them by name. Attributes, text nodes and
 comments are represented as text files; attributes are shown in a file named
 "attributes", attributes are listed in the file as name=value lines (I
@@ -26,7 +29,7 @@ read-only.
 Implementation based on minidom doesn't understand namespaces, it just shows
 them among other attributes. ElementTree-based implementation doesn't show
 namespaces at all. Implementation based on lxml.etree shows namespaces in a
-separate file "namespaces"; every child tag includes its parent's namespaces.
+separate file "namespaces".
 
 It is useful to have a top-down view on an XML structure but it's especially
 convenient to extract text values from tags. One can get, for example, a
@@ -37,9 +40,9 @@ The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
 
 """
 
-__version__ = "0.6.0"
+__version__ = "1.1.3"
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2013 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
 __license__ = "GPL"
 
 force_implementation = None  # Can be None for default choice,
@@ -123,6 +126,9 @@ class XmlVfs(object):
         self.parse()
 
     def list(self):
+        root_comments = self.get_root_comments()
+        if root_comments:
+            print "-r--r--r-- 1 user group %d Jan 1 00:00 text" % (len(root_comments))
         self._list(self.getroot())
 
     def _list(self, node, path=''):
@@ -144,7 +150,7 @@ class XmlVfs(object):
                 subpath = '%s %s' % (template % n, tag)
             subpath_encoded = subpath.encode(default_encoding, "replace")
             print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
-            if self.hasattrs(element):
+            if self.getattrs(element):
                 attr_text = self.attrs2text(element)
                 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
                     len(attr_text), subpath_encoded)
@@ -158,10 +164,7 @@ class XmlVfs(object):
                     len(text), subpath_encoded)
             self._list(element, subpath)
 
-    def has_ns(self, node):
-        return False
-
-    def get_child_node(self, node, i):
+    def get_tag_node(self, node, i):
         n = 0
         for element in self.getchildren(node):
             if self.istag(element):
@@ -170,21 +173,26 @@ class XmlVfs(object):
                     return element
         xml_error('There are less than %d nodes' % i)
 
+    def attrs2text(self, node):
+        attr_accumulator = []
+        for name, value in self.getattrs(node):
+            name = self.getlocalname(name).encode(default_encoding, "replace")
+            value = value.encode(default_encoding, "replace")
+            attr_accumulator.append("%s=%s" % (name, value))
+        return '\n'.join(attr_accumulator)
+
+    def has_ns(self, node):
+        return False
+
 
 class MiniDOMXmlVfs(XmlVfs):
     def parse(self):
         self.document = xml.dom.minidom.parse(sys.argv[2])
 
-    def hasattrs(self, node):
-        return bool(node.attributes)
-
-    def attrs2text(self, node):
+    def getattrs(self, node):
         attrs = node.attributes
         attrs = [attrs.item(i) for i in range(attrs.length)]
-        return '\n'.join(["%s=%s" %
-            (a.name.encode(default_encoding, "replace"),
-            a.value.encode(default_encoding, "replace"))
-            for a in attrs])
+        return [(a.name, a.value) for a in attrs]
 
     def collect_text(self, node):
         text_accumulator = []
@@ -203,6 +211,9 @@ class MiniDOMXmlVfs(XmlVfs):
     def getroot(self):
         return self.document
 
+    def get_root_comments(self):
+        return self.collect_text(self.document)
+
     def getchildren(self, node):
         return node.childNodes
 
@@ -218,8 +229,8 @@ class MiniDOMXmlVfs(XmlVfs):
 
 if use_elementtree or use_lxml:
     class CommonEtreeXmlVfs(XmlVfs):
-        def hasattrs(self, node):
-            return bool(node.attrib)
+        def getattrs(self, node):
+            return node.attrib.items()
 
         def collect_text(self, node):
             text_accumulator = []
@@ -235,9 +246,6 @@ if use_elementtree or use_lxml:
                 if text: text_accumulator.append(text)
             return '\n'.join(text_accumulator).encode(default_encoding, "replace")
 
-        def getroot(self):
-            return self.document.getroot()
-
         def getchildren(self, node):
             return list(node)
 
@@ -251,7 +259,7 @@ if use_elementtree or use_lxml:
 if use_elementtree:
     class ElementTreeXmlVfs(CommonEtreeXmlVfs):
         def parse(self):
-            # Copied from http://effbot.org/zone/element-pi.ht
+            # Copied from http://effbot.org/zone/element-pi.htm
 
             class PIParser(ET.XMLTreeBuilder):
 
@@ -278,15 +286,16 @@ if use_elementtree:
 
             self.document = ET.parse(sys.argv[2], PIParser())
 
-        def attrs2text(self, node):
-            attr_accumulator = []
-            for name, value in node.attrib.items():
-                name = name.encode(default_encoding, "replace")
-                value = value.encode(default_encoding, "replace")
-                if name.startswith('{'):
-                    name = name.split('}', 1)[1]  # Remove XML namespace
-                attr_accumulator.append("%s=%s" % (name, value))
-            return '\n'.join(attr_accumulator)
+        def getroot(self):
+            return self.document.getroot()
+
+        def get_root_comments(self):
+            text_accumulator = []
+            for element in self.getroot():
+                if not self.istag(element):
+                    text = u"<!--%s-->" % element.text
+                    text_accumulator.append(text)
+            return '\n'.join(text_accumulator).encode(default_encoding, "replace")
 
         def getlocalname(self, name):
             if name.startswith('{'):
@@ -301,31 +310,43 @@ if use_lxml:
         def parse(self):
             self.document = etree.parse(sys.argv[2])
 
-        def attrs2text(self, node):
-            attr_accumulator = []
-            for name, value in node.attrib.items():
-                name = etree.QName(name).localname.encode(default_encoding, "replace")
-                value = value.encode(default_encoding, "replace")
-                attr_accumulator.append("%s=%s" % (name, value))
-            return '\n'.join(attr_accumulator)
+        def getroot(self):
+            return [self.document.getroot()]
+
+        def get_root_comments(self):
+            text_accumulator = []
+            for element in self.document.getroot().itersiblings(tag=etree.Comment, preceding=True):
+                text = u"<!--%s-->" % element.text
+                text_accumulator.append(text)
+            return '\n'.join(text_accumulator).encode(default_encoding, "replace")
+
+        def getlocalname(self, name):
+            return etree.QName(name).localname
+
+        def _get_local_ns(self, node):
+            this_nsmap = node.nsmap
+            parent = node.getparent()
+            if parent is not None:
+                parent_nsmap = parent.nsmap
+                for key in parent_nsmap:
+                    if this_nsmap[key] == parent_nsmap[key]:
+                        del this_nsmap[key]
+            return this_nsmap
 
         def has_ns(self, node):
-            return bool(node.nsmap)
+            return bool(self._get_local_ns(node))
 
         def ns2text(self, node):
             ns_accumulator = []
-            for name, value in node.nsmap.items():
-                name = name.encode(default_encoding, "replace")
+            for name, value in self._get_local_ns(node).items():
+                if name:
+                    name = name.encode(default_encoding, "replace")
+                else:
+                    name = 'xmlns'
                 value = value.encode(default_encoding, "replace")
                 ns_accumulator.append("%s=%s" % (name, value))
             return '\n'.join(ns_accumulator)
 
-        def getroot(self):
-            return [self.document.getroot()]
-
-        def getlocalname(self, name):
-            return etree.QName(name).localname
-
 
 def build_xmlvfs():
     if force_implementation is None:
@@ -363,26 +384,29 @@ def mcxml_copyout():
     for path_comp in xml_filename.split('/'):
         if ' ' in path_comp:
             i = int(path_comp.split(' ', 1)[0])
-            node = xmlvfs.get_child_node(node, i)
+            node = xmlvfs.get_tag_node(node, i)
         elif path_comp in ('attributes', 'namespaces', 'text'):
             break
         else:
             xml_error('Unknown file')
 
     if path_comp == 'attributes':
-        if xmlvfs.hasattrs(node):
+        if xmlvfs.getattrs(node):
             text = xmlvfs.attrs2text(node)
         else:
             xml_error('There are no attributes')
 
     elif path_comp == 'namespaces':
-        if xmlvfs.has_ns(node):
+        if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
             text = xmlvfs.ns2text(node)
         else:
-            xml_error('There are no attributes')
+            xml_error('There are no namespaces')
 
     elif path_comp == 'text':
-        text = xmlvfs.collect_text(node)
+        if '/' in xml_filename:
+            text = xmlvfs.collect_text(node)
+        else:
+            text = xmlvfs.get_root_comments()
 
     else:
         xml_error('Unknown file')