]> git.phdru.name Git - extfs.d.git/blobdiff - xml
xml version 1.1.3: fix a minor bug in handling namespaces
[extfs.d.git] / xml
diff --git a/xml b/xml
deleted file mode 120000 (symlink)
index e9fc63c58a9ec8565a9c6e6e5dfb5acac3b95cdd..0000000000000000000000000000000000000000
--- a/xml
+++ /dev/null
@@ -1 +0,0 @@
-xml-unified
\ No newline at end of file
diff --git a/xml b/xml
new file mode 100755 (executable)
index 0000000000000000000000000000000000000000..4822ab8c6005edc39c7995afe1fd856adcc62883
--- /dev/null
+++ b/xml
@@ -0,0 +1,451 @@
+#! /usr/bin/env python
+"""XML Virtual FileSystem for Midnight Commander
+
+The script requires Midnight Commander 3.1+
+(http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
+
+For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
+For older versions put it in /usr/[local/][lib|share]/mc/extfs
+and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
+Make the script executable.
+
+For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
+file the command is "%cd"): cd file/xml://; in older versions it is
+cd file#xml, where "file" is the name of your XML file.
+
+See detailed installation instructions at
+http://phdru.name/Software/mc/xml_INSTALL.html.
+
+The VFS represents tags as directories; the directories are numbered to
+distinguish tags with the same name; numbering also helps to sort tags by their
+order in XML instead of sorting them by name. Attributes, text nodes and
+comments are represented as text files; attributes are shown in a file named
+"attributes", attributes are listed in the file as name=value lines (I
+deliberately ignore a small chance of newline characters in values); names and
+values are reencoded to the console encoding. Text nodes and comments are
+collected in a file named "text", stripped and reencoded. The filesystem is
+read-only.
+
+Implementation based on minidom doesn't understand namespaces, it just shows
+them among other attributes. ElementTree-based implementation doesn't show
+namespaces at all. Implementation based on lxml.etree shows namespaces in a
+separate file "namespaces".
+
+It is useful to have a top-down view on an XML structure but it's especially
+convenient to extract text values from tags. One can get, for example, a
+base64-encoded image - just walk down the VFS to the tag's directory and copy
+its text file to a real file.
+
+The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
+
+"""
+
+__version__ = "1.1.3"
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
+__license__ = "GPL"
+
+force_implementation = None  # Can be None for default choice,
+                             # 'lxml', 'elementtree' or 'minidom'
+
+use_minidom = True
+use_elementtree = False
+use_lxml = False
+
+import math
+import sys
+import xml.dom.minidom
+
+try:
+    import xml.etree.ElementTree as ET
+except ImportError:
+    pass
+else:
+    use_elementtree = True
+
+try:
+    import lxml.etree as etree
+except ImportError:
+    pass
+else:
+    use_lxml = True
+
+try:
+   import locale
+   use_locale = True
+except ImportError:
+   use_locale = False
+
+if use_locale:
+   # Get the default charset.
+   try:
+      lcAll = locale.getdefaultlocale()
+   except locale.Error, err:
+      print >>sys.stderr, "WARNING:", err
+      lcAll = []
+
+   if len(lcAll) == 2:
+      default_encoding = lcAll[1]
+   else:
+      try:
+         default_encoding = locale.getpreferredencoding()
+      except locale.Error, err:
+         print >>sys.stderr, "WARNING:", err
+         default_encoding = sys.getdefaultencoding()
+else:
+   default_encoding = sys.getdefaultencoding()
+
+import logging
+logger = logging.getLogger('xml-mcextfs')
+log_err_handler = logging.StreamHandler(sys.stderr)
+logger.addHandler(log_err_handler)
+logger.setLevel(logging.INFO)
+
+if len(sys.argv) < 3:
+    logger.critical("""\
+XML Virtual FileSystem for Midnight Commander version %s
+Author: %s
+%s
+
+This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
+/usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
+   __version__, __author__, __copyright__
+)
+    sys.exit(1)
+
+
+locale.setlocale(locale.LC_ALL, '')
+
+
+class XmlVfs(object):
+    """Abstract base class"""
+
+    supports_namespaces = False
+
+    def __init__(self):
+        self.parse()
+
+    def list(self):
+        root_comments = self.get_root_comments()
+        if root_comments:
+            print "-r--r--r-- 1 user group %d Jan 1 00:00 text" % (len(root_comments))
+        self._list(self.getroot())
+
+    def _list(self, node, path=''):
+        n = len(self.getchildren(node))
+        if n:
+            width = int(math.log10(n)) + 1
+            template = "%%0%dd" % width
+        else:
+            template = "%d"
+        n = 0
+        for element in self.getchildren(node):
+            if not self.istag(element):
+                continue
+            n += 1
+            tag = self.getlocalname(self.gettag(element))
+            if path:
+                subpath = '%s/%s %s' % (path, template % n, tag)
+            else:
+                subpath = '%s %s' % (template % n, tag)
+            subpath_encoded = subpath.encode(default_encoding, "replace")
+            print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
+            if self.getattrs(element):
+                attr_text = self.attrs2text(element)
+                print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
+                    len(attr_text), subpath_encoded)
+            if self.supports_namespaces and self.has_ns(element):
+                ns_text = self.ns2text(element)
+                print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % (
+                    len(ns_text), subpath_encoded)
+            text = self.collect_text(element)
+            if text:
+                print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
+                    len(text), subpath_encoded)
+            self._list(element, subpath)
+
+    def get_tag_node(self, node, i):
+        n = 0
+        for element in self.getchildren(node):
+            if self.istag(element):
+                n += 1
+                if n == i:
+                    return element
+        xml_error('There are less than %d nodes' % i)
+
+    def attrs2text(self, node):
+        attr_accumulator = []
+        for name, value in self.getattrs(node):
+            name = self.getlocalname(name).encode(default_encoding, "replace")
+            value = value.encode(default_encoding, "replace")
+            attr_accumulator.append("%s=%s" % (name, value))
+        return '\n'.join(attr_accumulator)
+
+    def has_ns(self, node):
+        return False
+
+
+class MiniDOMXmlVfs(XmlVfs):
+    def parse(self):
+        self.document = xml.dom.minidom.parse(sys.argv[2])
+
+    def getattrs(self, node):
+        attrs = node.attributes
+        attrs = [attrs.item(i) for i in range(attrs.length)]
+        return [(a.name, a.value) for a in attrs]
+
+    def collect_text(self, node):
+        text_accumulator = []
+        for element in node.childNodes:
+            if element.localName:
+                continue
+            elif element.nodeType == element.COMMENT_NODE:
+                text = u"<!--%s-->" % element.nodeValue
+            elif element.nodeType == element.TEXT_NODE:
+                text = element.nodeValue.strip()
+            else:
+                xml_error("Unknown node type %d" % element.nodeType)
+            if text: text_accumulator.append(text)
+        return '\n'.join(text_accumulator).encode(default_encoding, "replace")
+
+    def getroot(self):
+        return self.document
+
+    def get_root_comments(self):
+        return self.collect_text(self.document)
+
+    def getchildren(self, node):
+        return node.childNodes
+
+    def gettag(self, node):
+        return node.localName
+
+    def istag(self, node):
+        return bool(node.localName)
+
+    def getlocalname(self, name):
+        return name
+
+
+if use_elementtree or use_lxml:
+    class CommonEtreeXmlVfs(XmlVfs):
+        def getattrs(self, node):
+            return node.attrib.items()
+
+        def collect_text(self, node):
+            text_accumulator = []
+            if node.text:
+                text = node.text.strip()
+                if text: text_accumulator.append(text)
+            for element in node:
+                if not self.istag(element):
+                    text = u"<!--%s-->" % element.text
+                    text_accumulator.append(text)
+            if node.tail:
+                text = node.tail.strip()
+                if text: text_accumulator.append(text)
+            return '\n'.join(text_accumulator).encode(default_encoding, "replace")
+
+        def getchildren(self, node):
+            return list(node)
+
+        def gettag(self, node):
+            return node.tag
+
+        def istag(self, node):
+            return isinstance(node.tag, basestring)
+
+
+if use_elementtree:
+    class ElementTreeXmlVfs(CommonEtreeXmlVfs):
+        def parse(self):
+            # Copied from http://effbot.org/zone/element-pi.htm
+
+            class PIParser(ET.XMLTreeBuilder):
+
+                def __init__(self):
+                    ET.XMLTreeBuilder.__init__(self)
+                    # assumes ElementTree 1.2.X
+                    self._parser.CommentHandler = self.handle_comment
+                    self._parser.ProcessingInstructionHandler = self.handle_pi
+                    self._target.start("document", {})
+
+                def close(self):
+                    self._target.end("document")
+                    return ET.XMLTreeBuilder.close(self)
+
+                def handle_comment(self, data):
+                    self._target.start(ET.Comment, {})
+                    self._target.data(data)
+                    self._target.end(ET.Comment)
+
+                def handle_pi(self, target, data):
+                    self._target.start(ET.PI, {})
+                    self._target.data(target + " " + data)
+                    self._target.end(ET.PI)
+
+            self.document = ET.parse(sys.argv[2], PIParser())
+
+        def getroot(self):
+            return self.document.getroot()
+
+        def get_root_comments(self):
+            text_accumulator = []
+            for element in self.getroot():
+                if not self.istag(element):
+                    text = u"<!--%s-->" % element.text
+                    text_accumulator.append(text)
+            return '\n'.join(text_accumulator).encode(default_encoding, "replace")
+
+        def getlocalname(self, name):
+            if name.startswith('{'):
+                name = name.split('}', 1)[1]  # Remove XML namespace
+            return name
+
+
+if use_lxml:
+    class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
+        supports_namespaces = True
+
+        def parse(self):
+            self.document = etree.parse(sys.argv[2])
+
+        def getroot(self):
+            return [self.document.getroot()]
+
+        def get_root_comments(self):
+            text_accumulator = []
+            for element in self.document.getroot().itersiblings(tag=etree.Comment, preceding=True):
+                text = u"<!--%s-->" % element.text
+                text_accumulator.append(text)
+            return '\n'.join(text_accumulator).encode(default_encoding, "replace")
+
+        def getlocalname(self, name):
+            return etree.QName(name).localname
+
+        def _get_local_ns(self, node):
+            this_nsmap = node.nsmap
+            parent = node.getparent()
+            if parent is not None:
+                parent_nsmap = parent.nsmap
+                for key in parent_nsmap:
+                    if this_nsmap[key] == parent_nsmap[key]:
+                        del this_nsmap[key]
+            return this_nsmap
+
+        def has_ns(self, node):
+            return bool(self._get_local_ns(node))
+
+        def ns2text(self, node):
+            ns_accumulator = []
+            for name, value in self._get_local_ns(node).items():
+                if name:
+                    name = name.encode(default_encoding, "replace")
+                else:
+                    name = 'xmlns'
+                value = value.encode(default_encoding, "replace")
+                ns_accumulator.append("%s=%s" % (name, value))
+            return '\n'.join(ns_accumulator)
+
+
+def build_xmlvfs():
+    if force_implementation is None:
+        if use_lxml:
+            return LxmlEtreeXmlVfs()
+        elif use_elementtree:
+            return ElementTreeXmlVfs()
+        else:
+            return MiniDOMXmlVfs()
+    elif force_implementation == 'minidom':
+        return MiniDOMXmlVfs()
+    elif force_implementation == 'elementtree':
+        return ElementTreeXmlVfs()
+    elif force_implementation == 'lxml':
+        return LxmlEtreeXmlVfs()
+    else:
+        raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation)
+
+
+def mcxml_list():
+    """List the entire VFS"""
+
+    xmlvfs = build_xmlvfs()
+    xmlvfs.list()
+
+
+def mcxml_copyout():
+    """Extract a file from the VFS"""
+
+    xmlvfs = build_xmlvfs()
+    xml_filename = sys.argv[3]
+    real_filename = sys.argv[4]
+
+    node = xmlvfs.getroot()
+    for path_comp in xml_filename.split('/'):
+        if ' ' in path_comp:
+            i = int(path_comp.split(' ', 1)[0])
+            node = xmlvfs.get_tag_node(node, i)
+        elif path_comp in ('attributes', 'namespaces', 'text'):
+            break
+        else:
+            xml_error('Unknown file')
+
+    if path_comp == 'attributes':
+        if xmlvfs.getattrs(node):
+            text = xmlvfs.attrs2text(node)
+        else:
+            xml_error('There are no attributes')
+
+    elif path_comp == 'namespaces':
+        if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
+            text = xmlvfs.ns2text(node)
+        else:
+            xml_error('There are no namespaces')
+
+    elif path_comp == 'text':
+        if '/' in xml_filename:
+            text = xmlvfs.collect_text(node)
+        else:
+            text = xmlvfs.get_root_comments()
+
+    else:
+        xml_error('Unknown file')
+
+    outfile = open(real_filename, 'w')
+    outfile.write(text)
+    outfile.close()
+
+
+def mcxml_copyin():
+    """Put a file to the VFS"""
+    sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
+
+def mcxml_rm():
+    """Remove a file from the VFS"""
+    sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
+
+mcxml_rmdir = mcxml_rm
+
+def mcxml_mkdir():
+    """Create a directory in the VFS"""
+    sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
+
+
+def xml_error(error_str):
+    logger.critical("Error walking XML file: %s", error_str)
+    sys.exit(1)
+
+command = sys.argv[1]
+procname = "mcxml_" + command
+
+g = globals()
+if not g.has_key(procname):
+    logger.critical("Unknown command %s", command)
+    sys.exit(1)
+
+try:
+    g[procname]()
+except SystemExit:
+    raise
+except:
+    logger.exception("Error during run")