#! /usr/bin/env python
"""XML Virtual FileSystem for Midnight Commander

The script requires Midnight Commander 3.1+
(http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).

For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
For older versions put it in /usr/[local/][lib|share]/mc/extfs
and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
Make the script executable.

For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
file the command is "%cd"): cd file/xml://; in older versions it is
cd file#xml, where "file" is the name of your XML file.

See detailed installation instructions at
http://phdru.name/Software/mc/xml_INSTALL.html.

The VFS represents tags as directories; the directories are numbered to
distinguish tags with the same name; numbering also helps to sort tags by their
order in XML instead of sorting them by name. Attributes, text nodes and
comments are represented as text files; attributes are shown in a file named
"attributes", attributes are listed in the file as name=value lines (I
deliberately ignore a small chance of newline characters in values); names and
values are reencoded to the console encoding. Text nodes and comments are
collected in a file named "text", stripped and reencoded. The filesystem is
read-only.

Implementation based on minidom doesn't understand namespaces, it just shows
them among other attributes. ElementTree-based implementation doesn't show
namespaces at all. Implementation based on lxml.etree shows namespaces in a
separate file "namespaces".

It is useful to have a top-down view on an XML structure but it's especially
convenient to extract text values from tags. One can get, for example, a
base64-encoded image - just walk down the VFS to the tag's directory and copy
its text file to a real file.

The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs

"""

__version__ = "1.1.3"
__author__ = "Oleg Broytman "
__copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
__license__ = "GPL"

force_implementation = None  # Can be None for default choice,
                             # 'lxml', 'elementtree' or 'minidom'

use_minidom = True
use_elementtree = False
use_lxml = False

import logging
import math
import sys
import xml.dom.minidom

try:
    import xml.etree.ElementTree as ET
except ImportError:
    pass
else:
    use_elementtree = True

try:
    import lxml.etree as etree
except ImportError:
    pass
else:
    use_lxml = True

try:
    import locale
    use_locale = True
except ImportError:
    use_locale = False


def _warn(err):
    """Write a one-line warning about `err` to stderr."""
    sys.stderr.write("WARNING: %s\n" % (err,))


if use_locale:
    # Get the default charset.
    try:
        lcAll = locale.getdefaultlocale()
    except (locale.Error, ValueError):
        # sys.exc_info() instead of "except ..., err" keeps the syntax
        # acceptable to both old and new interpreters.
        _warn(sys.exc_info()[1])
        lcAll = []

    # getdefaultlocale() may report (None, None), e.g. for the "C" locale;
    # an encoding of None cannot be passed to .encode(), so only accept
    # a non-empty value here.
    if len(lcAll) == 2 and lcAll[1]:
        default_encoding = lcAll[1]
    else:
        try:
            default_encoding = locale.getpreferredencoding()
        except locale.Error:
            _warn(sys.exc_info()[1])
            default_encoding = sys.getdefaultencoding()
else:
    default_encoding = sys.getdefaultencoding()

logger = logging.getLogger('xml-mcextfs')
log_err_handler = logging.StreamHandler(sys.stderr)
logger.addHandler(log_err_handler)
logger.setLevel(logging.INFO)

if len(sys.argv) < 3:
    logger.critical("""\
XML Virtual FileSystem for Midnight Commander version %s
Author: %s
%s

This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
/usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
        __version__, __author__, __copyright__
    )
    sys.exit(1)

if use_locale:
    # The locale module is optional (see the guarded import above) and the
    # user's locale settings may be broken; neither should abort the script.
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        _warn(sys.exc_info()[1])


# Listing lines written to stdout by the "list" command, one per VFS entry.
_DIR_LINE = "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s\n"
_FILE_LINE = "-r--r--r-- 1 user group %d Jan 1 00:00 %s\n"

# Template used to show an XML comment inside a "text" file.
_COMMENT_TEMPLATE = u"<!--%s-->"


def _print_dir(path):
    """Write the listing line for a directory `path` (already encoded)."""
    sys.stdout.write(_DIR_LINE % path)


def _print_file(size, path):
    """Write the listing line for a file `path` (already encoded)."""
    sys.stdout.write(_FILE_LINE % (size, path))


def _join_and_encode(fragments):
    """Join text fragments with newlines and encode for the console."""
    return '\n'.join(fragments).encode(default_encoding, "replace")


class XmlVfs(object):
    """Abstract base class

    Subclasses provide the parser-specific primitives: parse(), getroot(),
    get_root_comments(), getchildren(), gettag(), istag(), getlocalname(),
    getattrs() and collect_text(); implementations that set
    supports_namespaces also provide has_ns() and ns2text().
    """

    supports_namespaces = False

    def __init__(self):
        self.parse()

    def list(self):
        """Print the listing of the entire VFS to stdout."""
        root_comments = self.get_root_comments()
        if root_comments:
            _print_file(len(root_comments), "text")

        self._list(self.getroot())

    def _list(self, node, path=''):
        """Recursively print listing lines for the tags below `node`.

        `path` is the (unencoded) VFS path of `node`; it is empty for the
        root.
        """
        children = self.getchildren(node)
        # The width of the zero-padded number is derived from the total
        # number of children (tags and non-tags alike).
        n = len(children)
        if n:
            width = int(math.log10(n)) + 1
            template = "%%0%dd" % width
        else:
            template = "%d"

        n = 0
        for element in children:
            if not self.istag(element):
                continue
            n += 1
            tag = self.getlocalname(self.gettag(element))
            if path:
                subpath = '%s/%s %s' % (path, template % n, tag)
            else:
                subpath = '%s %s' % (template % n, tag)
            subpath_encoded = subpath.encode(default_encoding, "replace")
            _print_dir(subpath_encoded)

            if self.getattrs(element):
                attr_text = self.attrs2text(element)
                _print_file(len(attr_text),
                            "%s/attributes" % subpath_encoded)

            if self.supports_namespaces and self.has_ns(element):
                ns_text = self.ns2text(element)
                _print_file(len(ns_text),
                            "%s/namespaces" % subpath_encoded)

            text = self.collect_text(element)
            if text:
                _print_file(len(text), "%s/text" % subpath_encoded)

            self._list(element, subpath)

    def get_tag_node(self, node, i):
        """Return the `i`-th (1-based) tag child of `node`.

        Exits via xml_error() if `node` has fewer tag children.
        """
        n = 0
        for element in self.getchildren(node):
            if self.istag(element):
                n += 1
                if n == i:
                    return element
        xml_error('There are less than %d nodes' % i)

    def attrs2text(self, node):
        """Return the attributes of `node` as encoded name=value lines."""
        attr_accumulator = []
        for name, value in self.getattrs(node):
            name = self.getlocalname(name).encode(default_encoding, "replace")
            value = value.encode(default_encoding, "replace")
            attr_accumulator.append("%s=%s" % (name, value))
        return '\n'.join(attr_accumulator)

    def has_ns(self, node):
        """Return True if `node` declares namespaces; False by default."""
        return False


class MiniDOMXmlVfs(XmlVfs):
    """Implementation based on xml.dom.minidom."""

    def parse(self):
        self.document = xml.dom.minidom.parse(sys.argv[2])

    def getattrs(self, node):
        attrs = node.attributes
        attrs = [attrs.item(i) for i in range(attrs.length)]
        return [(a.name, a.value) for a in attrs]

    def collect_text(self, node):
        """Return the encoded text and comments directly inside `node`."""
        text_accumulator = []
        for element in node.childNodes:
            if element.localName:
                # A tag; it is listed as a directory of its own.
                continue
            elif element.nodeType == element.COMMENT_NODE:
                text = _COMMENT_TEMPLATE % element.nodeValue
            elif element.nodeType == element.TEXT_NODE:
                text = element.nodeValue.strip()
            else:
                xml_error("Unknown node type %d" % element.nodeType)
            if text:
                text_accumulator.append(text)
        return _join_and_encode(text_accumulator)

    def getroot(self):
        return self.document

    def get_root_comments(self):
        return self.collect_text(self.document)

    def getchildren(self, node):
        return node.childNodes

    def gettag(self, node):
        return node.localName

    def istag(self, node):
        return bool(node.localName)

    def getlocalname(self, name):
        return name


if use_elementtree or use_lxml:
    class CommonEtreeXmlVfs(XmlVfs):
        """Code shared by the ElementTree and lxml.etree implementations."""

        def getattrs(self, node):
            return node.attrib.items()

        def collect_text(self, node):
            """Return the encoded text and comments directly inside `node`.

            In the etree model the text that follows a child's closing tag
            is stored in that child's `tail`, so the text belonging to
            `node` is node.text plus the tails of its children. The tail of
            `node` itself belongs to its parent and is not included.
            """
            text_accumulator = []
            self._append_stripped(text_accumulator, node.text)
            for element in node:
                if not self.istag(element):
                    text_accumulator.append(
                        _COMMENT_TEMPLATE % element.text)
                self._append_stripped(text_accumulator, element.tail)
            return _join_and_encode(text_accumulator)

        def _append_stripped(self, accumulator, text):
            """Append stripped `text` to `accumulator` unless it is empty."""
            if text:
                text = text.strip()
                if text:
                    accumulator.append(text)

        def getchildren(self, node):
            return list(node)

        def gettag(self, node):
            return node.tag

        def istag(self, node):
            # Comments and processing instructions have a non-string tag.
            return isinstance(node.tag, basestring)


if use_elementtree:
    class ElementTreeXmlVfs(CommonEtreeXmlVfs):
        """Implementation based on xml.etree.ElementTree."""

        def parse(self):
            # Copied from http://effbot.org/zone/element-pi.htm

            class PIParser(ET.XMLTreeBuilder):

                def __init__(self):
                    ET.XMLTreeBuilder.__init__(self)
                    # assumes ElementTree 1.2.X
                    self._parser.CommentHandler = self.handle_comment
                    self._parser.ProcessingInstructionHandler = self.handle_pi
                    # Wrap everything into a synthetic "document" element.
                    self._target.start("document", {})

                def close(self):
                    self._target.end("document")
                    return ET.XMLTreeBuilder.close(self)

                def handle_comment(self, data):
                    self._target.start(ET.Comment, {})
                    self._target.data(data)
                    self._target.end(ET.Comment)

                def handle_pi(self, target, data):
                    self._target.start(ET.PI, {})
                    self._target.data(target + " " + data)
                    self._target.end(ET.PI)

            self.document = ET.parse(sys.argv[2], PIParser())

        def getroot(self):
            return self.document.getroot()

        def get_root_comments(self):
            text_accumulator = []
            for element in self.getroot():
                if not self.istag(element):
                    text_accumulator.append(
                        _COMMENT_TEMPLATE % element.text)
            return _join_and_encode(text_accumulator)

        def getlocalname(self, name):
            if name.startswith('{'):
                name = name.split('}', 1)[1]  # Remove XML namespace
            return name


if use_lxml:
    class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
        """Implementation based on lxml.etree."""

        supports_namespaces = True

        def parse(self):
            self.document = etree.parse(sys.argv[2])

        def getroot(self):
            # A one-element list, so that the root tag itself is listed
            # (and looked up) as the first child of the VFS root.
            return [self.document.getroot()]

        def get_root_comments(self):
            text_accumulator = []
            for element in self.document.getroot().itersiblings(
                    tag=etree.Comment, preceding=True):
                text_accumulator.append(_COMMENT_TEMPLATE % element.text)
            return _join_and_encode(text_accumulator)

        def getlocalname(self, name):
            return etree.QName(name).localname

        def _get_local_ns(self, node):
            """Return the namespaces of `node` that differ from its parent's.

            A new dict is built instead of deleting keys from node.nsmap,
            so the result does not depend on whether nsmap is a copy, and a
            prefix missing on either side cannot raise KeyError.
            """
            this_nsmap = node.nsmap
            parent = node.getparent()
            if parent is None:
                return this_nsmap
            parent_nsmap = parent.nsmap
            local_nsmap = {}
            for key, value in this_nsmap.items():
                if key not in parent_nsmap or parent_nsmap[key] != value:
                    local_nsmap[key] = value
            return local_nsmap

        def has_ns(self, node):
            return bool(self._get_local_ns(node))

        def ns2text(self, node):
            """Return the local namespaces of `node` as encoded lines."""
            ns_accumulator = []
            for name, value in self._get_local_ns(node).items():
                if name:
                    name = name.encode(default_encoding, "replace")
                else:
                    # The default namespace has no prefix.
                    name = 'xmlns'
                value = value.encode(default_encoding, "replace")
                ns_accumulator.append("%s=%s" % (name, value))
            return '\n'.join(ns_accumulator)


def build_xmlvfs():
    """Create the VFS object using the forced or the best implementation."""
    if force_implementation is None:
        if use_lxml:
            return LxmlEtreeXmlVfs()
        elif use_elementtree:
            return ElementTreeXmlVfs()
        else:
            return MiniDOMXmlVfs()
    elif force_implementation == 'minidom':
        return MiniDOMXmlVfs()
    elif force_implementation == 'elementtree':
        return ElementTreeXmlVfs()
    elif force_implementation == 'lxml':
        return LxmlEtreeXmlVfs()
    else:
        raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation)


def mcxml_list():
    """List the entire VFS"""

    xmlvfs = build_xmlvfs()
    xmlvfs.list()


def mcxml_copyout():
    """Extract a file from the VFS"""

    xmlvfs = build_xmlvfs()
    xml_filename = sys.argv[3]
    real_filename = sys.argv[4]

    # Walk down the numbered directories; the leading number of every path
    # component selects the tag, the last component names the file.
    node = xmlvfs.getroot()
    for path_comp in xml_filename.split('/'):
        if ' ' in path_comp:
            i = int(path_comp.split(' ', 1)[0])
            node = xmlvfs.get_tag_node(node, i)
        elif path_comp in ('attributes', 'namespaces', 'text'):
            break
        else:
            xml_error('Unknown file')

    if path_comp == 'attributes':
        if xmlvfs.getattrs(node):
            text = xmlvfs.attrs2text(node)
        else:
            xml_error('There are no attributes')

    elif path_comp == 'namespaces':
        if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
            text = xmlvfs.ns2text(node)
        else:
            xml_error('There are no namespaces')

    elif path_comp == 'text':
        if '/' in xml_filename:
            text = xmlvfs.collect_text(node)
        else:
            # The top-level "text" file holds the root comments.
            text = xmlvfs.get_root_comments()

    else:
        xml_error('Unknown file')

    outfile = open(real_filename, 'w')
    try:
        outfile.write(text)
    finally:
        # Close the file even if the write fails.
        outfile.close()


def mcxml_copyin():
    """Put a file to the VFS"""
    sys.exit("XML VFS doesn't support adding files (read-only filesystem)")


def mcxml_rm():
    """Remove a file from the VFS"""
    sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")

mcxml_rmdir = mcxml_rm


def mcxml_mkdir():
    """Create a directory in the VFS"""
    sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")


def xml_error(error_str):
    """Log an error found while walking the XML file and exit with 1."""
    logger.critical("Error walking XML file: %s", error_str)
    sys.exit(1)


command = sys.argv[1]
procname = "mcxml_" + command

g = globals()
if procname not in g:
    logger.critical("Unknown command %s", command)
    sys.exit(1)

try:
    g[procname]()
except SystemExit:
    raise
except:
    # Top-level boundary: log whatever went wrong and report the failure
    # to the caller through the exit status.
    logger.exception("Error during run")
    sys.exit(1)