#! /usr/bin/env python """XML Virtual FileSystem for Midnight Commander The script requires Midnight Commander 3.1+ (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/). For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d. For older versions put it in /usr/[local/][lib|share]/mc/extfs and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini. Make the script executable. For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings" file the command is "%cd"): cd file/xml://; in older versions it is cd file#xml, where "file" is the name of your XML file. See detailed installation instructions at http://phdru.name/Software/mc/xml_INSTALL.html. The VFS represents tags as directories; the directories are numbered to distinguish tags with the same name; numbering also helps to sort tags by their order in XML instead of sorting them by name and prevents name clash when tag names coincide with the names of special files used by XML VFS. Attributes, text nodes and comments are represented as text files; attributes are shown in a file named "attributes", attributes are listed in the file as name=value lines (I deliberately ignore a small chance of newline characters in values); names and values are reencoded to the console encoding. Text nodes and comments are collected in a file named "text", stripped and reencoded. The filesystem is read-only. Implementation based on minidom doesn't understand namespaces, it just shows them among other attributes. ElementTree-based implementation doesn't show namespaces at all. Implementation based on lxml.etree shows namespaces in a separate file "namespaces". It is useful to have a top-down view on an XML structure but it's especially convenient to extract text values from tags. One can get, for example, a base64-encoded image - just walk down the VFS to the tag's directory and copy its text file to a real file. The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs """ __version__ = "1.1.5" __author__ = "Oleg Broytman " __copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design" __license__ = "GPL" # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'. force_implementation = None use_minidom = True use_elementtree = False use_lxml = False import math from os.path import getmtime import sys from time import localtime import xml.dom.minidom try: import xml.etree.ElementTree as ET except ImportError: pass else: use_elementtree = True try: import lxml.etree as etree except ImportError: pass else: use_lxml = True try: import locale use_locale = True except ImportError: use_locale = False if use_locale: # Get the default charset. try: lcAll = locale.getdefaultlocale() except locale.Error, err: print >>sys.stderr, "WARNING:", err lcAll = [] if len(lcAll) == 2: default_encoding = lcAll[1] else: try: default_encoding = locale.getpreferredencoding() except locale.Error, err: print >>sys.stderr, "WARNING:", err default_encoding = sys.getdefaultencoding() else: default_encoding = sys.getdefaultencoding() import logging logger = logging.getLogger('xml-mcextfs') log_err_handler = logging.StreamHandler(sys.stderr) logger.addHandler(log_err_handler) logger.setLevel(logging.INFO) if len(sys.argv) < 3: logger.critical("""\ XML Virtual FileSystem for Midnight Commander version %s Author: %s %s This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or /usr/[local/][lib|share]/mc/extfs. For more information read the source!""", __version__, __author__, __copyright__) sys.exit(1) locale.setlocale(locale.LC_ALL, '') class XmlVfs(object): """Abstract base class""" supports_namespaces = False def __init__(self): self.xml_file = sys.argv[2] self.parse() def list(self): Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5] self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M) root_comments = self.get_root_comments() if root_comments: print "-r--r--r-- 1 user group %d %s text" % ( len(root_comments), self.xml_file_dt) self._list(self.getroot()) def _list(self, node, path=''): n = len(self.getchildren(node)) if n: width = int(math.log10(n)) + 1 template = "%%0%dd" % width else: template = "%d" n = 0 for element in self.getchildren(node): if not self.istag(element): continue n += 1 tag = self.getlocalname(self.gettag(element)) if path: subpath = '%s/%s %s' % (path, template % n, tag) else: subpath = '%s %s' % (template % n, tag) subpath_encoded = subpath.encode(default_encoding, "replace") print "dr-xr-xr-x 1 user group 0 %s %s" % ( self.xml_file_dt, subpath_encoded) if self.getattrs(element): attr_text = self.attrs2text(element) print "-r--r--r-- 1 user group %d %s %s/attributes" % ( len(attr_text), self.xml_file_dt, subpath_encoded) if self.supports_namespaces and self.has_ns(element): ns_text = self.ns2text(element) print "-r--r--r-- 1 user group %d %s %s/namespaces" % ( len(ns_text), self.xml_file_dt, subpath_encoded) text = self.collect_text(element) if text: print "-r--r--r-- 1 user group %d %s %s/text" % ( len(text), self.xml_file_dt, subpath_encoded) self._list(element, subpath) def get_tag_node(self, node, i): n = 0 for element in self.getchildren(node): if self.istag(element): n += 1 if n == i: return element xml_error('There are less than %d nodes' % i) def attrs2text(self, node): attr_accumulator = [] for name, value in self.getattrs(node): name = self.getlocalname(name).encode(default_encoding, "replace") value = value.encode(default_encoding, "replace") attr_accumulator.append("%s=%s" % (name, value)) return '\n'.join(attr_accumulator) def has_ns(self, node): return False class MiniDOMXmlVfs(XmlVfs): def parse(self): self.document = xml.dom.minidom.parse(self.xml_file) def getattrs(self, node): attrs = node.attributes attrs = [attrs.item(i) for i in range(attrs.length)] return [(a.name, a.value) for a in attrs] def collect_text(self, node): text_accumulator = [] for element in node.childNodes: if element.localName: continue elif element.nodeType == element.COMMENT_NODE: text = u"" % element.nodeValue elif element.nodeType == element.TEXT_NODE: text = element.nodeValue.strip() else: xml_error("Unknown node type %d" % element.nodeType) if text: text_accumulator.append(text) return '\n'.join(text_accumulator).encode(default_encoding, "replace") def getroot(self): return self.document def get_root_comments(self): return self.collect_text(self.document) def getchildren(self, node): return node.childNodes def gettag(self, node): return node.localName def istag(self, node): return bool(node.localName) def getlocalname(self, name): return name if use_elementtree or use_lxml: class CommonEtreeXmlVfs(XmlVfs): def getattrs(self, node): return node.attrib.items() def collect_text(self, node): text_accumulator = [] if node.text: text = node.text.strip() if text: text_accumulator.append(text) for element in node: if not self.istag(element): text = u"" % element.text text_accumulator.append(text) if node.tail: text = node.tail.strip() if text: text_accumulator.append(text) return '\n'.join(text_accumulator).encode( default_encoding, "replace") def getchildren(self, node): return list(node) def gettag(self, node): return node.tag def istag(self, node): return isinstance(node.tag, basestring) if use_elementtree: class ElementTreeXmlVfs(CommonEtreeXmlVfs): def parse(self): # Copied from http://effbot.org/zone/element-pi.htm class PIParser(ET.XMLTreeBuilder): def __init__(self): ET.XMLTreeBuilder.__init__(self) # assumes ElementTree 1.2.X self._parser.CommentHandler = self.handle_comment self._parser.ProcessingInstructionHandler = self.handle_pi self._target.start("document", {}) def close(self): self._target.end("document") return ET.XMLTreeBuilder.close(self) def handle_comment(self, data): self._target.start(ET.Comment, {}) self._target.data(data) self._target.end(ET.Comment) def handle_pi(self, target, data): self._target.start(ET.PI, {}) self._target.data(target + " " + data) self._target.end(ET.PI) self.document = ET.parse(self.xml_file, PIParser()) def getroot(self): return self.document.getroot() def get_root_comments(self): text_accumulator = [] for element in self.getroot(): if not self.istag(element): text = u"" % element.text text_accumulator.append(text) return '\n'.join(text_accumulator).encode( default_encoding, "replace") def getlocalname(self, name): if name.startswith('{'): name = name.split('}', 1)[1] # Remove XML namespace return name if use_lxml: class LxmlEtreeXmlVfs(CommonEtreeXmlVfs): supports_namespaces = True def parse(self): self.document = etree.parse(self.xml_file) def getroot(self): return [self.document.getroot()] def get_root_comments(self): text_accumulator = [] for element in self.document.getroot().itersiblings( tag=etree.Comment, preceding=True): text = u"" % element.text text_accumulator.append(text) return '\n'.join(text_accumulator).encode( default_encoding, "replace") def getlocalname(self, name): return etree.QName(name).localname def _get_local_ns(self, node): this_nsmap = node.nsmap parent = node.getparent() if parent is not None: parent_nsmap = parent.nsmap for key in parent_nsmap: if this_nsmap[key] == parent_nsmap[key]: del this_nsmap[key] return this_nsmap def has_ns(self, node): return bool(self._get_local_ns(node)) def ns2text(self, node): ns_accumulator = [] for name, value in self._get_local_ns(node).items(): if name: name = name.encode(default_encoding, "replace") else: name = 'xmlns' value = value.encode(default_encoding, "replace") ns_accumulator.append("%s=%s" % (name, value)) return '\n'.join(ns_accumulator) def build_xmlvfs(): if force_implementation is None: if use_lxml: return LxmlEtreeXmlVfs() elif use_elementtree: return ElementTreeXmlVfs() else: return MiniDOMXmlVfs() elif force_implementation == 'minidom': return MiniDOMXmlVfs() elif force_implementation == 'elementtree': return ElementTreeXmlVfs() elif force_implementation == 'lxml': return LxmlEtreeXmlVfs() else: raise ValueError('Unknown implementation "%s", expected "minidom", ' '"elementtree" or "lxml"' % force_implementation) def mcxml_list(): """List the entire VFS""" xmlvfs = build_xmlvfs() xmlvfs.list() def mcxml_copyout(): """Extract a file from the VFS""" xmlvfs = build_xmlvfs() xml_filename = sys.argv[3] real_filename = sys.argv[4] node = xmlvfs.getroot() for path_comp in xml_filename.split('/'): if ' ' in path_comp: i = int(path_comp.split(' ', 1)[0]) node = xmlvfs.get_tag_node(node, i) elif path_comp in ('attributes', 'namespaces', 'text'): break else: xml_error('Unknown file') if path_comp == 'attributes': if xmlvfs.getattrs(node): text = xmlvfs.attrs2text(node) else: xml_error('There are no attributes') elif path_comp == 'namespaces': if xmlvfs.supports_namespaces and xmlvfs.has_ns(node): text = xmlvfs.ns2text(node) else: xml_error('There are no namespaces') elif path_comp == 'text': if '/' in xml_filename: text = xmlvfs.collect_text(node) else: text = xmlvfs.get_root_comments() else: xml_error('Unknown file') outfile = open(real_filename, 'w') outfile.write(text) outfile.close() def mcxml_copyin(): """Put a file to the VFS""" sys.exit("XML VFS doesn't support adding files (read-only filesystem)") def mcxml_rm(): """Remove a file from the VFS""" sys.exit("XML VFS doesn't support removing files/directories " "(read-only filesystem)") mcxml_rmdir = mcxml_rm def mcxml_mkdir(): """Create a directory in the VFS""" sys.exit("XML VFS doesn't support creating directories " "(read-only filesystem)") def xml_error(error_str): logger.critical("Error walking XML file: %s", error_str) sys.exit(1) command = sys.argv[1] procname = "mcxml_" + command g = globals() if procname not in g: logger.critical("Unknown command %s", command) sys.exit(1) try: g[procname]() except SystemExit: raise except: logger.exception("Error during run")