X-Git-Url: https://git.phdru.name/?p=extfs.d.git;a=blobdiff_plain;f=xml;h=6cf2276f9c3fbcdcd0adcf0dce5fb658ea126644;hp=1c98c86fbc6a97ed0bd7a1d5ecd074736757d85f;hb=af76dda2d3c72d5712ae4559c38da2829398a27b;hpb=2782d4a3a8ed6f10f461935136e0ba7f2d998064 diff --git a/xml b/xml index 1c98c86..6cf2276 100755 --- a/xml +++ b/xml @@ -4,36 +4,69 @@ The script requires Midnight Commander 3.1+ (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/). -For mc 4.7+ put the script in $HOME/.mc/extfs.d. +For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d. For older versions put it in /usr/[local/][lib|share]/mc/extfs and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini. Make the script executable. -Run this "cd" command in the Midnight Commander (in the "bindings" file the -command is "%cd"): cd file.xml#xml, where "file.xml" is the name of your xml -file. +For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings" +file the command is "%cd"): cd file/xml://; in older versions it is +cd file#xml, where "file" is the name of your XML file. The VFS represents tags as directories; the directories are numbered to distinguish tags with the same name; also numbering helps to sort tags by their order in XML instead of sorting them by name. Attributes, text nodes and comments are represented as text files; attributes are shown in a file named "attributes", attributes are listed in the file as name=value lines (I -deliberately ignore a small chance there is a newline character in values). The -filesystem is read-only. +deliberately ignore a small chance of newline characters in values); names and +values are reencoded to the console encoding. Text nodes and comments are +collected in a file named "text", stripped and reencoded. The filesystem is +read-only. + +Implementation based on minidom doesn't understand namespaces, it just shows +them among other attributes. ElementTree-based implementation doesn't show +namespaces at all. Implementation based on lxml.etree shows namespaces in a +separate file "namespaces". + +It is useful to have a top-down view on an XML structure but it's especially +convenient to extract text values from tags. One can get, for example, a +base64-encoded image - just walk down the VFS to the tag's directory and copy +its text file to a real file. The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs """ -__version__ = "0.2.0" +__version__ = "0.6.1" __author__ = "Oleg Broytman " __copyright__ = "Copyright (C) 2013 PhiloSoft Design" __license__ = "GPL" +force_implementation = None # Can be None for default choice, + # 'lxml', 'elementtree' or 'minidom' + +use_minidom = True +use_elementtree = False +use_lxml = False + import math import sys import xml.dom.minidom +try: + import xml.etree.ElementTree as ET +except ImportError: + pass +else: + use_elementtree = True + +try: + import lxml.etree as etree +except ImportError: + pass +else: + use_lxml = True + try: import locale use_locale = True @@ -71,7 +104,7 @@ XML Virtual FileSystem for Midnight Commander version %s Author: %s %s -This is not a program. Put the script in $HOME/.mc/extfs.d or +This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or /usr/[local/][lib|share]/mc/extfs. For more information read the source!""", __version__, __author__, __copyright__ ) @@ -80,80 +113,273 @@ This is not a program. Put the script in $HOME/.mc/extfs.d or locale.setlocale(locale.LC_ALL, '') -def _attrs2text(attrs): - attrs = [attrs.item(i) for i in range (attrs.length)] - return '\n'.join(["%s=%s" % - (a.name.encode(default_encoding, "replace"), - a.value.encode(default_encoding, "replace")) - for a in attrs]) - -def _list(node, path=''): - childNodes = node.childNodes - n = 0 - for element in childNodes: - if element.localName: - n += 1 - if n: - width = int(math.log10(n))+1 - template = "%%0%dd" % width - else: - template = "%d" - n = 0 - for element in childNodes: - if element.localName: + +class XmlVfs(object): + """Abstract base class""" + + supports_namespaces = False + + def __init__(self): + self.parse() + + def list(self): + self._list(self.getroot()) + + def _list(self, node, path=''): + n = len(self.getchildren(node)) + if n: + width = int(math.log10(n)) + 1 + template = "%%0%dd" % width + else: + template = "%d" + n = 0 + for element in self.getchildren(node): + if not self.istag(element): + continue n += 1 + tag = self.getlocalname(self.gettag(element)) if path: - subpath = '%s/%s %s' % (path, template % n, element.localName) + subpath = '%s/%s %s' % (path, template % n, tag) else: - subpath = '%s %s' % (template % n, element.localName) + subpath = '%s %s' % (template % n, tag) subpath_encoded = subpath.encode(default_encoding, "replace") - print "dr--r--r-- 1 user group 0 Jan 1 00:00 %s" % subpath_encoded - attrs = element.attributes - if attrs: - attr_text = _attrs2text(attrs) + print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded + if self.getattrs(element): + attr_text = self.attrs2text(element) print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( len(attr_text), subpath_encoded) - _list(element, subpath) + if self.supports_namespaces and self.has_ns(element): + ns_text = self.ns2text(element) + print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % ( + len(ns_text), subpath_encoded) + text = self.collect_text(element) + if text: + print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % ( + len(text), subpath_encoded) + self._list(element, subpath) + + def get_tag_node(self, node, i): + n = 0 + for element in self.getchildren(node): + if self.istag(element): + n += 1 + if n == i: + return element + xml_error('There are less than %d nodes' % i) + + def attrs2text(self, node): + attr_accumulator = [] + for name, value in self.getattrs(node): + name = self.getlocalname(name).encode(default_encoding, "replace") + value = value.encode(default_encoding, "replace") + attr_accumulator.append("%s=%s" % (name, value)) + return '\n'.join(attr_accumulator) + + def has_ns(self, node): + return False + + +class MiniDOMXmlVfs(XmlVfs): + def parse(self): + self.document = xml.dom.minidom.parse(sys.argv[2]) + + def getattrs(self, node): + attrs = node.attributes + attrs = [attrs.item(i) for i in range(attrs.length)] + return [(a.name, a.value) for a in attrs] + + def collect_text(self, node): + text_accumulator = [] + for element in node.childNodes: + if element.localName: + continue + elif element.nodeType == element.COMMENT_NODE: + text = u"" % element.nodeValue + elif element.nodeType == element.TEXT_NODE: + text = element.nodeValue.strip() + else: + xml_error("Unknown node type %d" % element.nodeType) + if text: text_accumulator.append(text) + return '\n'.join(text_accumulator).encode(default_encoding, "replace") + + def getroot(self): + return self.document + + def getchildren(self, node): + return node.childNodes + + def gettag(self, node): + return node.localName + + def istag(self, node): + return bool(node.localName) + + def getlocalname(self, name): + return name + + +if use_elementtree or use_lxml: + class CommonEtreeXmlVfs(XmlVfs): + def getattrs(self, node): + return node.attrib.items() + + def collect_text(self, node): + text_accumulator = [] + if node.text: + text = node.text.strip() + if text: text_accumulator.append(text) + for element in node: + if not self.istag(element): + text = u"" % element.text + text_accumulator.append(text) + if node.tail: + text = node.tail.strip() + if text: text_accumulator.append(text) + return '\n'.join(text_accumulator).encode(default_encoding, "replace") + + def getchildren(self, node): + return list(node) + + def gettag(self, node): + return node.tag + + def istag(self, node): + return isinstance(node.tag, basestring) + + +if use_elementtree: + class ElementTreeXmlVfs(CommonEtreeXmlVfs): + def parse(self): + # Copied from http://effbot.org/zone/element-pi.ht + + class PIParser(ET.XMLTreeBuilder): + + def __init__(self): + ET.XMLTreeBuilder.__init__(self) + # assumes ElementTree 1.2.X + self._parser.CommentHandler = self.handle_comment + self._parser.ProcessingInstructionHandler = self.handle_pi + self._target.start("document", {}) + + def close(self): + self._target.end("document") + return ET.XMLTreeBuilder.close(self) + + def handle_comment(self, data): + self._target.start(ET.Comment, {}) + self._target.data(data) + self._target.end(ET.Comment) + + def handle_pi(self, target, data): + self._target.start(ET.PI, {}) + self._target.data(target + " " + data) + self._target.end(ET.PI) + + self.document = ET.parse(sys.argv[2], PIParser()) + + def getroot(self): + return self.document.getroot() + + def getlocalname(self, name): + if name.startswith('{'): + name = name.split('}', 1)[1] # Remove XML namespace + return name + + +if use_lxml: + class LxmlEtreeXmlVfs(CommonEtreeXmlVfs): + supports_namespaces = True + + def parse(self): + self.document = etree.parse(sys.argv[2]) + + def getroot(self): + return [self.document.getroot()] + + def getlocalname(self, name): + return etree.QName(name).localname + + def _get_local_ns(self, node): + this_nsmap = node.nsmap + parent = node.getparent() + if parent is not None: + parents_nsmap = parent.nsmap + for key in parents_nsmap: + del this_nsmap[key] + return this_nsmap + + def has_ns(self, node): + return bool(self._get_local_ns(node)) + + def ns2text(self, node): + ns_accumulator = [] + for name, value in self._get_local_ns(node).items(): + name = name.encode(default_encoding, "replace") + value = value.encode(default_encoding, "replace") + ns_accumulator.append("%s=%s" % (name, value)) + return '\n'.join(ns_accumulator) + + +def build_xmlvfs(): + if force_implementation is None: + if use_lxml: + return LxmlEtreeXmlVfs() + elif use_elementtree: + return ElementTreeXmlVfs() + else: + return MiniDOMXmlVfs() + elif force_implementation == 'minidom': + return MiniDOMXmlVfs() + elif force_implementation == 'elementtree': + return ElementTreeXmlVfs() + elif force_implementation == 'lxml': + return LxmlEtreeXmlVfs() + else: + raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation) + def mcxml_list(): """List the entire VFS""" - dom = xml.dom.minidom.parse(sys.argv[2]) - _list(dom) + xmlvfs = build_xmlvfs() + xmlvfs.list() -def _get_child_node(node, i): - n = 0 - for element in node.childNodes: - if element.localName: - n += 1 - if n == i: - return element - xml_error('There are less than %d nodes' % i) - def mcxml_copyout(): """Extract a file from the VFS""" - node = xml.dom.minidom.parse(sys.argv[2]) + xmlvfs = build_xmlvfs() xml_filename = sys.argv[3] real_filename = sys.argv[4] + node = xmlvfs.getroot() for path_comp in xml_filename.split('/'): if ' ' in path_comp: i = int(path_comp.split(' ', 1)[0]) - node = _get_child_node(node, i) - elif path_comp == 'attributes': + node = xmlvfs.get_tag_node(node, i) + elif path_comp in ('attributes', 'namespaces', 'text'): break else: xml_error('Unknown file') if path_comp == 'attributes': - attrs = node.attributes - if attrs: - text = _attrs2text(attrs) + if xmlvfs.getattrs(node): + text = xmlvfs.attrs2text(node) else: xml_error('There are no attributes') + elif path_comp == 'namespaces': + if xmlvfs.supports_namespaces and xmlvfs.has_ns(element): + text = xmlvfs.ns2text(node) + else: + xml_error('There are no namespaces') + + elif path_comp == 'text': + text = xmlvfs.collect_text(node) + + else: + xml_error('Unknown file') + outfile = open(real_filename, 'w') outfile.write(text) outfile.close()