X-Git-Url: https://git.phdru.name/?p=extfs.d.git;a=blobdiff_plain;f=xml;h=f7dc718b71f1e08f08a7c66c08012e3c09018066;hp=e78e47e4bc20d13403eb85f7cf8b43591ec18fdf;hb=2a70cd12f889c7f71520de7cce97d92f54f3f0ac;hpb=db63b272c4064cfc541f89f70aa8354ee6920905 diff --git a/xml b/xml index e78e47e..f7dc718 100755 --- a/xml +++ b/xml @@ -13,20 +13,24 @@ For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings" file the command is "%cd"): cd file/xml://; in older versions it is cd file#xml, where "file" is the name of your XML file. +See detailed installation instructions at +http://phdru.name/Software/mc/xml_INSTALL.html. + The VFS represents tags as directories; the directories are numbered to -distinguish tags with the same name; also numbering helps to sort tags by their -order in XML instead of sorting them by name. Attributes, text nodes and -comments are represented as text files; attributes are shown in a file named -"attributes", attributes are listed in the file as name=value lines (I -deliberately ignore a small chance of newline characters in values); names and -values are reencoded to the console encoding. Text nodes and comments are -collected in a file named "text", stripped and reencoded. The filesystem is +distinguish tags with the same name; numbering also helps to sort tags by their +order in XML instead of sorting them by name and prevents name clash when tag +names coincide with the names of special files used by XML VFS. Attributes, +text nodes and comments are represented as text files; attributes are shown in +a file named "attributes", attributes are listed in the file as name=value +lines (I deliberately ignore a small chance of newline characters in values); +names and values are reencoded to the console encoding. Text nodes and comments +are collected in a file named "text", stripped and reencoded. The filesystem is read-only. Implementation based on minidom doesn't understand namespaces, it just shows them among other attributes. 
ElementTree-based implementation doesn't show namespaces at all. Implementation based on lxml.etree shows namespaces in a -separate file "namespaces"; every child tag includes its parent's namespaces. +separate file "namespaces". It is useful to have a top-down view on an XML structure but it's especially convenient to extract text values from tags. One can get, for example, a @@ -37,9 +41,9 @@ The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs """ -__version__ = "0.6.0" +__version__ = "1.1.5" __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2013 PhiloSoft Design" +__copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design" __license__ = "GPL" force_implementation = None # Can be None for default choice, @@ -50,7 +54,9 @@ use_elementtree = False use_lxml = False import math +from os.path import getmtime import sys +from time import localtime import xml.dom.minidom try: @@ -120,9 +126,18 @@ class XmlVfs(object): supports_namespaces = False def __init__(self): + self.xml_file = sys.argv[2] self.parse() def list(self): + Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5] + self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M) + + root_comments = self.get_root_comments() + if root_comments: + print "-r--r--r-- 1 user group %d %s text" % ( + len(root_comments), self.xml_file_dt) + self._list(self.getroot()) def _list(self, node, path=''): @@ -143,21 +158,31 @@ class XmlVfs(object): else: subpath = '%s %s' % (template % n, tag) subpath_encoded = subpath.encode(default_encoding, "replace") - print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded + print "dr-xr-xr-x 1 user group 0 %s %s" % ( + self.xml_file_dt, subpath_encoded) if self.getattrs(element): attr_text = self.attrs2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( - len(attr_text), subpath_encoded) + print "-r--r--r-- 1 user group %d %s %s/attributes" % ( + len(attr_text), self.xml_file_dt, subpath_encoded) if 
self.supports_namespaces and self.has_ns(element): ns_text = self.ns2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % ( - len(ns_text), subpath_encoded) + print "-r--r--r-- 1 user group %d %s %s/namespaces" % ( + len(ns_text), self.xml_file_dt, subpath_encoded) text = self.collect_text(element) if text: - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % ( - len(text), subpath_encoded) + print "-r--r--r-- 1 user group %d %s %s/text" % ( + len(text), self.xml_file_dt, subpath_encoded) self._list(element, subpath) + def get_tag_node(self, node, i): + n = 0 + for element in self.getchildren(node): + if self.istag(element): + n += 1 + if n == i: + return element + xml_error('There are less than %d nodes' % i) + def attrs2text(self, node): attr_accumulator = [] for name, value in self.getattrs(node): @@ -169,19 +194,10 @@ class XmlVfs(object): def has_ns(self, node): return False - def get_child_node(self, node, i): - n = 0 - for element in self.getchildren(node): - if self.istag(element): - n += 1 - if n == i: - return element - xml_error('There are less than %d nodes' % i) - class MiniDOMXmlVfs(XmlVfs): def parse(self): - self.document = xml.dom.minidom.parse(sys.argv[2]) + self.document = xml.dom.minidom.parse(self.xml_file) def getattrs(self, node): attrs = node.attributes @@ -205,6 +221,9 @@ class MiniDOMXmlVfs(XmlVfs): def getroot(self): return self.document + def get_root_comments(self): + return self.collect_text(self.document) + def getchildren(self, node): return node.childNodes @@ -237,9 +256,6 @@ if use_elementtree or use_lxml: if text: text_accumulator.append(text) return '\n'.join(text_accumulator).encode(default_encoding, "replace") - def getroot(self): - return self.document.getroot() - def getchildren(self, node): return list(node) @@ -253,7 +269,7 @@ if use_elementtree or use_lxml: if use_elementtree: class ElementTreeXmlVfs(CommonEtreeXmlVfs): def parse(self): - # Copied from http://effbot.org/zone/element-pi.ht + 
# Copied from http://effbot.org/zone/element-pi.htm class PIParser(ET.XMLTreeBuilder): @@ -278,7 +294,18 @@ if use_elementtree: self._target.data(target + " " + data) self._target.end(ET.PI) - self.document = ET.parse(sys.argv[2], PIParser()) + self.document = ET.parse(self.xml_file, PIParser()) + + def getroot(self): + return self.document.getroot() + + def get_root_comments(self): + text_accumulator = [] + for element in self.getroot(): + if not self.istag(element): + text = u"<!--%s-->" % element.text + text_accumulator.append(text) + return '\n'.join(text_accumulator).encode(default_encoding, "replace") def getlocalname(self, name): if name.startswith('{'): @@ -291,25 +318,45 @@ if use_lxml: supports_namespaces = True def parse(self): - self.document = etree.parse(sys.argv[2]) + self.document = etree.parse(self.xml_file) + + def getroot(self): + return [self.document.getroot()] + + def get_root_comments(self): + text_accumulator = [] + for element in self.document.getroot().itersiblings(tag=etree.Comment, preceding=True): + text = u"<!--%s-->" % element.text + text_accumulator.append(text) + return '\n'.join(text_accumulator).encode(default_encoding, "replace") + + def getlocalname(self, name): + return etree.QName(name).localname + + def _get_local_ns(self, node): + this_nsmap = node.nsmap + parent = node.getparent() + if parent is not None: + parent_nsmap = parent.nsmap + for key in parent_nsmap: + if this_nsmap[key] == parent_nsmap[key]: + del this_nsmap[key] + return this_nsmap def has_ns(self, node): - return bool(node.nsmap) + return bool(self._get_local_ns(node)) def ns2text(self, node): ns_accumulator = [] - for name, value in node.nsmap.items(): - name = name.encode(default_encoding, "replace") + for name, value in self._get_local_ns(node).items(): + if name: + name = name.encode(default_encoding, "replace") + else: + name = 'xmlns' value = value.encode(default_encoding, "replace") ns_accumulator.append("%s=%s" % (name, value)) return '\n'.join(ns_accumulator) - def 
getroot(self): - return [self.document.getroot()] - - def getlocalname(self, name): - return etree.QName(name).localname - def build_xmlvfs(): if force_implementation is None: @@ -347,7 +394,7 @@ def mcxml_copyout(): for path_comp in xml_filename.split('/'): if ' ' in path_comp: i = int(path_comp.split(' ', 1)[0]) - node = xmlvfs.get_child_node(node, i) + node = xmlvfs.get_tag_node(node, i) elif path_comp in ('attributes', 'namespaces', 'text'): break else: @@ -360,13 +407,16 @@ def mcxml_copyout(): xml_error('There are no attributes') elif path_comp == 'namespaces': - if xmlvfs.has_ns(node): + if xmlvfs.supports_namespaces and xmlvfs.has_ns(node): text = xmlvfs.ns2text(node) else: - xml_error('There are no attributes') + xml_error('There are no namespaces') elif path_comp == 'text': - text = xmlvfs.collect_text(node) + if '/' in xml_filename: + text = xmlvfs.collect_text(node) + else: + text = xmlvfs.get_root_comments() else: xml_error('Unknown file')