X-Git-Url: https://git.phdru.name/?p=extfs.d.git;a=blobdiff_plain;f=xml;h=e34ac224f51fa09ab8f38e166114280c6b53ac91;hp=15ef7117f65788269070259e166d9585ada87955;hb=4cc3c29346f56087c1c50a8eb3c49a7a1788aa9e;hpb=71b9735e5da1c1685d95e99032799f637c8953fd diff --git a/xml b/xml index 15ef711..e34ac22 100755 --- a/xml +++ b/xml @@ -13,17 +13,24 @@ For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings" file the command is "%cd"): cd file/xml://; in older versions it is cd file#xml, where "file" is the name of your XML file. +See detailed installation instructions at +http://phdru.name/Software/mc/xml_INSTALL.html. + The VFS represents tags as directories; the directories are numbered to -distinguish tags with the same name; also numbering helps to sort tags by their -order in XML instead of sorting them by name. Attributes, text nodes and -comments are represented as text files; attributes are shown in a file named -"attributes", attributes are listed in the file as name=value lines (I -deliberately ignore a small chance of newline characters in values); names and -values are reencoded to the console encoding. Text nodes and comments are -collected in a file named "text", stripped and reencoded. The filesystem is -read-only. ElementTree-based implementation doesn't show namespaces as -attributes; lxml.etree-based implementation shows namespaces as a separate file -"namespaces"; every child tag includes its parent's namespaces. +distinguish tags with the same name; numbering also helps to sort tags by their +order in XML instead of sorting them by name and prevents name clash when tag +names coincide with the names of special files used by XML VFS. Attributes, +text nodes and comments are represented as text files; attributes are shown in +a file named "attributes", attributes are listed in the file as name=value +lines (I deliberately ignore a small chance of newline characters in values); +names and values are reencoded to the console encoding. Text nodes and comments +are collected in a file named "text", stripped and reencoded. The filesystem is +read-only. + +Implementation based on minidom doesn't understand namespaces, it just shows +them among other attributes. ElementTree-based implementation doesn't show +namespaces at all. Implementation based on lxml.etree shows namespaces in a +separate file "namespaces". It is useful to have a top-down view on an XML structure but it's especially convenient to extract text values from tags. One can get, for example, a @@ -34,20 +41,22 @@ The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs """ -__version__ = "0.5.0" +__version__ = "1.1.5" __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2013 PhiloSoft Design" +__copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design" __license__ = "GPL" -default_implementation = None # Can be None for default choice, - # 'lxml', 'elementtree' or 'minidom' +# Can be None for default choice, 'lxml', 'elementtree' or 'minidom'. +force_implementation = None use_minidom = True use_elementtree = False use_lxml = False import math +from os.path import getmtime import sys +from time import localtime import xml.dom.minidom try: @@ -65,29 +74,29 @@ else: use_lxml = True try: - import locale - use_locale = True + import locale + use_locale = True except ImportError: - use_locale = False + use_locale = False if use_locale: - # Get the default charset. - try: - lcAll = locale.getdefaultlocale() - except locale.Error, err: - print >>sys.stderr, "WARNING:", err - lcAll = [] - - if len(lcAll) == 2: - default_encoding = lcAll[1] - else: - try: - default_encoding = locale.getpreferredencoding() - except locale.Error, err: - print >>sys.stderr, "WARNING:", err - default_encoding = sys.getdefaultencoding() + # Get the default charset. + try: + lcAll = locale.getdefaultlocale() + except locale.Error, err: + print >>sys.stderr, "WARNING:", err + lcAll = [] + + if len(lcAll) == 2: + default_encoding = lcAll[1] + else: + try: + default_encoding = locale.getpreferredencoding() + except locale.Error, err: + print >>sys.stderr, "WARNING:", err + default_encoding = sys.getdefaultencoding() else: - default_encoding = sys.getdefaultencoding() + default_encoding = sys.getdefaultencoding() import logging logger = logging.getLogger('xml-mcextfs') @@ -103,8 +112,7 @@ Author: %s This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or /usr/[local/][lib|share]/mc/extfs. For more information read the source!""", - __version__, __author__, __copyright__ -) + __version__, __author__, __copyright__) sys.exit(1) @@ -112,16 +120,60 @@ locale.setlocale(locale.LC_ALL, '') class XmlVfs(object): + """Abstract base class""" + + supports_namespaces = False + def __init__(self): + self.xml_file = sys.argv[2] self.parse() def list(self): - self._list(self.getroot()) + Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5] + self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M) - def has_ns(self, node): - return False + root_comments = self.get_root_comments() + if root_comments: + print "-r--r--r-- 1 user group %d %s text" % ( + len(root_comments), self.xml_file_dt) + + self._list(self.getroot()) - def get_child_node(self, node, i): + def _list(self, node, path=''): + n = len(self.getchildren(node)) + if n: + width = int(math.log10(n)) + 1 + template = "%%0%dd" % width + else: + template = "%d" + n = 0 + for element in self.getchildren(node): + if not self.istag(element): + continue + n += 1 + tag = self.getlocalname(self.gettag(element)) + if path: + subpath = '%s/%s %s' % (path, template % n, tag) + else: + subpath = '%s %s' % (template % n, tag) + subpath_encoded = subpath.encode(default_encoding, "replace") + print "dr-xr-xr-x 1 user group 0 %s %s" % ( + self.xml_file_dt, subpath_encoded) + if self.getattrs(element): + attr_text = self.attrs2text(element) + print "-r--r--r-- 1 user group %d %s %s/attributes" % ( + len(attr_text), self.xml_file_dt, subpath_encoded) + if self.supports_namespaces and self.has_ns(element): + ns_text = self.ns2text(element) + print "-r--r--r-- 1 user group %d %s %s/namespaces" % ( + len(ns_text), self.xml_file_dt, subpath_encoded) + text = self.collect_text(element) + if text: + print "-r--r--r-- 1 user group %d %s %s/text" % ( + len(text), self.xml_file_dt, subpath_encoded) + self._list(element, subpath) + + def get_tag_node(self, node, i): n = 0 for element in self.getchildren(node): if self.istag(element): @@ -130,21 +182,26 @@ class XmlVfs(object): return element xml_error('There are less than %d nodes' % i) + def attrs2text(self, node): + attr_accumulator = [] + for name, value in self.getattrs(node): + name = self.getlocalname(name).encode(default_encoding, "replace") + value = value.encode(default_encoding, "replace") + attr_accumulator.append("%s=%s" % (name, value)) + return '\n'.join(attr_accumulator) + + def has_ns(self, node): + return False + class MiniDOMXmlVfs(XmlVfs): def parse(self): - self.document = xml.dom.minidom.parse(sys.argv[2]) + self.document = xml.dom.minidom.parse(self.xml_file) - def hasattrs(self, node): - return bool(node.attributes) - - def attrs2text(self, node): + def getattrs(self, node): attrs = node.attributes - attrs = [attrs.item(i) for i in range (attrs.length)] - return '\n'.join(["%s=%s" % - (a.name.encode(default_encoding, "replace"), - a.value.encode(default_encoding, "replace")) - for a in attrs]) + attrs = [attrs.item(i) for i in range(attrs.length)] + return [(a.name, a.value) for a in attrs] def collect_text(self, node): text_accumulator = [] @@ -157,75 +214,57 @@ class MiniDOMXmlVfs(XmlVfs): text = element.nodeValue.strip() else: xml_error("Unknown node type %d" % element.nodeType) - if text: text_accumulator.append(text) + if text: + text_accumulator.append(text) return '\n'.join(text_accumulator).encode(default_encoding, "replace") - def _list(self, node, path=''): - childNodes = node.childNodes - n = 0 - for element in childNodes: - if element.localName: - n += 1 - if n: - width = int(math.log10(n))+1 - template = "%%0%dd" % width - else: - template = "%d" - n = 0 - for element in childNodes: - if element.localName: - n += 1 - if path: - subpath = '%s/%s %s' % (path, template % n, element.localName) - else: - subpath = '%s %s' % (template % n, element.localName) - subpath_encoded = subpath.encode(default_encoding, "replace") - print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded - if self.hasattrs(element): - attr_text = self.attrs2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( - len(attr_text), subpath_encoded) - text = self.collect_text(element) - if text: - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % ( - len(text), subpath_encoded) - self._list(element, subpath) - def getroot(self): return self.document + def get_root_comments(self): + return self.collect_text(self.document) + def getchildren(self, node): return node.childNodes + def gettag(self, node): + return node.localName + def istag(self, node): return bool(node.localName) + def getlocalname(self, name): + return name + if use_elementtree or use_lxml: class CommonEtreeXmlVfs(XmlVfs): - def hasattrs(self, node): - return bool(node.attrib) + def getattrs(self, node): + return node.attrib.items() def collect_text(self, node): text_accumulator = [] if node.text: text = node.text.strip() - if text: text_accumulator.append(text) + if text: + text_accumulator.append(text) for element in node: if not self.istag(element): - text = u"" % text + text = u"" % element.text text_accumulator.append(text) if node.tail: text = node.tail.strip() - if text: text_accumulator.append(text) - return '\n'.join(text_accumulator).encode(default_encoding, "replace") - - def getroot(self): - return self.document.getroot() + if text: + text_accumulator.append(text) + return '\n'.join(text_accumulator).encode( + default_encoding, "replace") def getchildren(self, node): return list(node) + def gettag(self, node): + return node.tag + def istag(self, node): return isinstance(node.tag, basestring) @@ -233,153 +272,115 @@ if use_elementtree or use_lxml: if use_elementtree: class ElementTreeXmlVfs(CommonEtreeXmlVfs): def parse(self): - # Copied from http://effbot.org/zone/element-pi.ht + # Copied from http://effbot.org/zone/element-pi.htm class PIParser(ET.XMLTreeBuilder): - def __init__(self): - ET.XMLTreeBuilder.__init__(self) - # assumes ElementTree 1.2.X - self._parser.CommentHandler = self.handle_comment - self._parser.ProcessingInstructionHandler = self.handle_pi - self._target.start("document", {}) - - def close(self): - self._target.end("document") - return ET.XMLTreeBuilder.close(self) - - def handle_comment(self, data): - self._target.start(ET.Comment, {}) - self._target.data(data) - self._target.end(ET.Comment) - - def handle_pi(self, target, data): - self._target.start(ET.PI, {}) - self._target.data(target + " " + data) - self._target.end(ET.PI) - - self.document = ET.parse(sys.argv[2], PIParser()) - - def attrs2text(self, node): - attr_accumulator = [] - for name, value in node.attrib.items(): - name = name.encode(default_encoding, "replace") - value = value.encode(default_encoding, "replace") - if name.startswith('{'): - name = name.split('}', 1)[1] # Remove XML namespace - attr_accumulator.append("%s=%s" % (name, value)) - return '\n'.join(attr_accumulator) - - def _list(self, node, path=''): - n = len(node) - if n: - width = int(math.log10(n))+1 - template = "%%0%dd" % width - else: - template = "%d" - n = 0 - for element in node: - if not isinstance(element.tag, basestring): - continue - n += 1 - tag = element.tag - if tag.startswith('{'): - tag = tag.split('}', 1)[1] # Remove XML namespace - if path: - subpath = '%s/%s %s' % (path, template % n, tag) - else: - subpath = '%s %s' % (template % n, tag) - subpath_encoded = subpath.encode(default_encoding, "replace") - print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded - if self.hasattrs(element): - attr_text = self.attrs2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( - len(attr_text), subpath_encoded) - text = self.collect_text(element) - if text: - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % ( - len(text), subpath_encoded) - self._list(element, subpath) + def __init__(self): + ET.XMLTreeBuilder.__init__(self) + # assumes ElementTree 1.2.X + self._parser.CommentHandler = self.handle_comment + self._parser.ProcessingInstructionHandler = self.handle_pi + self._target.start("document", {}) + + def close(self): + self._target.end("document") + return ET.XMLTreeBuilder.close(self) + + def handle_comment(self, data): + self._target.start(ET.Comment, {}) + self._target.data(data) + self._target.end(ET.Comment) + + def handle_pi(self, target, data): + self._target.start(ET.PI, {}) + self._target.data(target + " " + data) + self._target.end(ET.PI) + + self.document = ET.parse(self.xml_file, PIParser()) + + def getroot(self): + return self.document.getroot() + + def get_root_comments(self): + text_accumulator = [] + for element in self.getroot(): + if not self.istag(element): + text = u"" % element.text + text_accumulator.append(text) + return '\n'.join(text_accumulator).encode( + default_encoding, "replace") + + def getlocalname(self, name): + if name.startswith('{'): + name = name.split('}', 1)[1] # Remove XML namespace + return name if use_lxml: class LxmlEtreeXmlVfs(CommonEtreeXmlVfs): + supports_namespaces = True + def parse(self): - self.document = etree.parse(sys.argv[2]) + self.document = etree.parse(self.xml_file) - def attrs2text(self, node): - attr_accumulator = [] - for name, value in node.attrib.items(): - name = etree.QName(name).localname.encode(default_encoding, "replace") - value = value.encode(default_encoding, "replace") - attr_accumulator.append("%s=%s" % (name, value)) - return '\n'.join(attr_accumulator) + def getroot(self): + return [self.document.getroot()] + + def get_root_comments(self): + text_accumulator = [] + for element in self.document.getroot().itersiblings( + tag=etree.Comment, preceding=True): + text = u"" % element.text + text_accumulator.append(text) + return '\n'.join(text_accumulator).encode( + default_encoding, "replace") + + def getlocalname(self, name): + return etree.QName(name).localname + + def _get_local_ns(self, node): + this_nsmap = node.nsmap + parent = node.getparent() + if parent is not None: + parent_nsmap = parent.nsmap + for key in parent_nsmap: + if this_nsmap[key] == parent_nsmap[key]: + del this_nsmap[key] + return this_nsmap def has_ns(self, node): - return bool(node.nsmap) + return bool(self._get_local_ns(node)) def ns2text(self, node): ns_accumulator = [] - for name, value in node.nsmap.items(): - name = name.encode(default_encoding, "replace") + for name, value in self._get_local_ns(node).items(): + if name: + name = name.encode(default_encoding, "replace") + else: + name = 'xmlns' value = value.encode(default_encoding, "replace") ns_accumulator.append("%s=%s" % (name, value)) return '\n'.join(ns_accumulator) - def list(self): - self._list(self.getroot()) - - def _list(self, node, path=''): - n = len(node) - if n: - width = int(math.log10(n))+1 - template = "%%0%dd" % width - else: - template = "%d" - n = 0 - for element in node: - if not isinstance(element.tag, basestring): - continue - n += 1 - tag = etree.QName(element.tag).localname - if path: - subpath = '%s/%s %s' % (path, template % n, tag) - else: - subpath = '%s %s' % (template % n, tag) - subpath_encoded = subpath.encode(default_encoding, "replace") - print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded - if self.hasattrs(element): - attr_text = self.attrs2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( - len(attr_text), subpath_encoded) - if element.nsmap: - ns_text = self.ns2text(element) - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % ( - len(ns_text), subpath_encoded) - text = self.collect_text(element) - if text: - print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % ( - len(text), subpath_encoded) - self._list(element, subpath) - - def getroot(self): - return [self.document.getroot()] - def build_xmlvfs(): - if default_implementation is None: + if force_implementation is None: if use_lxml: return LxmlEtreeXmlVfs() elif use_elementtree: return ElementTreeXmlVfs() else: return MiniDOMXmlVfs() - elif default_implementation == 'minidom': + elif force_implementation == 'minidom': return MiniDOMXmlVfs() - elif default_implementation == 'elementtree': + elif force_implementation == 'elementtree': return ElementTreeXmlVfs() - elif default_implementation == 'lxml': + elif force_implementation == 'lxml': return LxmlEtreeXmlVfs() + else: + raise ValueError('Unknown implementation "%s", expected "minidom", ' + '"elementtree" or "lxml"' % force_implementation) def mcxml_list(): @@ -400,26 +401,29 @@ def mcxml_copyout(): for path_comp in xml_filename.split('/'): if ' ' in path_comp: i = int(path_comp.split(' ', 1)[0]) - node = xmlvfs.get_child_node(node, i) + node = xmlvfs.get_tag_node(node, i) elif path_comp in ('attributes', 'namespaces', 'text'): break else: xml_error('Unknown file') if path_comp == 'attributes': - if xmlvfs.hasattrs(node): + if xmlvfs.getattrs(node): text = xmlvfs.attrs2text(node) else: xml_error('There are no attributes') elif path_comp == 'namespaces': - if xmlvfs.has_ns(node): + if xmlvfs.supports_namespaces and xmlvfs.has_ns(node): text = xmlvfs.ns2text(node) else: - xml_error('There are no attributes') + xml_error('There are no namespaces') elif path_comp == 'text': - text = xmlvfs.collect_text(node) + if '/' in xml_filename: + text = xmlvfs.collect_text(node) + else: + text = xmlvfs.get_root_comments() else: xml_error('Unknown file') @@ -433,15 +437,19 @@ def mcxml_copyin(): """Put a file to the VFS""" sys.exit("XML VFS doesn't support adding files (read-only filesystem)") + def mcxml_rm(): """Remove a file from the VFS""" - sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)") + sys.exit("XML VFS doesn't support removing files/directories " + "(read-only filesystem)") mcxml_rmdir = mcxml_rm + def mcxml_mkdir(): """Create a directory in the VFS""" - sys.exit("XML VFS doesn't support creating directories (read-only filesystem)") + sys.exit("XML VFS doesn't support creating directories " + "(read-only filesystem)") def xml_error(error_str): @@ -452,7 +460,7 @@ command = sys.argv[1] procname = "mcxml_" + command g = globals() -if not g.has_key(procname): +if procname not in g: logger.critical("Unknown command %s", command) sys.exit(1)