X-Git-Url: https://git.phdru.name/?p=extfs.d.git;a=blobdiff_plain;f=xml;h=6cf2276f9c3fbcdcd0adcf0dce5fb658ea126644;hp=7c7ffd5181c38d2cc8299284b8fe2150998e0786;hb=af76dda2d3c72d5712ae4559c38da2829398a27b;hpb=6f9a8a4fd54b90ea71ca7f2703059e660608ca82 diff --git a/xml b/xml index 7c7ffd5..6cf2276 100755 --- a/xml +++ b/xml @@ -26,7 +26,7 @@ read-only. Implementation based on minidom doesn't understand namespaces, it just shows them among other attributes. ElementTree-based implementation doesn't show namespaces at all. Implementation based on lxml.etree shows namespaces in a -separate file "namespaces"; every child tag includes its parent's namespaces. +separate file "namespaces". It is useful to have a top-down view on an XML structure but it's especially convenient to extract text values from tags. One can get, for example, a @@ -37,7 +37,7 @@ The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs """ -__version__ = "0.6.0" +__version__ = "0.6.1" __author__ = "Oleg Broytman " __copyright__ = "Copyright (C) 2013 PhiloSoft Design" __license__ = "GPL" @@ -144,7 +144,7 @@ class XmlVfs(object): subpath = '%s %s' % (template % n, tag) subpath_encoded = subpath.encode(default_encoding, "replace") print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded - if self.hasattrs(element): + if self.getattrs(element): attr_text = self.attrs2text(element) print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % ( len(attr_text), subpath_encoded) @@ -158,10 +158,7 @@ class XmlVfs(object): len(text), subpath_encoded) self._list(element, subpath) - def has_ns(self, node): - return False - - def get_child_node(self, node, i): + def get_tag_node(self, node, i): n = 0 for element in self.getchildren(node): if self.istag(element): @@ -170,21 +167,26 @@ class XmlVfs(object): return element xml_error('There are less than %d nodes' % i) + def attrs2text(self, node): + attr_accumulator = [] + for name, value in self.getattrs(node): + name = self.getlocalname(name).encode(default_encoding, "replace") + value = value.encode(default_encoding, "replace") + attr_accumulator.append("%s=%s" % (name, value)) + return '\n'.join(attr_accumulator) + + def has_ns(self, node): + return False + class MiniDOMXmlVfs(XmlVfs): def parse(self): self.document = xml.dom.minidom.parse(sys.argv[2]) - def hasattrs(self, node): - return bool(node.attributes) - - def attrs2text(self, node): + def getattrs(self, node): attrs = node.attributes attrs = [attrs.item(i) for i in range(attrs.length)] - return '\n'.join(["%s=%s" % - (a.name.encode(default_encoding, "replace"), - a.value.encode(default_encoding, "replace")) - for a in attrs]) + return [(a.name, a.value) for a in attrs] def collect_text(self, node): text_accumulator = [] @@ -218,8 +220,8 @@ class MiniDOMXmlVfs(XmlVfs): if use_elementtree or use_lxml: class CommonEtreeXmlVfs(XmlVfs): - def hasattrs(self, node): - return bool(node.attrib) + def getattrs(self, node): + return node.attrib.items() def collect_text(self, node): text_accumulator = [] @@ -235,9 +237,6 @@ if use_elementtree or use_lxml: if text: text_accumulator.append(text) return '\n'.join(text_accumulator).encode(default_encoding, "replace") - def getroot(self): - return self.document.getroot() - def getchildren(self, node): return list(node) @@ -278,15 +277,8 @@ if use_elementtree: self.document = ET.parse(sys.argv[2], PIParser()) - def attrs2text(self, node): - attr_accumulator = [] - for name, value in node.attrib.items(): - name = name.encode(default_encoding, "replace") - value = value.encode(default_encoding, "replace") - if name.startswith('{'): - name = name.split('}', 1)[1] # Remove XML namespace - attr_accumulator.append("%s=%s" % (name, value)) - return '\n'.join(attr_accumulator) + def getroot(self): + return self.document.getroot() def getlocalname(self, name): if name.startswith('{'): @@ -301,31 +293,32 @@ if use_lxml: def parse(self): self.document = etree.parse(sys.argv[2]) - def attrs2text(self, node): - attr_accumulator = [] - for name, value in node.attrib.items(): - name = etree.QName(name).localname.encode(default_encoding, "replace") - value = value.encode(default_encoding, "replace") - attr_accumulator.append("%s=%s" % (name, value)) - return '\n'.join(attr_accumulator) + def getroot(self): + return [self.document.getroot()] + + def getlocalname(self, name): + return etree.QName(name).localname + + def _get_local_ns(self, node): + this_nsmap = node.nsmap + parent = node.getparent() + if parent is not None: + parents_nsmap = parent.nsmap + for key in parents_nsmap: + del this_nsmap[key] + return this_nsmap def has_ns(self, node): - return bool(node.nsmap) + return bool(self._get_local_ns(node)) def ns2text(self, node): ns_accumulator = [] - for name, value in node.nsmap.items(): + for name, value in self._get_local_ns(node).items(): name = name.encode(default_encoding, "replace") value = value.encode(default_encoding, "replace") ns_accumulator.append("%s=%s" % (name, value)) return '\n'.join(ns_accumulator) - def getroot(self): - return [self.document.getroot()] - - def getlocalname(self, name): - return etree.QName(name).localname - def build_xmlvfs(): if force_implementation is None: @@ -363,23 +356,23 @@ def mcxml_copyout(): for path_comp in xml_filename.split('/'): if ' ' in path_comp: i = int(path_comp.split(' ', 1)[0]) - node = xmlvfs.get_child_node(node, i) + node = xmlvfs.get_tag_node(node, i) elif path_comp in ('attributes', 'namespaces', 'text'): break else: xml_error('Unknown file') if path_comp == 'attributes': - if xmlvfs.hasattrs(node): + if xmlvfs.getattrs(node): text = xmlvfs.attrs2text(node) else: xml_error('There are no attributes') elif path_comp == 'namespaces': - if xmlvfs.has_ns(node): + if xmlvfs.supports_namespaces and xmlvfs.has_ns(element): text = xmlvfs.ns2text(node) else: - xml_error('There are no attributes') + xml_error('There are no namespaces') elif path_comp == 'text': text = xmlvfs.collect_text(node)