file the command is "%cd"): cd file/xml://; in older versions it is
cd file#xml, where "file" is the name of your XML file.
+See detailed installation instructions at
+http://phdru.name/Software/mc/INSTALL.html.
+
The VFS represents tags as directories; the directories are numbered to
-distinguish tags with the same name; also numbering helps to sort tags by their
+distinguish tags with the same name; numbering also helps to sort tags by their
order in XML instead of sorting them by name. Attributes, text nodes and
comments are represented as text files; attributes are shown in a file named
"attributes", attributes are listed in the file as name=value lines (I
The minidom-based implementation doesn't understand namespaces; it just shows
them among other attributes. The ElementTree-based implementation doesn't show
namespaces at all. The lxml.etree-based implementation shows namespaces in a
-separate file "namespaces"; every child tag includes its parent's namespaces.
+separate file "namespaces".
It is useful to have a top-down view of an XML structure, but it's especially
convenient to extract text values from tags. One can get, for example, a
"""
-__version__ = "0.6.0"
+__version__ = "1.1.0"
__author__ = "Oleg Broytman <phd@phdru.name>"
__copyright__ = "Copyright (C) 2013 PhiloSoft Design"
__license__ = "GPL"
self.parse()
def list(self):
+ root_comments = self.get_root_comments()
+ if root_comments:
+ print "-r--r--r-- 1 user group %d Jan 1 00:00 text" % (len(root_comments))
self._list(self.getroot())
def _list(self, node, path=''):
subpath = '%s %s' % (template % n, tag)
subpath_encoded = subpath.encode(default_encoding, "replace")
print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
- if self.hasattrs(element):
+ if self.getattrs(element):
attr_text = self.attrs2text(element)
print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
len(attr_text), subpath_encoded)
len(text), subpath_encoded)
self._list(element, subpath)
- def has_ns(self, node):
- return False
-
- def get_child_node(self, node, i):
+ def get_tag_node(self, node, i):
n = 0
for element in self.getchildren(node):
if self.istag(element):
return element
xml_error('There are less than %d nodes' % i)
+ def attrs2text(self, node):
+     """Render the attributes of `node` as newline-separated name=value lines.
+
+     The (name, value) pairs come from self.getattrs(node); each name is
+     reduced with self.getlocalname() and both parts are encoded with
+     default_encoding (unencodable characters are replaced).
+     """
+     def encode(text):
+         return text.encode(default_encoding, "replace")
+
+     lines = [
+         "%s=%s" % (encode(self.getlocalname(attr_name)), encode(attr_value))
+         for attr_name, attr_value in self.getattrs(node)
+     ]
+     return '\n'.join(lines)
+
+ def has_ns(self, node):
+ # Always reports that `node` has no namespaces; `node` itself is not inspected.
+ return False
+
class MiniDOMXmlVfs(XmlVfs):
def parse(self):
self.document = xml.dom.minidom.parse(sys.argv[2])
- def hasattrs(self, node):
- return bool(node.attributes)
-
- def attrs2text(self, node):
+ def getattrs(self, node):
attrs = node.attributes
attrs = [attrs.item(i) for i in range(attrs.length)]
- return '\n'.join(["%s=%s" %
- (a.name.encode(default_encoding, "replace"),
- a.value.encode(default_encoding, "replace"))
- for a in attrs])
+ return [(a.name, a.value) for a in attrs]
def collect_text(self, node):
text_accumulator = []
def getroot(self):
return self.document
+ def get_root_comments(self):
+ # The document-level text is whatever collect_text() gathers when it is
+ # applied to the document node itself.
+ return self.collect_text(self.document)
+
def getchildren(self, node):
return node.childNodes
if use_elementtree or use_lxml:
class CommonEtreeXmlVfs(XmlVfs):
- def hasattrs(self, node):
- return bool(node.attrib)
+ def getattrs(self, node):
+ # Return the items of node.attrib; an empty result is falsy, which lets
+ # callers use this method as an "are there attributes?" test as well.
+ return node.attrib.items()
def collect_text(self, node):
text_accumulator = []
if text: text_accumulator.append(text)
return '\n'.join(text_accumulator).encode(default_encoding, "replace")
- def getroot(self):
- return self.document.getroot()
-
def getchildren(self, node):
return list(node)
self.document = ET.parse(sys.argv[2], PIParser())
- def attrs2text(self, node):
- attr_accumulator = []
- for name, value in node.attrib.items():
- name = name.encode(default_encoding, "replace")
- value = value.encode(default_encoding, "replace")
- if name.startswith('{'):
- name = name.split('}', 1)[1] # Remove XML namespace
- attr_accumulator.append("%s=%s" % (name, value))
- return '\n'.join(attr_accumulator)
+ def getroot(self):
+ # Return the root element of the tree stored in self.document.
+ return self.document.getroot()
+
+ def get_root_comments(self):
+     """Return the non-tag children of the root rendered as comments.
+
+     Every child of self.getroot() for which self.istag() is false is
+     formatted as ``<!--text-->``; the pieces are joined with newlines and
+     encoded with default_encoding (unencodable characters are replaced).
+     """
+     comments = [
+         u"<!--%s-->" % child.text
+         for child in self.getroot()
+         if not self.istag(child)
+     ]
+     return '\n'.join(comments).encode(default_encoding, "replace")
def getlocalname(self, name):
if name.startswith('{'):
def parse(self):
self.document = etree.parse(sys.argv[2])
- def attrs2text(self, node):
- attr_accumulator = []
- for name, value in node.attrib.items():
- name = etree.QName(name).localname.encode(default_encoding, "replace")
- value = value.encode(default_encoding, "replace")
- attr_accumulator.append("%s=%s" % (name, value))
- return '\n'.join(attr_accumulator)
+ def getroot(self):
+ # Hand back the document's root element wrapped in a one-element list
+ # rather than the element itself.
+ # NOTE(review): presumably so that code iterating over the result sees the
+ # root tag as its first entry -- confirm against _list()/getchildren().
+ return [self.document.getroot()]
+
+ def get_root_comments(self):
+     """Return the comments that precede the root element, in document order.
+
+     Each comment is rendered as ``<!--text-->``; the results are joined
+     with newlines and encoded with default_encoding (unencodable
+     characters are replaced).
+     """
+     root = self.document.getroot()
+     comments = [
+         u"<!--%s-->" % comment.text
+         for comment in root.itersiblings(tag=etree.Comment, preceding=True)
+     ]
+     # itersiblings(preceding=True) walks backwards from the root element,
+     # so the comments arrive nearest-first; flip them to restore the order
+     # they have in the file.
+     comments.reverse()
+     return '\n'.join(comments).encode(default_encoding, "replace")
+
+ def getlocalname(self, name):
+ # Let lxml's QName parse the (possibly namespace-qualified) name and
+ # return only its local part.
+ return etree.QName(name).localname
+
+ def _get_local_ns(self, node):
+     """Return the prefix->URI bindings that `node` itself introduces.
+
+     node.nsmap is compared with the parent's nsmap: a binding is dropped
+     only when the parent maps the same prefix to the same URI. A prefix
+     that the node rebinds to a different URI is a local declaration and
+     is kept. For a node without a parent the whole nsmap is returned.
+     The default namespace appears under the prefix None.
+     """
+     this_nsmap = node.nsmap
+     parent = node.getparent()
+     if parent is None:
+         return this_nsmap
+     parents_nsmap = parent.nsmap
+     # URIs are never None, so a prefix missing from the parent (get()
+     # returns None) always compares unequal and is kept.
+     return dict(
+         (prefix, uri) for prefix, uri in this_nsmap.items()
+         if parents_nsmap.get(prefix) != uri
+     )
def has_ns(self, node):
- return bool(node.nsmap)
+ return bool(self._get_local_ns(node))
def ns2text(self, node):
ns_accumulator = []
- for name, value in node.nsmap.items():
+ for name, value in self._get_local_ns(node).items():
+ if name is None: name = ''
name = name.encode(default_encoding, "replace")
value = value.encode(default_encoding, "replace")
ns_accumulator.append("%s=%s" % (name, value))
return '\n'.join(ns_accumulator)
- def getroot(self):
- return [self.document.getroot()]
-
- def getlocalname(self, name):
- return etree.QName(name).localname
-
def build_xmlvfs():
if force_implementation is None:
for path_comp in xml_filename.split('/'):
if ' ' in path_comp:
i = int(path_comp.split(' ', 1)[0])
- node = xmlvfs.get_child_node(node, i)
+ node = xmlvfs.get_tag_node(node, i)
elif path_comp in ('attributes', 'namespaces', 'text'):
break
else:
xml_error('Unknown file')
if path_comp == 'attributes':
- if xmlvfs.hasattrs(node):
+ if xmlvfs.getattrs(node):
text = xmlvfs.attrs2text(node)
else:
xml_error('There are no attributes')
elif path_comp == 'namespaces':
- if xmlvfs.has_ns(node):
+ if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
text = xmlvfs.ns2text(node)
else:
- xml_error('There are no attributes')
+ xml_error('There are no namespaces')
elif path_comp == 'text':
- text = xmlvfs.collect_text(node)
+ if '/' in xml_filename:
+ text = xmlvfs.collect_text(node)
+ else:
+ text = xmlvfs.get_root_comments()
else:
xml_error('Unknown file')