2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 The VFS represents tags as directories; the directories are numbered to
17 distinguish tags with the same name; also numbering helps to sort tags by their
18 order in XML instead of sorting them by name. Attributes, text nodes and
19 comments are represented as text files; attributes are shown in a file named
20 "attributes", attributes are listed in the file as name=value lines (I
21 deliberately ignore a small chance of newline characters in values); names and
22 values are reencoded to the console encoding. Text nodes and comments are
23 collected in a file named "text", stripped and reencoded. The filesystem is read-only.
26 Implementation based on minidom doesn't understand namespaces, it just shows
27 them among other attributes. ElementTree-based implementation doesn't show
28 namespaces at all. Implementation based on lxml.etree shows namespaces in a
29 separate file "namespaces"; every child tag includes its parent's namespaces.
31 It is useful to have a top-down view on an XML structure but it's especially
32 convenient to extract text values from tags. One can get, for example, a
33 base64-encoded image - just walk down the VFS to the tag's directory and copy
34 its text file to a real file.
36 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
41 __author__ = "Oleg Broytman <phd@phdru.name>"
42 __copyright__ = "Copyright (C) 2013 PhiloSoft Design"
45 default_implementation = None # Can be None for default choice,
46 # 'lxml', 'elementtree' or 'minidom'
49 use_elementtree = False
54 import xml.dom.minidom
57 import xml.etree.ElementTree as ET
61 use_elementtree = True
64 import lxml.etree as etree
77 # Get the default charset.
79 lcAll = locale.getdefaultlocale()
80 except locale.Error, err:
81 print >>sys.stderr, "WARNING:", err
85 default_encoding = lcAll[1]
88 default_encoding = locale.getpreferredencoding()
89 except locale.Error, err:
90 print >>sys.stderr, "WARNING:", err
91 default_encoding = sys.getdefaultencoding()
93 default_encoding = sys.getdefaultencoding()
# Diagnostics are routed to stderr through a dedicated named logger; the
# listing code in this script prints VFS entries to stdout.
log_err_handler = logging.StreamHandler(sys.stderr)
logger = logging.getLogger('xml-mcextfs')
logger.setLevel(logging.INFO)
logger.addHandler(log_err_handler)
101 if len(sys.argv) < 3:
103 XML Virtual FileSystem for Midnight Commander version %s
107 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
108 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
109 __version__, __author__, __copyright__
114 locale.setlocale(locale.LC_ALL, '')
117 class XmlVfs(object):
122 self._list(self.getroot())
124 def has_ns(self, node):
127 def get_child_node(self, node, i):
129 for element in self.getchildren(node):
130 if self.istag(element):
134 xml_error('There are less than %d nodes' % i)
137 class MiniDOMXmlVfs(XmlVfs):
139 self.document = xml.dom.minidom.parse(sys.argv[2])
def hasattrs(self, node):
    """Return True when ``node.attributes`` is truthy, False otherwise."""
    attributes = node.attributes
    if attributes:
        return True
    return False
144 def attrs2text(self, node):
145 attrs = node.attributes
146 attrs = [attrs.item(i) for i in range(attrs.length)]
147 return '\n'.join(["%s=%s" %
148 (a.name.encode(default_encoding, "replace"),
149 a.value.encode(default_encoding, "replace"))
152 def collect_text(self, node):
153 text_accumulator = []
154 for element in node.childNodes:
155 if element.localName:
157 elif element.nodeType == element.COMMENT_NODE:
158 text = u"<!--%s-->" % element.nodeValue
159 elif element.nodeType == element.TEXT_NODE:
160 text = element.nodeValue.strip()
162 xml_error("Unknown node type %d" % element.nodeType)
163 if text: text_accumulator.append(text)
164 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
166 def _list(self, node, path=''):
167 childNodes = node.childNodes
169 for element in childNodes:
170 if element.localName:
173 width = int(math.log10(n)) + 1
174 template = "%%0%dd" % width
178 for element in childNodes:
179 if element.localName:
182 subpath = '%s/%s %s' % (path, template % n, element.localName)
184 subpath = '%s %s' % (template % n, element.localName)
185 subpath_encoded = subpath.encode(default_encoding, "replace")
186 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
187 if self.hasattrs(element):
188 attr_text = self.attrs2text(element)
189 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
190 len(attr_text), subpath_encoded)
191 text = self.collect_text(element)
193 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
194 len(text), subpath_encoded)
195 self._list(element, subpath)
def getchildren(self, node):
    """Return the children of *node*, i.e. its ``childNodes`` attribute."""
    children = node.childNodes
    return children
def istag(self, node):
    """Return True when *node* has a non-empty ``localName``.

    The rest of this class uses a truthy ``localName`` to tell element
    (tag) nodes from other child nodes.
    """
    if node.localName:
        return True
    return False
207 if use_elementtree or use_lxml:
208 class CommonEtreeXmlVfs(XmlVfs):
def hasattrs(self, node):
    """Return True when the ``attrib`` mapping of *node* is non-empty."""
    attrib = node.attrib
    if attrib:
        return True
    return False
212 def collect_text(self, node):
213 text_accumulator = []
215 text = node.text.strip()
216 if text: text_accumulator.append(text)
218 if not self.istag(element):
219 text = u"<!--%s-->" % element.text
220 text_accumulator.append(text)
222 text = node.tail.strip()
223 if text: text_accumulator.append(text)
224 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
227 return self.document.getroot()
229 def getchildren(self, node):
def istag(self, node):
    """Return True when ``node.tag`` is a string.

    Other callers in this file use the same ``basestring`` check to skip
    children whose tag is not a plain name.
    """
    tag = node.tag
    return isinstance(tag, basestring)
237 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
239 # Copied from http://effbot.org/zone/element-pi.htm
241 class PIParser(ET.XMLTreeBuilder):
244 ET.XMLTreeBuilder.__init__(self)
245 # assumes ElementTree 1.2.X
246 self._parser.CommentHandler = self.handle_comment
247 self._parser.ProcessingInstructionHandler = self.handle_pi
248 self._target.start("document", {})
251 self._target.end("document")
252 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the builder target as an ``ET.Comment`` element.

    :param data: the comment text passed in by the parser.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the builder as an ``ET.PI`` element.

    :param target: the processing instruction target.
    :param data: the processing instruction data; it is appended to
        *target* after a single space to form the element text.
    """
    builder = self._target
    builder.start(ET.PI, {})
    # Concatenate only after start(), so the call order matches a plain
    # sequence of start/data/end calls.
    payload = target + " " + data
    builder.data(payload)
    builder.end(ET.PI)
264 self.document = ET.parse(sys.argv[2], PIParser())
def attrs2text(self, node):
    """Render the attributes of *node* as newline-separated name=value lines.

    Each name and value is encoded to ``default_encoding`` with the
    "replace" error handler; a leading ``{...}`` namespace part is cut
    from the attribute name.
    """
    def render(key, val):
        key = key.encode(default_encoding, "replace")
        val = val.encode(default_encoding, "replace")
        if key.startswith('{'):
            # Remove XML namespace
            key = key.split('}', 1)[1]
        return "%s=%s" % (key, val)

    return '\n'.join([render(key, val) for key, val in node.attrib.items()])
276 def _list(self, node, path=''):
279 width = int(math.log10(n))+1
280 template = "%%0%dd" % width
285 if not isinstance(element.tag, basestring):
289 if tag.startswith('{'):
290 tag = tag.split('}', 1)[1] # Remove XML namespace
292 subpath = '%s/%s %s' % (path, template % n, tag)
294 subpath = '%s %s' % (template % n, tag)
295 subpath_encoded = subpath.encode(default_encoding, "replace")
296 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
297 if self.hasattrs(element):
298 attr_text = self.attrs2text(element)
299 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
300 len(attr_text), subpath_encoded)
301 text = self.collect_text(element)
303 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
304 len(text), subpath_encoded)
305 self._list(element, subpath)
309 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
311 self.document = etree.parse(sys.argv[2])
def attrs2text(self, node):
    """Render the attributes of *node* as newline-separated name=value lines.

    The local part of each attribute name (via ``etree.QName``) and each
    value are encoded to ``default_encoding`` with the "replace" error
    handler.
    """
    lines = []
    for qualified, raw in node.attrib.items():
        local = etree.QName(qualified).localname
        lines.append("%s=%s" % (
            local.encode(default_encoding, "replace"),
            raw.encode(default_encoding, "replace")))
    return '\n'.join(lines)
def has_ns(self, node):
    """Return True when the ``nsmap`` of *node* is non-empty."""
    nsmap = node.nsmap
    if nsmap:
        return True
    return False
324 def ns2text(self, node):
326 for name, value in node.nsmap.items():
327 name = name.encode(default_encoding, "replace")
328 value = value.encode(default_encoding, "replace")
329 ns_accumulator.append("%s=%s" % (name, value))
330 return '\n'.join(ns_accumulator)
333 self._list(self.getroot())
335 def _list(self, node, path=''):
338 width = int(math.log10(n))+1
339 template = "%%0%dd" % width
344 if not isinstance(element.tag, basestring):
347 tag = etree.QName(element.tag).localname
349 subpath = '%s/%s %s' % (path, template % n, tag)
351 subpath = '%s %s' % (template % n, tag)
352 subpath_encoded = subpath.encode(default_encoding, "replace")
353 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
354 if self.hasattrs(element):
355 attr_text = self.attrs2text(element)
356 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
357 len(attr_text), subpath_encoded)
359 ns_text = self.ns2text(element)
360 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % (
361 len(ns_text), subpath_encoded)
362 text = self.collect_text(element)
364 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
365 len(text), subpath_encoded)
366 self._list(element, subpath)
369 return [self.document.getroot()]
373 if default_implementation is None:
375 return LxmlEtreeXmlVfs()
376 elif use_elementtree:
377 return ElementTreeXmlVfs()
379 return MiniDOMXmlVfs()
380 elif default_implementation == 'minidom':
381 return MiniDOMXmlVfs()
382 elif default_implementation == 'elementtree':
383 return ElementTreeXmlVfs()
384 elif default_implementation == 'lxml':
385 return LxmlEtreeXmlVfs()
389 """List the entire VFS"""
391 xmlvfs = build_xmlvfs()
396 """Extract a file from the VFS"""
398 xmlvfs = build_xmlvfs()
399 xml_filename = sys.argv[3]
400 real_filename = sys.argv[4]
402 node = xmlvfs.getroot()
403 for path_comp in xml_filename.split('/'):
405 i = int(path_comp.split(' ', 1)[0])
406 node = xmlvfs.get_child_node(node, i)
407 elif path_comp in ('attributes', 'namespaces', 'text'):
410 xml_error('Unknown file')
412 if path_comp == 'attributes':
413 if xmlvfs.hasattrs(node):
414 text = xmlvfs.attrs2text(node)
416 xml_error('There are no attributes')
418 elif path_comp == 'namespaces':
419 if xmlvfs.has_ns(node):
420 text = xmlvfs.ns2text(node)
422 xml_error('There are no attributes')
424 elif path_comp == 'text':
425 text = xmlvfs.collect_text(node)
428 xml_error('Unknown file')
430 outfile = open(real_filename, 'w')
436 """Put a file to the VFS"""
437 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
440 """Remove a file from the VFS"""
441 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
443 mcxml_rmdir = mcxml_rm
446 """Create a directory in the VFS"""
447 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
450 def xml_error(error_str):
451 logger.critical("Error walking XML file: %s", error_str)
454 command = sys.argv[1]
455 procname = "mcxml_" + command
458 if not g.has_key(procname):
459 logger.critical("Unknown command %s", command)
467 logger.exception("Error during run")