2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; In older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 The VFS represents tags as directories; the directories are numbered to
17 distinguish tags with the same name; also numbering helps to sort tags by their
18 order in XML instead of sorting them by name. Attributes, text nodes and
19 comments are represented as text files; attributes are shown in a file named
20 "attributes", attributes are listed in the file as name=value lines (I
21 deliberately ignore a small chance of newline characters in values); names and
22 values are reencoded to the console encoding. Text nodes and comments are
23 collected in a file named "text", stripped and reencoded. The filesystem is
24 read-only. ElementTree-based implementation doesn't show namespaces as
25 attributes; lxml.etree-based implementation shows namespaces as a separate file
26 "namespaces"; every child tag includes its parent's namespaces.
28 It is useful to have a top-down view of an XML structure, but it's especially
29 convenient to extract text values from tags. One can get, for example, a
30 base64-encoded image - just walk down the VFS to the tag's directory and copy
31 its text file to a real file.
33 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
38 __author__ = "Oleg Broytman <phd@phdru.name>"
39 __copyright__ = "Copyright (C) 2013 PhiloSoft Design"
42 default_implementation = None # Can be None for default choice,
43 # 'lxml', 'elementtree' or 'minidom'
46 use_elementtree = False
51 import xml.dom.minidom
54 import xml.etree.ElementTree as ET
58 use_elementtree = True
61 import lxml.etree as etree
74 # Get the default charset.
76 lcAll = locale.getdefaultlocale()
77 except locale.Error, err:
78 print >>sys.stderr, "WARNING:", err
82 default_encoding = lcAll[1]
85 default_encoding = locale.getpreferredencoding()
86 except locale.Error, err:
87 print >>sys.stderr, "WARNING:", err
88 default_encoding = sys.getdefaultencoding()
90 default_encoding = sys.getdefaultencoding()
93 logger = logging.getLogger('xml-mcextfs')
94 log_err_handler = logging.StreamHandler(sys.stderr)
95 logger.addHandler(log_err_handler)
96 logger.setLevel(logging.INFO)
100 XML Virtual FileSystem for Midnight Commander version %s
104 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
105 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
106 __version__, __author__, __copyright__
111 locale.setlocale(locale.LC_ALL, '')
114 class XmlVfs(object):
119 self._list(self.getroot())
121 def has_ns(self, node):
124 def get_child_node(self, node, i):
126 for element in self.getchildren(node):
127 if self.istag(element):
131 xml_error('There are less than %d nodes' % i)
134 class MiniDOMXmlVfs(XmlVfs):
136 self.document = xml.dom.minidom.parse(sys.argv[2])
def hasattrs(self, node):
    """Tell whether *node* has any attributes.

    Returns True when ``node.attributes`` is truthy, False otherwise
    (including when it is None or empty).
    """
    attributes = node.attributes
    if attributes:
        return True
    return False
141 def attrs2text(self, node):
142 attrs = node.attributes
143 attrs = [attrs.item(i) for i in range (attrs.length)]
144 return '\n'.join(["%s=%s" %
145 (a.name.encode(default_encoding, "replace"),
146 a.value.encode(default_encoding, "replace"))
149 def collect_text(self, node):
150 text_accumulator = []
151 for element in node.childNodes:
152 if element.localName:
154 elif element.nodeType == element.COMMENT_NODE:
155 text = u"<!--%s-->" % element.nodeValue
156 elif element.nodeType == element.TEXT_NODE:
157 text = element.nodeValue.strip()
159 xml_error("Unknown node type %d" % element.nodeType)
160 if text: text_accumulator.append(text)
161 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
163 def _list(self, node, path=''):
164 childNodes = node.childNodes
166 for element in childNodes:
167 if element.localName:
170 width = int(math.log10(n))+1
171 template = "%%0%dd" % width
175 for element in childNodes:
176 if element.localName:
179 subpath = '%s/%s %s' % (path, template % n, element.localName)
181 subpath = '%s %s' % (template % n, element.localName)
182 subpath_encoded = subpath.encode(default_encoding, "replace")
183 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
184 if self.hasattrs(element):
185 attr_text = self.attrs2text(element)
186 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
187 len(attr_text), subpath_encoded)
188 text = self.collect_text(element)
190 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
191 len(text), subpath_encoded)
192 self._list(element, subpath)
def getchildren(self, node):
    """Return the children of *node*, i.e. its ``childNodes`` attribute."""
    children = node.childNodes
    return children
def istag(self, node):
    """Tell whether *node* is a tag.

    A node counts as a tag when its ``localName`` is truthy.
    """
    if node.localName:
        return True
    return False
204 if use_elementtree or use_lxml:
205 class CommonEtreeXmlVfs(XmlVfs):
def hasattrs(self, node):
    """Tell whether *node* has any attributes.

    Returns True when the ``node.attrib`` mapping is truthy (non-empty),
    False otherwise.
    """
    attrib = node.attrib
    if attrib:
        return True
    return False
209 def collect_text(self, node):
210 text_accumulator = []
212 text = node.text.strip()
213 if text: text_accumulator.append(text)
215 if not self.istag(element):
216 text = u"<!--%s-->" % text
217 text_accumulator.append(text)
219 text = node.tail.strip()
220 if text: text_accumulator.append(text)
221 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
224 return self.document.getroot()
226 def getchildren(self, node):
def istag(self, node):
    """Tell whether *node* is a regular tag.

    A node is a tag when its ``tag`` attribute is a string; nodes whose
    ``tag`` is anything else are not treated as tags.
    """
    tag = node.tag
    if isinstance(tag, basestring):
        return True
    return False
234 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
236 # Copied from http://effbot.org/zone/element-pi.htm
238 class PIParser(ET.XMLTreeBuilder):
241 ET.XMLTreeBuilder.__init__(self)
242 # assumes ElementTree 1.2.X
243 self._parser.CommentHandler = self.handle_comment
244 self._parser.ProcessingInstructionHandler = self.handle_pi
245 self._target.start("document", {})
248 self._target.end("document")
249 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Forward an XML comment to the tree-builder target.

    The comment is emitted as an element whose tag is ``ET.Comment``,
    with no attributes and *data* as its text.
    """
    builder = self._target
    comment_tag = ET.Comment
    builder.start(comment_tag, {})
    builder.data(data)
    builder.end(comment_tag)
def handle_pi(self, target, data):
    """Forward a processing instruction to the tree-builder target.

    The instruction is emitted as an element whose tag is ``ET.PI``,
    with no attributes; its text is *target* and *data* joined by a
    single space.
    """
    builder = self._target
    pi_tag = ET.PI
    pi_text = target + " " + data
    builder.start(pi_tag, {})
    builder.data(pi_text)
    builder.end(pi_tag)
261 self.document = ET.parse(sys.argv[2], PIParser())
def attrs2text(self, node):
    """Render the attributes of *node* as ``name=value`` lines.

    Names and values are encoded to ``default_encoding`` (unencodable
    characters are replaced). A leading ``{...}`` namespace prefix is
    removed from each name. Returns the lines joined with newlines.
    """
    lines = []
    for raw_name, raw_value in node.attrib.items():
        encoded_name = raw_name.encode(default_encoding, "replace")
        encoded_value = raw_value.encode(default_encoding, "replace")
        if encoded_name.startswith('{'):
            # Drop the "{namespace}" prefix, keep only the local name.
            encoded_name = encoded_name.split('}', 1)[1]
        lines.append("%s=%s" % (encoded_name, encoded_value))
    return '\n'.join(lines)
273 def _list(self, node, path=''):
276 width = int(math.log10(n))+1
277 template = "%%0%dd" % width
282 if not isinstance(element.tag, basestring):
286 if tag.startswith('{'):
287 tag = tag.split('}', 1)[1] # Remove XML namespace
289 subpath = '%s/%s %s' % (path, template % n, tag)
291 subpath = '%s %s' % (template % n, tag)
292 subpath_encoded = subpath.encode(default_encoding, "replace")
293 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
294 if self.hasattrs(element):
295 attr_text = self.attrs2text(element)
296 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
297 len(attr_text), subpath_encoded)
298 text = self.collect_text(element)
300 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
301 len(text), subpath_encoded)
302 self._list(element, subpath)
306 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
308 self.document = etree.parse(sys.argv[2])
def attrs2text(self, node):
    """Render the attributes of *node* as ``name=value`` lines.

    Each attribute name is reduced to its local part via ``etree.QName``;
    names and values are encoded to ``default_encoding`` (unencodable
    characters are replaced). Returns the lines joined with newlines.
    """
    encoded_pairs = [
        (etree.QName(attr_name).localname.encode(default_encoding, "replace"),
         attr_value.encode(default_encoding, "replace"))
        for attr_name, attr_value in node.attrib.items()
    ]
    return '\n'.join(["%s=%s" % pair for pair in encoded_pairs])
def has_ns(self, node):
    """Tell whether *node* has any namespaces.

    Returns True when ``node.nsmap`` is truthy (non-empty), False
    otherwise.
    """
    namespaces = node.nsmap
    if namespaces:
        return True
    return False
321 def ns2text(self, node):
323 for name, value in node.nsmap.items():
324 name = name.encode(default_encoding, "replace")
325 value = value.encode(default_encoding, "replace")
326 ns_accumulator.append("%s=%s" % (name, value))
327 return '\n'.join(ns_accumulator)
330 self._list(self.getroot())
332 def _list(self, node, path=''):
335 width = int(math.log10(n))+1
336 template = "%%0%dd" % width
341 if not isinstance(element.tag, basestring):
344 tag = etree.QName(element.tag).localname
346 subpath = '%s/%s %s' % (path, template % n, tag)
348 subpath = '%s %s' % (template % n, tag)
349 subpath_encoded = subpath.encode(default_encoding, "replace")
350 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
351 if self.hasattrs(element):
352 attr_text = self.attrs2text(element)
353 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
354 len(attr_text), subpath_encoded)
356 ns_text = self.ns2text(element)
357 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % (
358 len(ns_text), subpath_encoded)
359 text = self.collect_text(element)
361 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
362 len(text), subpath_encoded)
363 self._list(element, subpath)
366 return [self.document.getroot()]
370 if default_implementation is None:
372 return LxmlEtreeXmlVfs()
373 elif use_elementtree:
374 return ElementTreeXmlVfs()
376 return MiniDOMXmlVfs()
377 elif default_implementation == 'minidom':
378 return MiniDOMXmlVfs()
379 elif default_implementation == 'elementtree':
380 return ElementTreeXmlVfs()
381 elif default_implementation == 'lxml':
382 return LxmlEtreeXmlVfs()
386 """List the entire VFS"""
388 xmlvfs = build_xmlvfs()
393 """Extract a file from the VFS"""
395 xmlvfs = build_xmlvfs()
396 xml_filename = sys.argv[3]
397 real_filename = sys.argv[4]
399 node = xmlvfs.getroot()
400 for path_comp in xml_filename.split('/'):
402 i = int(path_comp.split(' ', 1)[0])
403 node = xmlvfs.get_child_node(node, i)
404 elif path_comp in ('attributes', 'namespaces', 'text'):
407 xml_error('Unknown file')
409 if path_comp == 'attributes':
410 if xmlvfs.hasattrs(node):
411 text = xmlvfs.attrs2text(node)
413 xml_error('There are no attributes')
415 elif path_comp == 'namespaces':
416 if xmlvfs.has_ns(node):
417 text = xmlvfs.ns2text(node)
419 xml_error('There are no attributes')
421 elif path_comp == 'text':
422 text = xmlvfs.collect_text(node)
425 xml_error('Unknown file')
427 outfile = open(real_filename, 'w')
433 """Put a file to the VFS"""
434 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
437 """Remove a file from the VFS"""
438 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
440 mcxml_rmdir = mcxml_rm
443 """Create a directory in the VFS"""
444 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
447 def xml_error(error_str):
448 logger.critical("Error walking XML file: %s", error_str)
451 command = sys.argv[1]
452 procname = "mcxml_" + command
455 if not g.has_key(procname):
456 logger.critical("Unknown command %s", command)
464 logger.exception("Error during run")