2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 http://phdru.name/Software/mc/INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name. Attributes, text nodes and
22 comments are represented as text files; attributes are shown in a file named
23 "attributes", attributes are listed in the file as name=value lines (I
24 deliberately ignore a small chance of newline characters in values); names and
25 values are reencoded to the console encoding. Text nodes and comments are
collected in a file named "text", stripped and reencoded. The filesystem is read-only.
The minidom-based implementation doesn't understand namespaces; it just shows
them among the other attributes. The ElementTree-based implementation doesn't
show namespaces at all. The lxml.etree-based implementation shows namespaces in
a separate file "namespaces".
34 It is useful to have a top-down view on an XML structure but it's especially
35 convenient to extract text values from tags. One can get, for example, a
36 base64-encoded image - just walk down the VFS to the tag's directory and copy
37 its text file to a real file.
39 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
44 __author__ = "Oleg Broytman <phd@phdru.name>"
45 __copyright__ = "Copyright (C) 2013 PhiloSoft Design"
48 force_implementation = None # Can be None for default choice,
49 # 'lxml', 'elementtree' or 'minidom'
52 use_elementtree = False
57 import xml.dom.minidom
60 import xml.etree.ElementTree as ET
64 use_elementtree = True
67 import lxml.etree as etree
80 # Get the default charset.
82 lcAll = locale.getdefaultlocale()
83 except locale.Error, err:
84 print >>sys.stderr, "WARNING:", err
88 default_encoding = lcAll[1]
91 default_encoding = locale.getpreferredencoding()
92 except locale.Error, err:
93 print >>sys.stderr, "WARNING:", err
94 default_encoding = sys.getdefaultencoding()
96 default_encoding = sys.getdefaultencoding()
99 logger = logging.getLogger('xml-mcextfs')
100 log_err_handler = logging.StreamHandler(sys.stderr)
101 logger.addHandler(log_err_handler)
102 logger.setLevel(logging.INFO)
104 if len(sys.argv) < 3:
106 XML Virtual FileSystem for Midnight Commander version %s
110 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
111 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
112 __version__, __author__, __copyright__
117 locale.setlocale(locale.LC_ALL, '')
120 class XmlVfs(object):
121 """Abstract base class"""
123 supports_namespaces = False
129 root_comments = self.get_root_comments()
131 print "-r--r--r-- 1 user group %d Jan 1 00:00 text" % (len(root_comments))
132 self._list(self.getroot())
134 def _list(self, node, path=''):
135 n = len(self.getchildren(node))
137 width = int(math.log10(n)) + 1
138 template = "%%0%dd" % width
142 for element in self.getchildren(node):
143 if not self.istag(element):
146 tag = self.getlocalname(self.gettag(element))
148 subpath = '%s/%s %s' % (path, template % n, tag)
150 subpath = '%s %s' % (template % n, tag)
151 subpath_encoded = subpath.encode(default_encoding, "replace")
152 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
153 if self.getattrs(element):
154 attr_text = self.attrs2text(element)
155 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
156 len(attr_text), subpath_encoded)
157 if self.supports_namespaces and self.has_ns(element):
158 ns_text = self.ns2text(element)
159 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % (
160 len(ns_text), subpath_encoded)
161 text = self.collect_text(element)
163 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
164 len(text), subpath_encoded)
165 self._list(element, subpath)
167 def get_tag_node(self, node, i):
169 for element in self.getchildren(node):
170 if self.istag(element):
174 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Format the attributes of ``node`` as ``name=value`` lines.

    The (name, value) pairs come from ``self.getattrs(node)``.  Every name
    is first reduced with ``self.getlocalname()``; names and values are
    then encoded to ``default_encoding``, replacing characters that cannot
    be encoded.  Newline characters inside values are not escaped.

    Returns the lines joined with a newline (an empty string when the
    node has no attributes).
    """
    encoded_pairs = [
        (self.getlocalname(raw_name).encode(default_encoding, "replace"),
         raw_value.encode(default_encoding, "replace"))
        for raw_name, raw_value in self.getattrs(node)
    ]
    return '\n'.join(["%s=%s" % pair for pair in encoded_pairs])
184 def has_ns(self, node):
188 class MiniDOMXmlVfs(XmlVfs):
190 self.document = xml.dom.minidom.parse(sys.argv[2])
def getattrs(self, node):
    """Return the attributes of ``node`` as a list of (name, value) tuples.

    minidom exposes ``attributes`` as a NamedNodeMap on element nodes but
    as ``None`` on every other node type (document, text, comment).  Such
    nodes are reported as having no attributes instead of failing with an
    AttributeError, so callers can simply test the result for emptiness.
    """
    attr_map = node.attributes
    if attr_map is None:
        # Not an element: there is nothing to enumerate.
        return []
    attr_nodes = [attr_map.item(index) for index in range(attr_map.length)]
    return [(attr.name, attr.value) for attr in attr_nodes]
197 def collect_text(self, node):
198 text_accumulator = []
199 for element in node.childNodes:
200 if element.localName:
202 elif element.nodeType == element.COMMENT_NODE:
203 text = u"<!--%s-->" % element.nodeValue
204 elif element.nodeType == element.TEXT_NODE:
205 text = element.nodeValue.strip()
207 xml_error("Unknown node type %d" % element.nodeType)
208 if text: text_accumulator.append(text)
209 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected from the direct children of the document.

    Delegates to ``self.collect_text()`` with the parsed document node, so
    the result is whatever that method produces for the top level.
    """
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of ``node`` (its ``childNodes`` sequence)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of ``node``, i.e. its ``localName`` attribute."""
    tag_name = node.localName
    return tag_name
def istag(self, node):
    """Tell whether ``node`` is a tag: True when its ``localName`` is truthy."""
    if node.localName:
        return True
    return False
226 def getlocalname(self, name):
230 if use_elementtree or use_lxml:
231 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) pairs stored in the ``attrib`` mapping of ``node``."""
    attributes = node.attrib
    return attributes.items()
235 def collect_text(self, node):
236 text_accumulator = []
238 text = node.text.strip()
239 if text: text_accumulator.append(text)
241 if not self.istag(element):
242 text = u"<!--%s-->" % element.text
243 text_accumulator.append(text)
245 text = node.tail.strip()
246 if text: text_accumulator.append(text)
247 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
249 def getchildren(self, node):
252 def gettag(self, node):
def istag(self, node):
    """Tell whether ``node`` is a tag: True when ``node.tag`` is a string.

    Nodes whose ``tag`` is not a string (the comment and processing
    instruction nodes built with ``ET.Comment`` / ``ET.PI`` elsewhere in
    this file) are reported as non-tags.
    """
    tag = node.tag
    return isinstance(tag, basestring)
260 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
# Copied from http://effbot.org/zone/element-pi.htm
264 class PIParser(ET.XMLTreeBuilder):
267 ET.XMLTreeBuilder.__init__(self)
268 # assumes ElementTree 1.2.X
269 self._parser.CommentHandler = self.handle_comment
270 self._parser.ProcessingInstructionHandler = self.handle_pi
271 self._target.start("document", {})
274 self._target.end("document")
275 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ``ET.Comment`` node.

    Opens a node tagged ``ET.Comment`` with no attributes, passes ``data``
    as its text and closes it again.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as an ``ET.PI`` node.

    The node text is the PI target and its data separated by one space.
    """
    builder = self._target
    pi_text = " ".join((target, data))
    builder.start(ET.PI, {})
    builder.data(pi_text)
    builder.end(ET.PI)
287 self.document = ET.parse(sys.argv[2], PIParser())
290 return self.document.getroot()
def get_root_comments(self):
    """Return the top-level non-tag nodes rendered as ``<!--...-->`` lines.

    Iterates over the children of ``self.getroot()`` and, for each child
    that ``self.istag()`` rejects, formats its text as an XML comment.
    The lines are joined with a newline and encoded to
    ``default_encoding`` (unencodable characters are replaced).

    NOTE(review): processing instructions are also non-tag nodes here, so
    they appear to be rendered with comment markers too -- confirm this is
    intended.
    """
    rendered = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    joined = '\n'.join(rendered)
    return joined.encode(default_encoding, "replace")
300 def getlocalname(self, name):
301 if name.startswith('{'):
302 name = name.split('}', 1)[1] # Remove XML namespace
307 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
308 supports_namespaces = True
311 self.document = etree.parse(sys.argv[2])
314 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments preceding the root element, in document order.

    Each comment is rendered as ``<!--text-->``; the lines are joined with
    a newline and encoded to ``default_encoding`` (unencodable characters
    are replaced).  Comments that follow the root element are not
    included.
    """
    root = self.document.getroot()
    comments = [
        u"<!--%s-->" % comment.text
        for comment in root.itersiblings(tag=etree.Comment, preceding=True)
    ]
    # With preceding=True lxml walks backwards from the root element, so
    # the nearest comment comes first; flip the list to get the order in
    # which the comments appear in the file.
    comments.reverse()
    return '\n'.join(comments).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the local part of ``name`` as computed by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
326 def _get_local_ns(self, node):
327 this_nsmap = node.nsmap
328 parent = node.getparent()
329 if parent is not None:
330 parents_nsmap = parent.nsmap
331 for key in parents_nsmap:
def has_ns(self, node):
    """Tell whether ``self._get_local_ns(node)`` yields anything (True/False)."""
    if self._get_local_ns(node):
        return True
    return False
338 def ns2text(self, node):
340 for name, value in self._get_local_ns(node).items():
341 if name is None: name = ''
342 name = name.encode(default_encoding, "replace")
343 value = value.encode(default_encoding, "replace")
344 ns_accumulator.append("%s=%s" % (name, value))
345 return '\n'.join(ns_accumulator)
349 if force_implementation is None:
351 return LxmlEtreeXmlVfs()
352 elif use_elementtree:
353 return ElementTreeXmlVfs()
355 return MiniDOMXmlVfs()
356 elif force_implementation == 'minidom':
357 return MiniDOMXmlVfs()
358 elif force_implementation == 'elementtree':
359 return ElementTreeXmlVfs()
360 elif force_implementation == 'lxml':
361 return LxmlEtreeXmlVfs()
363 raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation)
367 """List the entire VFS"""
369 xmlvfs = build_xmlvfs()
374 """Extract a file from the VFS"""
376 xmlvfs = build_xmlvfs()
377 xml_filename = sys.argv[3]
378 real_filename = sys.argv[4]
380 node = xmlvfs.getroot()
381 for path_comp in xml_filename.split('/'):
383 i = int(path_comp.split(' ', 1)[0])
384 node = xmlvfs.get_tag_node(node, i)
385 elif path_comp in ('attributes', 'namespaces', 'text'):
388 xml_error('Unknown file')
390 if path_comp == 'attributes':
391 if xmlvfs.getattrs(node):
392 text = xmlvfs.attrs2text(node)
394 xml_error('There are no attributes')
396 elif path_comp == 'namespaces':
397 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
398 text = xmlvfs.ns2text(node)
400 xml_error('There are no namespaces')
402 elif path_comp == 'text':
403 if '/' in xml_filename:
404 text = xmlvfs.collect_text(node)
406 text = xmlvfs.get_root_comments()
409 xml_error('Unknown file')
411 outfile = open(real_filename, 'w')
417 """Put a file to the VFS"""
418 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
421 """Remove a file from the VFS"""
422 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
424 mcxml_rmdir = mcxml_rm
427 """Create a directory in the VFS"""
428 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
431 def xml_error(error_str):
432 logger.critical("Error walking XML file: %s", error_str)
435 command = sys.argv[1]
436 procname = "mcxml_" + command
439 if not g.has_key(procname):
440 logger.critical("Unknown command %s", command)
448 logger.exception("Error during run")