2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 http://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
27 are collected in a file named "text", stripped and reencoded. The filesystem is
30 Date/time for all directories/files set to the last modification time of the
33 Implementation based on minidom doesn't understand namespaces, it just shows
34 them among other attributes. ElementTree-based implementation doesn't show
35 namespaces at all. Implementation based on lxml.etree shows namespaces in a
36 separate file "namespaces".
38 It is useful to have a top-down view on an XML structure but it's especially
39 convenient to extract text values from tags. One can get, for example, a
40 base64-encoded image - just walk down the VFS to the tag's directory and copy
41 its text file to a real file.
43 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
48 __author__ = "Oleg Broytman <phd@phdru.name>"
49 __copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
52 # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
53 force_implementation = None
56 use_elementtree = False
60 from os.path import getmtime
62 from time import localtime
63 import xml.dom.minidom
66 import xml.etree.ElementTree as ET
70 use_elementtree = True
73 import lxml.etree as etree
86 # Get the default charset.
88 lcAll = locale.getdefaultlocale()
89 except locale.Error, err:
90 print >>sys.stderr, "WARNING:", err
94 default_encoding = lcAll[1]
97 default_encoding = locale.getpreferredencoding()
98 except locale.Error, err:
99 print >>sys.stderr, "WARNING:", err
100 default_encoding = sys.getdefaultencoding()
102 default_encoding = sys.getdefaultencoding()
105 logger = logging.getLogger('xml-mcextfs')
106 log_err_handler = logging.StreamHandler(sys.stderr)
107 logger.addHandler(log_err_handler)
108 logger.setLevel(logging.INFO)
110 if len(sys.argv) < 3:
112 XML Virtual FileSystem for Midnight Commander version %s
116 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
117 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
118 __version__, __author__, __copyright__)
122 locale.setlocale(locale.LC_ALL, '')
125 class XmlVfs(object):
126 """Abstract base class"""
128 supports_namespaces = False
131 self.xml_file = sys.argv[2]
135 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
136 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
138 root_comments = self.get_root_comments()
140 print "-r--r--r-- 1 user group %d %s text" % (
141 len(root_comments), self.xml_file_dt)
143 self._list(self.getroot())
145 def _list(self, node, path=''):
146 n = len(self.getchildren(node))
148 width = int(math.log10(n)) + 1
149 template = "%%0%dd" % width
153 for element in self.getchildren(node):
154 if not self.istag(element):
157 tag = self.getlocalname(self.gettag(element))
159 subpath = '%s/%s %s' % (path, template % n, tag)
161 subpath = '%s %s' % (template % n, tag)
162 subpath_encoded = subpath.encode(default_encoding, "replace")
163 print "dr-xr-xr-x 1 user group 0 %s %s" % (
164 self.xml_file_dt, subpath_encoded)
165 if self.getattrs(element):
166 attr_text = self.attrs2text(element)
167 print "-r--r--r-- 1 user group %d %s %s/attributes" % (
168 len(attr_text), self.xml_file_dt, subpath_encoded)
169 if self.supports_namespaces and self.has_ns(element):
170 ns_text = self.ns2text(element)
171 print "-r--r--r-- 1 user group %d %s %s/namespaces" % (
172 len(ns_text), self.xml_file_dt, subpath_encoded)
173 text = self.collect_text(element)
175 print "-r--r--r-- 1 user group %d %s %s/text" % (
176 len(text), self.xml_file_dt, subpath_encoded)
177 self._list(element, subpath)
179 def get_tag_node(self, node, i):
181 for element in self.getchildren(node):
182 if self.istag(element):
186 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as text, one ``name=value`` per line.

    Every attribute name is passed through ``getlocalname``; names and
    values are then encoded to ``default_encoding`` with unencodable
    characters replaced.  The lines are joined with newlines, without a
    trailing newline.
    """
    lines = [
        "%s=%s" % (
            self.getlocalname(attr_name).encode(default_encoding, "replace"),
            attr_value.encode(default_encoding, "replace"),
        )
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
196 def has_ns(self, node):
200 class MiniDOMXmlVfs(XmlVfs):
202 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of *node* as a list of ``(name, value)`` pairs.

    Only element nodes own an attribute map.  minidom leaves
    ``attributes`` set to ``None`` on documents, text nodes and comments;
    for those an empty list is returned instead of failing with
    ``AttributeError``.
    """
    attr_map = node.attributes
    if attr_map is None:
        # Not an element: there is nothing to list.
        return []
    attr_nodes = [attr_map.item(i) for i in range(attr_map.length)]
    return [(a.name, a.value) for a in attr_nodes]
209 def collect_text(self, node):
210 text_accumulator = []
211 for element in node.childNodes:
212 if element.localName:
214 elif element.nodeType == element.COMMENT_NODE:
215 text = u"<!--%s-->" % element.nodeValue
216 elif element.nodeType == element.TEXT_NODE:
217 text = element.nodeValue.strip()
219 xml_error("Unknown node type %d" % element.nodeType)
221 text_accumulator.append(text)
222 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected from the document node itself.

    Delegates to ``collect_text`` with the parsed document as the node.
    """
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of *node* (its ``childNodes`` attribute)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of *node* (its ``localName`` attribute)."""
    local_name = node.localName
    return local_name
def istag(self, node):
    """Tell whether *node* is a tag, i.e. has a non-empty ``localName``."""
    if node.localName:
        return True
    return False
239 def getlocalname(self, name):
243 if use_elementtree or use_lxml:
244 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the ``(name, value)`` items of the ``attrib`` mapping of *node*."""
    attributes = node.attrib
    return attributes.items()
248 def collect_text(self, node):
249 text_accumulator = []
251 text = node.text.strip()
253 text_accumulator.append(text)
255 if not self.istag(element):
256 text = u"<!--%s-->" % element.text
257 text_accumulator.append(text)
259 text = node.tail.strip()
261 text_accumulator.append(text)
262 return '\n'.join(text_accumulator).encode(
263 default_encoding, "replace")
265 def getchildren(self, node):
268 def gettag(self, node):
def istag(self, node):
    """Tell whether *node* is a tag: its ``tag`` attribute must be a string."""
    tag = node.tag
    return isinstance(tag, basestring)
276 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
278 # Copied from http://effbot.org/zone/element-pi.htm
280 class PIParser(ET.XMLTreeBuilder):
283 ET.XMLTreeBuilder.__init__(self)
284 # assumes ElementTree 1.2.X
285 self._parser.CommentHandler = self.handle_comment
286 self._parser.ProcessingInstructionHandler = self.handle_pi
287 self._target.start("document", {})
290 self._target.end("document")
291 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ``ET.Comment`` element.

    The comment text *data* becomes the character data of that element.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as an ``ET.PI`` element.

    The element's character data is *target* and *data* separated by a
    single space.
    """
    builder = self._target
    builder.start(ET.PI, {})
    builder.data(" ".join((target, data)))
    builder.end(ET.PI)
303 self.document = ET.parse(self.xml_file, PIParser())
306 return self.document.getroot()
def get_root_comments(self):
    """Return the non-tag children of the root rendered as comments.

    Every child of ``getroot()`` for which ``istag`` is false is rendered
    as ``<!--text-->``; the pieces are joined with newlines and encoded to
    ``default_encoding`` with unencodable characters replaced.
    """
    comments = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    joined = '\n'.join(comments)
    return joined.encode(default_encoding, "replace")
317 def getlocalname(self, name):
318 if name.startswith('{'):
319 name = name.split('}', 1)[1] # Remove XML namespace
324 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
325 supports_namespaces = True
328 self.document = etree.parse(self.xml_file)
331 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments that precede the root element, in document order.

    Each comment is rendered as ``<!--text-->``; the pieces are joined with
    newlines and encoded to ``default_encoding`` with unencodable
    characters replaced.
    """
    root = self.document.getroot()
    # With preceding=True lxml walks backwards from the root element, so
    # the nearest comment comes first; reverse to restore document order.
    comments = list(root.itersiblings(tag=etree.Comment, preceding=True))
    comments.reverse()
    text_accumulator = [u"<!--%s-->" % comment.text for comment in comments]
    return '\n'.join(text_accumulator).encode(
        default_encoding, "replace")
def getlocalname(self, name):
    """Return the ``localname`` part of *name* as parsed by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
345 def _get_local_ns(self, node):
346 this_nsmap = node.nsmap
347 parent = node.getparent()
348 if parent is not None:
349 parent_nsmap = parent.nsmap
350 for key in parent_nsmap:
351 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether ``_get_local_ns`` reports anything for *node*."""
    if self._get_local_ns(node):
        return True
    return False
358 def ns2text(self, node):
360 for name, value in self._get_local_ns(node).items():
362 name = name.encode(default_encoding, "replace")
365 value = value.encode(default_encoding, "replace")
366 ns_accumulator.append("%s=%s" % (name, value))
367 return '\n'.join(ns_accumulator)
371 if force_implementation is None:
373 return LxmlEtreeXmlVfs()
374 elif use_elementtree:
375 return ElementTreeXmlVfs()
377 return MiniDOMXmlVfs()
378 elif force_implementation == 'minidom':
379 return MiniDOMXmlVfs()
380 elif force_implementation == 'elementtree':
381 return ElementTreeXmlVfs()
382 elif force_implementation == 'lxml':
383 return LxmlEtreeXmlVfs()
385 raise ValueError('Unknown implementation "%s", expected "minidom", '
386 '"elementtree" or "lxml"' % force_implementation)
390 """List the entire VFS"""
392 xmlvfs = build_xmlvfs()
397 """Extract a file from the VFS"""
399 xmlvfs = build_xmlvfs()
400 xml_filename = sys.argv[3]
401 real_filename = sys.argv[4]
403 node = xmlvfs.getroot()
404 for path_comp in xml_filename.split('/'):
406 i = int(path_comp.split(' ', 1)[0])
407 node = xmlvfs.get_tag_node(node, i)
408 elif path_comp in ('attributes', 'namespaces', 'text'):
411 xml_error('Unknown file')
413 if path_comp == 'attributes':
414 if xmlvfs.getattrs(node):
415 text = xmlvfs.attrs2text(node)
417 xml_error('There are no attributes')
419 elif path_comp == 'namespaces':
420 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
421 text = xmlvfs.ns2text(node)
423 xml_error('There are no namespaces')
425 elif path_comp == 'text':
426 if '/' in xml_filename:
427 text = xmlvfs.collect_text(node)
429 text = xmlvfs.get_root_comments()
432 xml_error('Unknown file')
434 outfile = open(real_filename, 'w')
440 """Put a file to the VFS"""
441 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
445 """Remove a file from the VFS"""
446 sys.exit("XML VFS doesn't support removing files/directories "
447 "(read-only filesystem)")
449 mcxml_rmdir = mcxml_rm
453 """Create a directory in the VFS"""
454 sys.exit("XML VFS doesn't support creating directories "
455 "(read-only filesystem)")
458 def xml_error(error_str):
459 logger.critical("Error walking XML file: %s", error_str)
462 command = sys.argv[1]
463 procname = "mcxml_" + command
466 if procname not in g:
467 logger.critical("Unknown command %s", command)
475 logger.exception("Error during run")