2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 http://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
are collected in a file named "text", stripped and reencoded. The filesystem is read-only.
30 Implementation based on minidom doesn't understand namespaces, it just shows
31 them among other attributes. ElementTree-based implementation doesn't show
32 namespaces at all. Implementation based on lxml.etree shows namespaces in a
33 separate file "namespaces".
35 It is useful to have a top-down view on an XML structure but it's especially
36 convenient to extract text values from tags. One can get, for example, a
37 base64-encoded image - just walk down the VFS to the tag's directory and copy
38 its text file to a real file.
40 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
45 __author__ = "Oleg Broytman <phd@phdru.name>"
46 __copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
49 # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
50 force_implementation = None
53 use_elementtree = False
57 from os.path import getmtime
59 from time import localtime
60 import xml.dom.minidom
63 import xml.etree.ElementTree as ET
67 use_elementtree = True
70 import lxml.etree as etree
83 # Get the default charset.
85 lcAll = locale.getdefaultlocale()
86 except locale.Error, err:
87 print >>sys.stderr, "WARNING:", err
91 default_encoding = lcAll[1]
94 default_encoding = locale.getpreferredencoding()
95 except locale.Error, err:
96 print >>sys.stderr, "WARNING:", err
97 default_encoding = sys.getdefaultencoding()
99 default_encoding = sys.getdefaultencoding()
# Module-wide logger: records at INFO level and above are written to stderr.
log_err_handler = logging.StreamHandler(sys.stderr)
logger = logging.getLogger('xml-mcextfs')
logger.setLevel(logging.INFO)
logger.addHandler(log_err_handler)
107 if len(sys.argv) < 3:
109 XML Virtual FileSystem for Midnight Commander version %s
113 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
114 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
115 __version__, __author__, __copyright__)
119 locale.setlocale(locale.LC_ALL, '')
122 class XmlVfs(object):
123 """Abstract base class"""
125 supports_namespaces = False
128 self.xml_file = sys.argv[2]
132 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
133 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
135 root_comments = self.get_root_comments()
137 print "-r--r--r-- 1 user group %d %s text" % (
138 len(root_comments), self.xml_file_dt)
140 self._list(self.getroot())
142 def _list(self, node, path=''):
143 n = len(self.getchildren(node))
145 width = int(math.log10(n)) + 1
146 template = "%%0%dd" % width
150 for element in self.getchildren(node):
151 if not self.istag(element):
154 tag = self.getlocalname(self.gettag(element))
156 subpath = '%s/%s %s' % (path, template % n, tag)
158 subpath = '%s %s' % (template % n, tag)
159 subpath_encoded = subpath.encode(default_encoding, "replace")
160 print "dr-xr-xr-x 1 user group 0 %s %s" % (
161 self.xml_file_dt, subpath_encoded)
162 if self.getattrs(element):
163 attr_text = self.attrs2text(element)
164 print "-r--r--r-- 1 user group %d %s %s/attributes" % (
165 len(attr_text), self.xml_file_dt, subpath_encoded)
166 if self.supports_namespaces and self.has_ns(element):
167 ns_text = self.ns2text(element)
168 print "-r--r--r-- 1 user group %d %s %s/namespaces" % (
169 len(ns_text), self.xml_file_dt, subpath_encoded)
170 text = self.collect_text(element)
172 print "-r--r--r-- 1 user group %d %s %s/text" % (
173 len(text), self.xml_file_dt, subpath_encoded)
174 self._list(element, subpath)
176 def get_tag_node(self, node, i):
178 for element in self.getchildren(node):
179 if self.istag(element):
183 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as newline-separated name=value lines.

    Each attribute name is reduced to its local name, and both name and
    value are encoded to ``default_encoding`` (unencodable characters are
    replaced) before being formatted.
    """
    lines = [
        "%s=%s" % (
            self.getlocalname(attr_name).encode(default_encoding, "replace"),
            attr_value.encode(default_encoding, "replace"),
        )
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
193 def has_ns(self, node):
197 class MiniDOMXmlVfs(XmlVfs):
199 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of *node* as a list of (name, value) pairs.

    In minidom only element nodes carry an attribute map; for other node
    types (document, text, comment, ...) ``node.attributes`` is None.
    Such nodes yield an empty list instead of raising AttributeError, so
    callers that test ``if self.getattrs(node)`` work for any node.
    """
    attr_map = node.attributes
    if attr_map is None:
        # Not an element: there is nothing to enumerate.
        return []
    attr_nodes = [attr_map.item(i) for i in range(attr_map.length)]
    return [(attr.name, attr.value) for attr in attr_nodes]
206 def collect_text(self, node):
207 text_accumulator = []
208 for element in node.childNodes:
209 if element.localName:
211 elif element.nodeType == element.COMMENT_NODE:
212 text = u"<!--%s-->" % element.nodeValue
213 elif element.nodeType == element.TEXT_NODE:
214 text = element.nodeValue.strip()
216 xml_error("Unknown node type %d" % element.nodeType)
218 text_accumulator.append(text)
219 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected from the direct children of the document."""
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of *node* (its ``childNodes`` attribute)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of *node* (its ``localName`` attribute)."""
    tag_name = node.localName
    return tag_name
def istag(self, node):
    """Tell whether *node* is a tag, i.e. has a non-empty ``localName``."""
    return True if node.localName else False
236 def getlocalname(self, name):
240 if use_elementtree or use_lxml:
241 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) items of the attribute mapping of *node*."""
    attributes = node.attrib
    return attributes.items()
245 def collect_text(self, node):
246 text_accumulator = []
248 text = node.text.strip()
250 text_accumulator.append(text)
252 if not self.istag(element):
253 text = u"<!--%s-->" % element.text
254 text_accumulator.append(text)
256 text = node.tail.strip()
258 text_accumulator.append(text)
259 return '\n'.join(text_accumulator).encode(
260 default_encoding, "replace")
262 def getchildren(self, node):
265 def gettag(self, node):
def istag(self, node):
    """Tell whether *node* is a tag: its ``tag`` attribute must be a string."""
    tag = node.tag
    return isinstance(tag, basestring)
273 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
275 # Copied from http://effbot.org/zone/element-pi.htm
277 class PIParser(ET.XMLTreeBuilder):
280 ET.XMLTreeBuilder.__init__(self)
281 # assumes ElementTree 1.2.X
282 self._parser.CommentHandler = self.handle_comment
283 self._parser.ProcessingInstructionHandler = self.handle_pi
284 self._target.start("document", {})
287 self._target.end("document")
288 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ``ET.Comment`` element.

    The comment is emitted as a start event, its text *data*, and an end
    event on ``self._target``.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as an ``ET.PI``.

    The element text is the PI *target* and its *data* separated by a
    single space.
    """
    builder = self._target
    builder.start(ET.PI, {})
    builder.data(" ".join((target, data)))
    builder.end(ET.PI)
300 self.document = ET.parse(self.xml_file, PIParser())
303 return self.document.getroot()
def get_root_comments(self):
    """Return the non-tag children of the root rendered as comments.

    Every child of ``self.getroot()`` for which ``istag`` is false is
    formatted as ``<!--text-->``; the pieces are joined with newlines and
    encoded to ``default_encoding`` (unencodable characters are replaced).
    """
    comment_lines = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    return '\n'.join(comment_lines).encode(default_encoding, "replace")
314 def getlocalname(self, name):
315 if name.startswith('{'):
316 name = name.split('}', 1)[1] # Remove XML namespace
321 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
322 supports_namespaces = True
325 self.document = etree.parse(self.xml_file)
328 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments preceding the root element, in document order.

    Each comment is formatted as ``<!--text-->``; the pieces are joined
    with newlines and encoded to ``default_encoding`` (unencodable
    characters are replaced).
    """
    root = self.document.getroot()
    # itersiblings(preceding=True) walks backwards from the root element
    # (nearest sibling first), so reverse the result to list the comments
    # in the order they appear in the file.
    comments = list(root.itersiblings(tag=etree.Comment, preceding=True))
    comments.reverse()
    text_accumulator = [u"<!--%s-->" % comment.text for comment in comments]
    return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the local part of *name* as parsed by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
342 def _get_local_ns(self, node):
343 this_nsmap = node.nsmap
344 parent = node.getparent()
345 if parent is not None:
346 parent_nsmap = parent.nsmap
347 for key in parent_nsmap:
348 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether ``_get_local_ns`` reports any namespaces for *node*."""
    local_namespaces = self._get_local_ns(node)
    return True if local_namespaces else False
355 def ns2text(self, node):
357 for name, value in self._get_local_ns(node).items():
359 name = name.encode(default_encoding, "replace")
362 value = value.encode(default_encoding, "replace")
363 ns_accumulator.append("%s=%s" % (name, value))
364 return '\n'.join(ns_accumulator)
368 if force_implementation is None:
370 return LxmlEtreeXmlVfs()
371 elif use_elementtree:
372 return ElementTreeXmlVfs()
374 return MiniDOMXmlVfs()
375 elif force_implementation == 'minidom':
376 return MiniDOMXmlVfs()
377 elif force_implementation == 'elementtree':
378 return ElementTreeXmlVfs()
379 elif force_implementation == 'lxml':
380 return LxmlEtreeXmlVfs()
382 raise ValueError('Unknown implementation "%s", expected "minidom", '
383 '"elementtree" or "lxml"' % force_implementation)
387 """List the entire VFS"""
389 xmlvfs = build_xmlvfs()
394 """Extract a file from the VFS"""
396 xmlvfs = build_xmlvfs()
397 xml_filename = sys.argv[3]
398 real_filename = sys.argv[4]
400 node = xmlvfs.getroot()
401 for path_comp in xml_filename.split('/'):
403 i = int(path_comp.split(' ', 1)[0])
404 node = xmlvfs.get_tag_node(node, i)
405 elif path_comp in ('attributes', 'namespaces', 'text'):
408 xml_error('Unknown file')
410 if path_comp == 'attributes':
411 if xmlvfs.getattrs(node):
412 text = xmlvfs.attrs2text(node)
414 xml_error('There are no attributes')
416 elif path_comp == 'namespaces':
417 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
418 text = xmlvfs.ns2text(node)
420 xml_error('There are no namespaces')
422 elif path_comp == 'text':
423 if '/' in xml_filename:
424 text = xmlvfs.collect_text(node)
426 text = xmlvfs.get_root_comments()
429 xml_error('Unknown file')
431 outfile = open(real_filename, 'w')
437 """Put a file to the VFS"""
438 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
442 """Remove a file from the VFS"""
443 sys.exit("XML VFS doesn't support removing files/directories "
444 "(read-only filesystem)")
446 mcxml_rmdir = mcxml_rm
450 """Create a directory in the VFS"""
451 sys.exit("XML VFS doesn't support creating directories "
452 "(read-only filesystem)")
455 def xml_error(error_str):
456 logger.critical("Error walking XML file: %s", error_str)
459 command = sys.argv[1]
460 procname = "mcxml_" + command
463 if procname not in g:
464 logger.critical("Unknown command %s", command)
472 logger.exception("Error during run")