2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 http://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
27 are collected in a file named "text", stripped and reencoded. The filesystem is
30 Implementation based on minidom doesn't understand namespaces, it just shows
31 them among other attributes. ElementTree-based implementation doesn't show
32 namespaces at all. Implementation based on lxml.etree shows namespaces in a
33 separate file "namespaces".
35 It is useful to have a top-down view on an XML structure but it's especially
36 convenient to extract text values from tags. One can get, for example, a
37 base64-encoded image - just walk down the VFS to the tag's directory and copy
38 its text file to a real file.
40 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
45 __author__ = "Oleg Broytman <phd@phdru.name>"
46 __copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
49 force_implementation = None # Can be None for default choice,
50 # 'lxml', 'elementtree' or 'minidom'
53 use_elementtree = False
57 from os.path import getmtime
59 from time import localtime
60 import xml.dom.minidom
63 import xml.etree.ElementTree as ET
67 use_elementtree = True
70 import lxml.etree as etree
83 # Get the default charset.
85 lcAll = locale.getdefaultlocale()
86 except locale.Error, err:
87 print >>sys.stderr, "WARNING:", err
91 default_encoding = lcAll[1]
94 default_encoding = locale.getpreferredencoding()
95 except locale.Error, err:
96 print >>sys.stderr, "WARNING:", err
97 default_encoding = sys.getdefaultencoding()
99 default_encoding = sys.getdefaultencoding()
102 logger = logging.getLogger('xml-mcextfs')
103 log_err_handler = logging.StreamHandler(sys.stderr)
104 logger.addHandler(log_err_handler)
105 logger.setLevel(logging.INFO)
107 if len(sys.argv) < 3:
109 XML Virtual FileSystem for Midnight Commander version %s
113 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
114 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
115 __version__, __author__, __copyright__
120 locale.setlocale(locale.LC_ALL, '')
123 class XmlVfs(object):
124 """Abstract base class"""
126 supports_namespaces = False
129 self.xml_file = sys.argv[2]
133 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
134 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
136 root_comments = self.get_root_comments()
138 print "-r--r--r-- 1 user group %d %s text" % (
139 len(root_comments), self.xml_file_dt)
141 self._list(self.getroot())
143 def _list(self, node, path=''):
144 n = len(self.getchildren(node))
146 width = int(math.log10(n)) + 1
147 template = "%%0%dd" % width
151 for element in self.getchildren(node):
152 if not self.istag(element):
155 tag = self.getlocalname(self.gettag(element))
157 subpath = '%s/%s %s' % (path, template % n, tag)
159 subpath = '%s %s' % (template % n, tag)
160 subpath_encoded = subpath.encode(default_encoding, "replace")
161 print "dr-xr-xr-x 1 user group 0 %s %s" % (
162 self.xml_file_dt, subpath_encoded)
163 if self.getattrs(element):
164 attr_text = self.attrs2text(element)
165 print "-r--r--r-- 1 user group %d %s %s/attributes" % (
166 len(attr_text), self.xml_file_dt, subpath_encoded)
167 if self.supports_namespaces and self.has_ns(element):
168 ns_text = self.ns2text(element)
169 print "-r--r--r-- 1 user group %d %s %s/namespaces" % (
170 len(ns_text), self.xml_file_dt, subpath_encoded)
171 text = self.collect_text(element)
173 print "-r--r--r-- 1 user group %d %s %s/text" % (
174 len(text), self.xml_file_dt, subpath_encoded)
175 self._list(element, subpath)
177 def get_tag_node(self, node, i):
179 for element in self.getchildren(node):
180 if self.istag(element):
184 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of `node` as newline-separated name=value lines.

    Each attribute name is reduced with getlocalname(); names and values
    are then encoded to default_encoding, replacing characters that cannot
    be encoded.  No trailing newline is added.
    """
    lines = [
        "%s=%s" % (
            self.getlocalname(attr_name).encode(default_encoding, "replace"),
            attr_value.encode(default_encoding, "replace"),
        )
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
194 def has_ns(self, node):
198 class MiniDOMXmlVfs(XmlVfs):
200 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of a minidom node as (name, value) pairs.

    The pairs are listed in the order the node's NamedNodeMap reports
    them through item().  Only element nodes carry an attribute map; for
    any other node (document, comment, text) `attributes` is None, and an
    empty list is returned so that callers testing the result for
    truthiness simply see "no attributes" instead of an AttributeError.
    """
    attr_map = node.attributes
    if attr_map is None:
        # Non-element node: there is nothing to enumerate.
        return []
    attrs = [attr_map.item(i) for i in range(attr_map.length)]
    return [(a.name, a.value) for a in attrs]
207 def collect_text(self, node):
208 text_accumulator = []
209 for element in node.childNodes:
210 if element.localName:
212 elif element.nodeType == element.COMMENT_NODE:
213 text = u"<!--%s-->" % element.nodeValue
214 elif element.nodeType == element.TEXT_NODE:
215 text = element.nodeValue.strip()
217 xml_error("Unknown node type %d" % element.nodeType)
218 if text: text_accumulator.append(text)
219 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text that collect_text() gathers from the document node."""
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of `node` (its `childNodes` attribute)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of `node`, taken from its `localName`."""
    tag_name = node.localName
    return tag_name
def istag(self, node):
    """Tell whether `node` is a tag, i.e. whether its `localName` is truthy."""
    if node.localName:
        return True
    return False
236 def getlocalname(self, name):
240 if use_elementtree or use_lxml:
241 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) attribute pairs stored in `node.attrib`."""
    attributes = node.attrib
    return attributes.items()
245 def collect_text(self, node):
246 text_accumulator = []
248 text = node.text.strip()
249 if text: text_accumulator.append(text)
251 if not self.istag(element):
252 text = u"<!--%s-->" % element.text
253 text_accumulator.append(text)
255 text = node.tail.strip()
256 if text: text_accumulator.append(text)
257 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
259 def getchildren(self, node):
262 def gettag(self, node):
def istag(self, node):
    """Tell whether `node` is a tag: true when `node.tag` is a string.

    Anything whose `tag` is not a string is reported as a non-tag.
    """
    tag = node.tag
    return isinstance(tag, basestring)
270 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
272 # Copied from http://effbot.org/zone/element-pi.htm
274 class PIParser(ET.XMLTreeBuilder):
277 ET.XMLTreeBuilder.__init__(self)
278 # assumes ElementTree 1.2.X
279 self._parser.CommentHandler = self.handle_comment
280 self._parser.ProcessingInstructionHandler = self.handle_pi
281 self._target.start("document", {})
284 self._target.end("document")
285 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the builder target as an ET.Comment element.

    The element is opened with no attributes, given `data` as its
    character data, and closed again.
    """
    builder = self._target
    comment_tag = ET.Comment
    builder.start(comment_tag, {})
    builder.data(data)
    builder.end(comment_tag)
def handle_pi(self, target, data):
    """Feed a processing instruction to the builder target as an ET.PI element.

    The element is opened with no attributes; its character data is the
    PI target and the PI data joined by a single space.
    """
    builder = self._target
    pi_tag = ET.PI
    builder.start(pi_tag, {})
    builder.data(" ".join([target, data]))
    builder.end(pi_tag)
297 self.document = ET.parse(self.xml_file, PIParser())
300 return self.document.getroot()
def get_root_comments(self):
    """Return the non-tag children of getroot() as encoded text.

    Every child for which istag() is false is rendered as
    ``<!--text-->``; the pieces are joined with newlines and encoded to
    default_encoding, replacing characters that cannot be encoded.
    """
    rendered = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    return '\n'.join(rendered).encode(default_encoding, "replace")
310 def getlocalname(self, name):
311 if name.startswith('{'):
312 name = name.split('}', 1)[1] # Remove XML namespace
317 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
318 supports_namespaces = True
321 self.document = etree.parse(self.xml_file)
324 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments surrounding the root element, in document order.

    Each comment sibling of the document's root element is rendered as
    ``<!--text-->``; the pieces are joined with newlines and encoded to
    default_encoding, replacing characters that cannot be encoded.
    Comments that follow the root element are included after the leading
    ones.
    """
    root = self.document.getroot()
    # itersiblings(preceding=True) walks backwards from the root element,
    # so the leading comments must be reversed to restore document order.
    leading = list(root.itersiblings(tag=etree.Comment, preceding=True))
    leading.reverse()
    trailing = list(root.itersiblings(tag=etree.Comment))
    text_accumulator = [
        u"<!--%s-->" % comment.text for comment in leading + trailing
    ]
    return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the `localname` of `name` as parsed by etree.QName."""
    qualified_name = etree.QName(name)
    return qualified_name.localname
336 def _get_local_ns(self, node):
337 this_nsmap = node.nsmap
338 parent = node.getparent()
339 if parent is not None:
340 parent_nsmap = parent.nsmap
341 for key in parent_nsmap:
342 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether _get_local_ns() returns anything truthy for `node`."""
    if self._get_local_ns(node):
        return True
    return False
349 def ns2text(self, node):
351 for name, value in self._get_local_ns(node).items():
353 name = name.encode(default_encoding, "replace")
356 value = value.encode(default_encoding, "replace")
357 ns_accumulator.append("%s=%s" % (name, value))
358 return '\n'.join(ns_accumulator)
362 if force_implementation is None:
364 return LxmlEtreeXmlVfs()
365 elif use_elementtree:
366 return ElementTreeXmlVfs()
368 return MiniDOMXmlVfs()
369 elif force_implementation == 'minidom':
370 return MiniDOMXmlVfs()
371 elif force_implementation == 'elementtree':
372 return ElementTreeXmlVfs()
373 elif force_implementation == 'lxml':
374 return LxmlEtreeXmlVfs()
376 raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation)
380 """List the entire VFS"""
382 xmlvfs = build_xmlvfs()
387 """Extract a file from the VFS"""
389 xmlvfs = build_xmlvfs()
390 xml_filename = sys.argv[3]
391 real_filename = sys.argv[4]
393 node = xmlvfs.getroot()
394 for path_comp in xml_filename.split('/'):
396 i = int(path_comp.split(' ', 1)[0])
397 node = xmlvfs.get_tag_node(node, i)
398 elif path_comp in ('attributes', 'namespaces', 'text'):
401 xml_error('Unknown file')
403 if path_comp == 'attributes':
404 if xmlvfs.getattrs(node):
405 text = xmlvfs.attrs2text(node)
407 xml_error('There are no attributes')
409 elif path_comp == 'namespaces':
410 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
411 text = xmlvfs.ns2text(node)
413 xml_error('There are no namespaces')
415 elif path_comp == 'text':
416 if '/' in xml_filename:
417 text = xmlvfs.collect_text(node)
419 text = xmlvfs.get_root_comments()
422 xml_error('Unknown file')
424 outfile = open(real_filename, 'w')
430 """Put a file to the VFS"""
431 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
434 """Remove a file from the VFS"""
435 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
437 mcxml_rmdir = mcxml_rm
440 """Create a directory in the VFS"""
441 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
444 def xml_error(error_str):
445 logger.critical("Error walking XML file: %s", error_str)
448 command = sys.argv[1]
449 procname = "mcxml_" + command
452 if not g.has_key(procname):
453 logger.critical("Unknown command %s", command)
461 logger.exception("Error during run")