1 #! /usr/bin/env python3
2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 https://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clashes when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
27 are collected in a file named "text", stripped and reencoded. The filesystem is
30 Date/time for all directories/files is set to the last modification time of the
33 Implementation based on minidom doesn't understand namespaces, it just shows
34 them among other attributes. ElementTree-based implementation doesn't show
35 namespaces at all. Implementation based on lxml.etree shows namespaces in a
36 separate file "namespaces".
38 It is useful to have a top-down view on an XML structure but it's especially
39 convenient to extract text values from tags. One can get, for example, a
40 base64-encoded image - just walk down the VFS to the tag's directory and copy
41 its text file to a real file.
43 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
48 __author__ = "Oleg Broytman <phd@phdru.name>"
49 __copyright__ = "Copyright (C) 2013-2023 PhiloSoft Design"
52 # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
53 force_implementation = None
56 use_elementtree = False
60 from os.path import getmtime
62 from time import localtime
63 import xml.dom.minidom
65 if sys.version_info[0] == 2:
67 import xml.etree.ElementTree as ET
71 use_elementtree = True
74 import lxml.etree as etree
87 # Get the default charset.
89 lcAll = locale.getdefaultlocale()
90 except locale.Error as err:
91 print("WARNING:", err, file=sys.stderr)
95 default_encoding = lcAll[1]
98 default_encoding = locale.getpreferredencoding()
99 except locale.Error as err:
100 print("WARNING:", err, file=sys.stderr)
101 default_encoding = sys.getdefaultencoding()
103 default_encoding = sys.getdefaultencoding()
106 logger = logging.getLogger('xml-mcextfs')
107 log_err_handler = logging.StreamHandler(sys.stderr)
108 logger.addHandler(log_err_handler)
109 logger.setLevel(logging.INFO)
111 if len(sys.argv) < 3:
113 XML Virtual FileSystem for Midnight Commander version %s
117 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
118 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
119 __version__, __author__, __copyright__)
123 locale.setlocale(locale.LC_ALL, '')
126 class XmlVfs(object):
127 """Abstract base class"""
129 supports_namespaces = False
132 self.xml_file = sys.argv[2]
136 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
137 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
139 root_comments = self.get_root_comments()
141 print("-r--r--r-- 1 user group %d %s text" % (
142 len(root_comments), self.xml_file_dt))
144 self._list(self.getroot())
146 def _list(self, node, path=''):
147 n = len(self.getchildren(node))
149 width = int(math.log10(n)) + 1
150 template = "%%0%dd" % width
154 for element in self.getchildren(node):
155 if not self.istag(element):
158 tag = self.getlocalname(self.gettag(element))
160 subpath = '%s/%s %s' % (path, template % n, tag)
162 subpath = '%s %s' % (template % n, tag)
163 subpath_encoded = subpath.encode(default_encoding, "replace")
164 print("dr-xr-xr-x 1 user group 0 %s %s" % (
165 self.xml_file_dt, subpath_encoded))
166 if self.getattrs(element):
167 attr_text = self.attrs2text(element)
168 print("-r--r--r-- 1 user group %d %s %s/attributes" % (
169 len(attr_text), self.xml_file_dt, subpath_encoded))
170 if self.supports_namespaces and self.has_ns(element):
171 ns_text = self.ns2text(element)
172 print("-r--r--r-- 1 user group %d %s %s/namespaces" % (
173 len(ns_text), self.xml_file_dt, subpath_encoded))
174 text = self.collect_text(element)
176 print("-r--r--r-- 1 user group %d %s %s/text" % (
177 len(text), self.xml_file_dt, subpath_encoded))
178 self._list(element, subpath)
180 def get_tag_node(self, node, i):
182 for element in self.getchildren(node):
183 if self.istag(element):
187 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as ``name=value`` lines.

    Attribute names are passed through ``getlocalname``; the result is
    reencoded to ``default_encoding`` with unencodable characters replaced.

    Returns the lines joined with newlines: a byte string on Python 2 and
    a text string on Python 3.
    """
    lines = [
        u"%s=%s" % (self.getlocalname(name), value)
        for name, value in self.getattrs(node)
    ]
    # Encode once, at the boundary, instead of per name and per value.
    encoded = u"\n".join(lines).encode(default_encoding, "replace")
    if sys.version_info[0] == 2:
        return encoded
    # On Python 3 formatting bytes with "%s" produces "b'...'" reprs, so
    # hand back text; the round trip keeps the "replace" policy of the
    # target encoding.
    return encoded.decode(default_encoding, "replace")
197 def has_ns(self, node):
201 class MiniDOMXmlVfs(XmlVfs):
203 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of *node* as a list of (name, value) pairs.

    Nodes whose ``attributes`` is ``None`` (minidom provides a value only
    for elements) yield an empty list instead of raising.
    """
    attr_map = node.attributes
    if attr_map is None:
        return []
    attr_nodes = (attr_map.item(index) for index in range(attr_map.length))
    return [(attr.name, attr.value) for attr in attr_nodes]
210 def collect_text(self, node):
211 text_accumulator = []
212 for element in node.childNodes:
213 if element.localName:
215 elif element.nodeType == element.COMMENT_NODE:
216 text = u"<!--%s-->" % element.nodeValue
217 elif element.nodeType == element.TEXT_NODE:
218 text = element.nodeValue.strip()
220 xml_error("Unknown node type %d" % element.nodeType)
222 text_accumulator.append(text)
223 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected from the document node itself.

    Delegates to ``collect_text`` with ``self.document`` as the node.
    """
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the ``childNodes`` of *node*."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the ``localName`` of *node*."""
    local_name = node.localName
    return local_name
def istag(self, node):
    """Tell whether *node* has a non-empty ``localName``."""
    return True if node.localName else False
240 def getlocalname(self, name):
244 if use_elementtree or use_lxml:
245 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) items of the ``attrib`` mapping of *node*."""
    attributes = node.attrib
    return attributes.items()
249 def collect_text(self, node):
250 text_accumulator = []
252 text = node.text.strip()
254 text_accumulator.append(text)
256 if not self.istag(element):
257 text = u"<!--%s-->" % element.text
258 text_accumulator.append(text)
260 text = node.tail.strip()
262 text_accumulator.append(text)
263 return '\n'.join(text_accumulator).encode(
264 default_encoding, "replace")
266 def getchildren(self, node):
269 def gettag(self, node):
def istag(self, node):
    """Tell whether *node* is a tag, i.e. whether its ``tag`` is a string.

    Works on both Python 2 (``basestring``) and Python 3 (``str``).
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    return isinstance(node.tag, string_types)
277 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
279 # Copied from http://effbot.org/zone/element-pi.htm
281 class PIParser(ET.XMLTreeBuilder):
284 ET.XMLTreeBuilder.__init__(self)
285 # assumes ElementTree 1.2.X
286 self._parser.CommentHandler = self.handle_comment
287 self._parser.ProcessingInstructionHandler = self.handle_pi
288 self._target.start("document", {})
291 self._target.end("document")
292 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ``ET.Comment`` element.

    Emits start, data and end events on ``self._target`` so that *data*
    becomes the text of the comment element.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as ``ET.PI``.

    The element text is the PI *target* and *data* joined by one space.
    """
    builder = self._target
    pi_text = target + " " + data
    builder.start(ET.PI, {})
    builder.data(pi_text)
    builder.end(ET.PI)
304 self.document = ET.parse(self.xml_file, PIParser())
307 return self.document.getroot()
def get_root_comments(self):
    """Collect the non-tag children of the root as ``<!--...-->`` lines.

    Every child of ``self.getroot()`` for which ``istag`` is false is
    rendered from its ``text``; the lines are joined with newlines and
    encoded to ``default_encoding`` with unencodable characters replaced.
    """
    comments = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    joined = '\n'.join(comments)
    return joined.encode(default_encoding, "replace")
318 def getlocalname(self, name):
319 if name.startswith('{'):
320 name = name.split('}', 1)[1] # Remove XML namespace
325 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
326 supports_namespaces = True
329 self.document = etree.parse(self.xml_file)
332 return [self.document.getroot()]
def get_root_comments(self):
    """Collect the comments that precede the root element.

    Each comment is rendered as ``<!--...-->``; the lines are returned in
    document order, joined with newlines and encoded to
    ``default_encoding`` with unencodable characters replaced.
    """
    root = self.document.getroot()
    comments = [
        u"<!--%s-->" % comment.text
        for comment in root.itersiblings(tag=etree.Comment, preceding=True)
    ]
    # itersiblings(preceding=True) walks backwards from the root element,
    # so the nearest comment comes first; restore document order.
    comments.reverse()
    return '\n'.join(comments).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the ``localname`` of *name* as parsed by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
346 def _get_local_ns(self, node):
347 this_nsmap = node.nsmap
348 parent = node.getparent()
349 if parent is not None:
350 parent_nsmap = parent.nsmap
351 for key in parent_nsmap:
352 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether ``_get_local_ns`` returns anything truthy for *node*."""
    local_ns = self._get_local_ns(node)
    return True if local_ns else False
359 def ns2text(self, node):
361 for name, value in self._get_local_ns(node).items():
363 name = name.encode(default_encoding, "replace")
366 value = value.encode(default_encoding, "replace")
367 ns_accumulator.append("%s=%s" % (name, value))
368 return '\n'.join(ns_accumulator)
372 if force_implementation is None:
374 return LxmlEtreeXmlVfs()
375 elif use_elementtree:
376 return ElementTreeXmlVfs()
378 return MiniDOMXmlVfs()
379 elif force_implementation == 'minidom':
380 return MiniDOMXmlVfs()
381 elif force_implementation == 'elementtree':
382 return ElementTreeXmlVfs()
383 elif force_implementation == 'lxml':
384 return LxmlEtreeXmlVfs()
386 raise ValueError('Unknown implementation "%s", expected "minidom", '
387 '"elementtree" or "lxml"' % force_implementation)
391 """List the entire VFS"""
393 xmlvfs = build_xmlvfs()
398 """Extract a file from the VFS"""
400 xmlvfs = build_xmlvfs()
401 xml_filename = sys.argv[3]
402 real_filename = sys.argv[4]
404 node = xmlvfs.getroot()
405 for path_comp in xml_filename.split('/'):
407 i = int(path_comp.split(' ', 1)[0])
408 node = xmlvfs.get_tag_node(node, i)
409 elif path_comp in ('attributes', 'namespaces', 'text'):
412 xml_error('Unknown file')
414 if path_comp == 'attributes':
415 if xmlvfs.getattrs(node):
416 text = xmlvfs.attrs2text(node)
418 xml_error('There are no attributes')
420 elif path_comp == 'namespaces':
421 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
422 text = xmlvfs.ns2text(node)
424 xml_error('There are no namespaces')
426 elif path_comp == 'text':
427 if '/' in xml_filename:
428 text = xmlvfs.collect_text(node)
430 text = xmlvfs.get_root_comments()
433 xml_error('Unknown file')
435 outfile = open(real_filename, 'w')
441 """Put a file to the VFS"""
442 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
446 """Remove a file from the VFS"""
447 sys.exit("XML VFS doesn't support removing files/directories "
448 "(read-only filesystem)")
450 mcxml_rmdir = mcxml_rm
454 """Create a directory in the VFS"""
455 sys.exit("XML VFS doesn't support creating directories "
456 "(read-only filesystem)")
def xml_error(error_str):
    """Log a fatal error found while walking the XML file and exit.

    Callers use this as a terminating call: they do not handle a return
    from it, so the process is ended with a non-zero status.
    """
    logger.critical("Error walking XML file: %s", error_str)
    sys.exit(1)
463 command = sys.argv[1]
464 procname = "mcxml_" + command
467 if procname not in g:
468 logger.critical("Unknown command %s", command)
476 logger.exception("Error during run")