1 #! /usr/bin/env python3
2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 https://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
are collected in a file named "text", stripped and reencoded. The filesystem is
read-only.
Date/time for all directories/files is set to the last modification time of the
XML file.
The implementation based on minidom doesn't understand namespaces; it just
shows them among other attributes. The ElementTree-based implementation doesn't
show namespaces at all. The implementation based on lxml.etree shows namespaces
in a separate file "namespaces".
38 It is useful to have a top-down view on an XML structure but it's especially
39 convenient to extract text values from tags. One can get, for example, a
40 base64-encoded image - just walk down the VFS to the tag's directory and copy
41 its text file to a real file.
43 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
46 from __future__ import print_function
49 __author__ = "Oleg Broytman <phd@phdru.name>"
50 __copyright__ = "Copyright (C) 2013-2023 PhiloSoft Design"
53 # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
54 force_implementation = None
57 use_elementtree = False
61 from os.path import getmtime
63 from time import localtime
64 import xml.dom.minidom
67 import lxml.etree as etree
80 # Get the default charset.
82 if sys.version_info[:2] < (3, 11):
83 lcAll = locale.getdefaultlocale()
86 except locale.Error as err:
87 print("WARNING:", err, file=sys.stderr)
91 default_encoding = lcAll[1]
94 default_encoding = locale.getpreferredencoding()
95 except locale.Error as err:
96 print("WARNING:", err, file=sys.stderr)
97 default_encoding = sys.getdefaultencoding()
99 default_encoding = sys.getdefaultencoding()
102 logger = logging.getLogger('xml-mcextfs')
103 log_err_handler = logging.StreamHandler(sys.stderr)
104 logger.addHandler(log_err_handler)
105 logger.setLevel(logging.INFO)
107 if len(sys.argv) < 3:
109 XML Virtual FileSystem for Midnight Commander version %s
113 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
114 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
115 __version__, __author__, __copyright__)
119 locale.setlocale(locale.LC_ALL, '')
121 PY3 = (sys.version_info[0] >= 3)
124 sys.stdout.buffer.write(s.encode(default_encoding, 'replace') + b'\n')
127 sys.stdout.write(s + '\n')
130 import xml.etree.ElementTree as ET
134 use_elementtree = True
137 class XmlVfs(object):
138 """Abstract base class"""
140 supports_namespaces = False
143 self.xml_file = sys.argv[2]
147 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
148 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
150 root_comments = self.get_root_comments()
152 output("-r--r--r-- 1 user group %d %s text" % (
153 len(root_comments), self.xml_file_dt))
155 self._list(self.getroot())
157 def _list(self, node, path=''):
158 n = len(self.getchildren(node))
160 width = int(math.log10(n)) + 1
161 template = "%%0%dd" % width
165 for element in self.getchildren(node):
166 if not self.istag(element):
169 tag = self.getlocalname(self.gettag(element))
171 subpath = '%s/%s %s' % (path, template % n, tag)
173 subpath = '%s %s' % (template % n, tag)
174 output("dr-xr-xr-x 1 user group 0 %s %s" % (
175 self.xml_file_dt, subpath))
176 if self.getattrs(element):
177 attr_text = self.attrs2text(element)
178 output("-r--r--r-- 1 user group %d %s %s/attributes" % (
179 len(attr_text), self.xml_file_dt, subpath))
180 if self.supports_namespaces and self.has_ns(element):
181 ns_text = self.ns2text(element)
182 output("-r--r--r-- 1 user group %d %s %s/namespaces" % (
183 len(ns_text), self.xml_file_dt, subpath))
184 text = self.collect_text(element)
186 output("-r--r--r-- 1 user group %d %s %s/text" % (
187 len(text), self.xml_file_dt, subpath))
188 self._list(element, subpath)
190 def get_tag_node(self, node, i):
192 for element in self.getchildren(node):
193 if self.istag(element):
197 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as newline-separated ``name=value`` lines.

    Each attribute name is passed through ``self.getlocalname`` before being
    formatted; values are used exactly as ``self.getattrs`` returns them.
    A node without attributes yields an empty string.
    """
    lines = [
        "%s=%s" % (self.getlocalname(attr_name), attr_value)
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
206 def has_ns(self, node):
210 class MiniDOMXmlVfs(XmlVfs):
212 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of a minidom *node* as ``(name, value)`` pairs.

    minidom sets ``attributes`` to ``None`` on nodes that cannot carry
    attributes (for example the Document node or text nodes).  Such nodes
    yield an empty list instead of raising ``AttributeError``.
    """
    attrs = node.attributes
    if attrs is None:
        return []
    # NamedNodeMap is accessed by index, as in the DOM API.
    attr_nodes = (attrs.item(i) for i in range(attrs.length))
    return [(a.name, a.value) for a in attr_nodes]
219 def collect_text(self, node):
220 text_accumulator = []
221 for element in node.childNodes:
222 if element.localName:
224 elif element.nodeType == element.COMMENT_NODE:
225 text = u"<!--%s-->" % element.nodeValue
226 elif element.nodeType == element.TEXT_NODE:
227 text = element.nodeValue.strip()
229 xml_error("Unknown node type %d" % element.nodeType)
231 text_accumulator.append(text)
232 return '\n'.join(text_accumulator)
def get_root_comments(self):
    """Return the text collected from the document node itself.

    Delegates to ``self.collect_text`` with ``self.document`` as the node.
    """
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of *node* (its ``childNodes`` attribute)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the ``localName`` of *node* as its tag name."""
    local_name = node.localName
    return local_name
def istag(self, node):
    """Tell whether *node* has a non-empty ``localName``.

    Returns a real ``bool``.
    """
    return True if node.localName else False
249 def getlocalname(self, name):
253 if use_elementtree or use_lxml:
254 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the ``(name, value)`` items of the element's ``attrib`` mapping."""
    attributes = node.attrib
    return attributes.items()
258 def collect_text(self, node):
259 text_accumulator = []
261 text = node.text.strip()
263 text_accumulator.append(text)
265 if not self.istag(element):
266 text = u"<!--%s-->" % element.text
267 text_accumulator.append(text)
269 text = node.tail.strip()
271 text_accumulator.append(text)
272 return '\n'.join(text_accumulator)
274 def getchildren(self, node):
277 def gettag(self, node):
def istag(self, node):
    """Tell whether ``node.tag`` is a string (an instance of ``basestring``)."""
    tag = node.tag
    return isinstance(tag, basestring)
285 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
287 # Copied from http://effbot.org/zone/element-pi.htm
289 class PIParser(ET.XMLTreeBuilder):
292 ET.XMLTreeBuilder.__init__(self)
293 # assumes ElementTree 1.2.X
294 self._parser.CommentHandler = self.handle_comment
295 self._parser.ProcessingInstructionHandler = self.handle_pi
296 self._target.start("document", {})
299 self._target.end("document")
300 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Forward a comment to the target builder as an ``ET.Comment`` element.

    The comment body *data* is passed to the builder's ``data()`` between
    the matching ``start``/``end`` calls.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Forward a processing instruction to the target builder as ``ET.PI``.

    The PI target and its data are joined with a single space and passed
    to the builder's ``data()`` between the ``start``/``end`` calls.
    """
    builder = self._target
    pi_text = target + " " + data
    builder.start(ET.PI, {})
    builder.data(pi_text)
    builder.end(ET.PI)
312 self.document = ET.parse(self.xml_file, PIParser())
315 return self.document.getroot()
def get_root_comments(self):
    """Return the non-tag children of the root, one ``<!--...-->`` per line.

    Every child of ``self.getroot()`` for which ``self.istag`` is false is
    rendered as ``<!--text-->``; the results are joined with newlines.
    """
    comments = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    return '\n'.join(comments)
325 def getlocalname(self, name):
326 if name.startswith('{'):
327 name = name.split('}', 1)[1] # Remove XML namespace
332 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
333 supports_namespaces = True
336 self.document = etree.parse(self.xml_file)
339 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments that precede the root element, one per line.

    ``itersiblings(preceding=True)`` walks backwards from the root element
    (nearest sibling first), so the collected comments are reversed to list
    them in document order.
    """
    root = self.document.getroot()
    comments = [
        u"<!--%s-->" % comment.text
        for comment in root.itersiblings(tag=etree.Comment, preceding=True)
    ]
    # Restore top-to-bottom order of the XML file.
    comments.reverse()
    return '\n'.join(comments)
def getlocalname(self, name):
    """Return the ``localname`` part of *name* as parsed by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
352 def _get_local_ns(self, node):
353 this_nsmap = node.nsmap
354 parent = node.getparent()
355 if parent is not None:
356 parent_nsmap = parent.nsmap
357 for key in parent_nsmap:
358 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether ``self._get_local_ns(node)`` returns anything truthy."""
    local_ns = self._get_local_ns(node)
    return True if local_ns else False
365 def ns2text(self, node):
367 for name, value in self._get_local_ns(node).items():
370 ns_accumulator.append("%s=%s" % (name, value))
371 return '\n'.join(ns_accumulator)
375 if force_implementation is None:
377 return LxmlEtreeXmlVfs()
378 elif use_elementtree:
379 return ElementTreeXmlVfs()
381 return MiniDOMXmlVfs()
382 elif force_implementation == 'minidom':
383 return MiniDOMXmlVfs()
384 elif force_implementation == 'elementtree':
385 return ElementTreeXmlVfs()
386 elif force_implementation == 'lxml':
387 return LxmlEtreeXmlVfs()
389 raise ValueError('Unknown implementation "%s", expected "minidom", '
390 '"elementtree" or "lxml"' % force_implementation)
394 """List the entire VFS"""
396 xmlvfs = build_xmlvfs()
401 """Extract a file from the VFS"""
403 xmlvfs = build_xmlvfs()
404 xml_filename = sys.argv[3]
405 real_filename = sys.argv[4]
407 node = xmlvfs.getroot()
408 for path_comp in xml_filename.split('/'):
410 i = int(path_comp.split(' ', 1)[0])
411 node = xmlvfs.get_tag_node(node, i)
412 elif path_comp in ('attributes', 'namespaces', 'text'):
415 xml_error('Unknown file')
417 if path_comp == 'attributes':
418 if xmlvfs.getattrs(node):
419 text = xmlvfs.attrs2text(node)
421 xml_error('There are no attributes')
423 elif path_comp == 'namespaces':
424 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
425 text = xmlvfs.ns2text(node)
427 xml_error('There are no namespaces')
429 elif path_comp == 'text':
430 if '/' in xml_filename:
431 text = xmlvfs.collect_text(node)
433 text = xmlvfs.get_root_comments()
436 xml_error('Unknown file')
438 outfile = open(real_filename, 'w')
444 """Put a file to the VFS"""
445 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
449 """Remove a file from the VFS"""
450 sys.exit("XML VFS doesn't support removing files/directories "
451 "(read-only filesystem)")
453 mcxml_rmdir = mcxml_rm
457 """Create a directory in the VFS"""
458 sys.exit("XML VFS doesn't support creating directories "
459 "(read-only filesystem)")
462 def xml_error(error_str):
463 logger.critical("Error walking XML file: %s", error_str)
466 command = sys.argv[1]
467 procname = "mcxml_" + command
470 if procname not in g:
471 logger.critical("Unknown command %s", command)
479 logger.exception("Error during run")