1 #! /usr/bin/env python3
2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 https://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
27 are collected in a file named "text", stripped and reencoded. The filesystem is
30 Date/time for all directories/files set to the last modification time of the
33 Implementation based on minidom doesn't understand namespaces, it just shows
34 them among other attributes. ElementTree-based implementation doesn't show
35 namespaces at all. Implementation based on lxml.etree shows namespaces in a
36 separate file "namespaces".
38 It is useful to have a top-down view on an XML structure but it's especially
39 convenient to extract text values from tags. One can get, for example, a
40 base64-encoded image - just walk down the VFS to the tag's directory and copy
41 its text file to a real file.
43 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
48 __author__ = "Oleg Broytman <phd@phdru.name>"
49 __copyright__ = "Copyright (C) 2013-2023 PhiloSoft Design"
52 # Can be None for default choice, 'lxml', 'elementtree' or 'minidom'.
53 force_implementation = None
56 use_elementtree = False
60 from os.path import getmtime
62 from time import localtime
63 import xml.dom.minidom
65 if sys.version_info[0] == 2:
67 import xml.etree.ElementTree as ET
71 use_elementtree = True
74 import lxml.etree as etree
87 # Get the default charset.
89 if sys.version_info[:2] < (3, 11):
90 lcAll = locale.getdefaultlocale()
93 except locale.Error as err:
94 print("WARNING:", err, file=sys.stderr)
98 default_encoding = lcAll[1]
101 default_encoding = locale.getpreferredencoding()
102 except locale.Error as err:
103 print("WARNING:", err, file=sys.stderr)
104 default_encoding = sys.getdefaultencoding()
106 default_encoding = sys.getdefaultencoding()
# Module logger: records at INFO level and above are written to stderr.
log_err_handler = logging.StreamHandler(sys.stderr)
logger = logging.getLogger('xml-mcextfs')
logger.setLevel(logging.INFO)
logger.addHandler(log_err_handler)
114 if len(sys.argv) < 3:
116 XML Virtual FileSystem for Midnight Commander version %s
120 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
121 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
122 __version__, __author__, __copyright__)
126 locale.setlocale(locale.LC_ALL, '')
129 class XmlVfs(object):
130 """Abstract base class"""
132 supports_namespaces = False
135 self.xml_file = sys.argv[2]
139 Y, m, d, H, M = localtime(getmtime(self.xml_file))[0:5]
140 self.xml_file_dt = "%02d-%02d-%d %02d:%02d" % (m, d, Y, H, M)
142 root_comments = self.get_root_comments()
144 print("-r--r--r-- 1 user group %d %s text" % (
145 len(root_comments), self.xml_file_dt))
147 self._list(self.getroot())
149 def _list(self, node, path=''):
150 n = len(self.getchildren(node))
152 width = int(math.log10(n)) + 1
153 template = "%%0%dd" % width
157 for element in self.getchildren(node):
158 if not self.istag(element):
161 tag = self.getlocalname(self.gettag(element))
163 subpath = '%s/%s %s' % (path, template % n, tag)
165 subpath = '%s %s' % (template % n, tag)
166 subpath_encoded = subpath.encode(default_encoding, "replace")
167 print("dr-xr-xr-x 1 user group 0 %s %s" % (
168 self.xml_file_dt, subpath_encoded))
169 if self.getattrs(element):
170 attr_text = self.attrs2text(element)
171 print("-r--r--r-- 1 user group %d %s %s/attributes" % (
172 len(attr_text), self.xml_file_dt, subpath_encoded))
173 if self.supports_namespaces and self.has_ns(element):
174 ns_text = self.ns2text(element)
175 print("-r--r--r-- 1 user group %d %s %s/namespaces" % (
176 len(ns_text), self.xml_file_dt, subpath_encoded))
177 text = self.collect_text(element)
179 print("-r--r--r-- 1 user group %d %s %s/text" % (
180 len(text), self.xml_file_dt, subpath_encoded))
181 self._list(element, subpath)
183 def get_tag_node(self, node, i):
185 for element in self.getchildren(node):
186 if self.istag(element):
190 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as newline-separated name=value lines.

    Each attribute name is reduced to its local name with getlocalname();
    names and values are encoded to ``default_encoding`` using the
    "replace" error handler before being formatted.
    """
    # NOTE(review): on Python 3 ``encode`` yields bytes, so the "%s"
    # formatting below would embed their repr (b'...') -- confirm which
    # interpreter this path is expected to run under.
    lines = [
        "%s=%s" % (
            self.getlocalname(attr_name).encode(default_encoding, "replace"),
            attr_value.encode(default_encoding, "replace"),
        )
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
200 def has_ns(self, node):
204 class MiniDOMXmlVfs(XmlVfs):
206 self.document = xml.dom.minidom.parse(self.xml_file)
def getattrs(self, node):
    """Return the attributes of *node* as a list of (name, value) pairs.

    minidom sets ``attributes`` to None on nodes that cannot carry
    attributes (for example the Document node), so such nodes yield an
    empty list instead of raising AttributeError on ``.length``.
    """
    attrs = node.attributes
    if attrs is None:
        return []
    # NamedNodeMap is walked by index, as the DOM API exposes it.
    items = [attrs.item(i) for i in range(attrs.length)]
    return [(a.name, a.value) for a in items]
213 def collect_text(self, node):
214 text_accumulator = []
215 for element in node.childNodes:
216 if element.localName:
218 elif element.nodeType == element.COMMENT_NODE:
219 text = u"<!--%s-->" % element.nodeValue
220 elif element.nodeType == element.TEXT_NODE:
221 text = element.nodeValue.strip()
223 xml_error("Unknown node type %d" % element.nodeType)
225 text_accumulator.append(text)
226 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected directly under the parsed document node.

    Delegates to collect_text(), passing ``self.document``.
    """
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the child nodes of *node* (its ``childNodes`` attribute)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of *node*, taken from its ``localName``."""
    tag_name = node.localName
    return tag_name
def istag(self, node):
    """Tell whether *node* is a tag, i.e. whether it has a non-empty ``localName``."""
    if node.localName:
        return True
    return False
243 def getlocalname(self, name):
247 if use_elementtree or use_lxml:
248 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) items of the element's ``attrib`` mapping."""
    attributes = node.attrib
    return attributes.items()
252 def collect_text(self, node):
253 text_accumulator = []
255 text = node.text.strip()
257 text_accumulator.append(text)
259 if not self.istag(element):
260 text = u"<!--%s-->" % element.text
261 text_accumulator.append(text)
263 text = node.tail.strip()
265 text_accumulator.append(text)
266 return '\n'.join(text_accumulator).encode(
267 default_encoding, "replace")
269 def getchildren(self, node):
272 def gettag(self, node):
def istag(self, node):
    """Tell whether *node* is a real tag rather than a comment or PI.

    Ordinary elements carry a string in ``node.tag``; comments and
    processing instructions carry a factory function there instead
    (handle_comment()/handle_pi() start them with ET.Comment / ET.PI).
    """
    # ``basestring`` exists only on Python 2; fall back to ``str`` so the
    # check does not raise NameError when run under Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    return isinstance(node.tag, string_types)
280 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
282 # Copied from http://effbot.org/zone/element-pi.htm
284 class PIParser(ET.XMLTreeBuilder):
287 ET.XMLTreeBuilder.__init__(self)
288 # assumes ElementTree 1.2.X
289 self._parser.CommentHandler = self.handle_comment
290 self._parser.ProcessingInstructionHandler = self.handle_pi
291 self._target.start("document", {})
294 self._target.end("document")
295 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ET.Comment element.

    The comment text *data* becomes the character data of that element.
    """
    builder = self._target
    comment_tag = ET.Comment
    builder.start(comment_tag, {})
    builder.data(data)
    builder.end(comment_tag)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as an ET.PI element.

    The element's character data is *target* and *data* joined by a
    single space.
    """
    builder = self._target
    pi_tag = ET.PI
    builder.start(pi_tag, {})
    # Built after start(), in the same order as the calls are issued.
    payload = target + " " + data
    builder.data(payload)
    builder.end(pi_tag)
307 self.document = ET.parse(self.xml_file, PIParser())
310 return self.document.getroot()
def get_root_comments(self):
    """Return the non-tag children of the root as encoded comment text.

    Every child of getroot() for which istag() is false is rendered as
    ``<!--text-->``; the pieces are joined with newlines and encoded to
    ``default_encoding`` with the "replace" error handler.
    """
    comments = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    joined = '\n'.join(comments)
    return joined.encode(default_encoding, "replace")
321 def getlocalname(self, name):
322 if name.startswith('{'):
323 name = name.split('}', 1)[1] # Remove XML namespace
328 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
329 supports_namespaces = True
332 self.document = etree.parse(self.xml_file)
335 return [self.document.getroot()]
def get_root_comments(self):
    """Return the comments that precede the root element, in document order.

    Each comment is rendered as ``<!--text-->``; the pieces are joined
    with newlines and encoded to ``default_encoding`` with the "replace"
    error handler.
    """
    root = self.document.getroot()
    comments = [
        u"<!--%s-->" % comment.text
        for comment in root.itersiblings(tag=etree.Comment, preceding=True)
    ]
    # itersiblings(preceding=True) walks backwards from the root element,
    # so restore the order in which the comments appear in the file.
    comments.reverse()
    return '\n'.join(comments).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the local part of *name* as reported by ``etree.QName``."""
    qualified = etree.QName(name)
    return qualified.localname
349 def _get_local_ns(self, node):
350 this_nsmap = node.nsmap
351 parent = node.getparent()
352 if parent is not None:
353 parent_nsmap = parent.nsmap
354 for key in parent_nsmap:
355 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether _get_local_ns() returns a non-empty result for *node*."""
    local_ns = self._get_local_ns(node)
    if local_ns:
        return True
    return False
362 def ns2text(self, node):
364 for name, value in self._get_local_ns(node).items():
366 name = name.encode(default_encoding, "replace")
369 value = value.encode(default_encoding, "replace")
370 ns_accumulator.append("%s=%s" % (name, value))
371 return '\n'.join(ns_accumulator)
375 if force_implementation is None:
377 return LxmlEtreeXmlVfs()
378 elif use_elementtree:
379 return ElementTreeXmlVfs()
381 return MiniDOMXmlVfs()
382 elif force_implementation == 'minidom':
383 return MiniDOMXmlVfs()
384 elif force_implementation == 'elementtree':
385 return ElementTreeXmlVfs()
386 elif force_implementation == 'lxml':
387 return LxmlEtreeXmlVfs()
389 raise ValueError('Unknown implementation "%s", expected "minidom", '
390 '"elementtree" or "lxml"' % force_implementation)
394 """List the entire VFS"""
396 xmlvfs = build_xmlvfs()
401 """Extract a file from the VFS"""
403 xmlvfs = build_xmlvfs()
404 xml_filename = sys.argv[3]
405 real_filename = sys.argv[4]
407 node = xmlvfs.getroot()
408 for path_comp in xml_filename.split('/'):
410 i = int(path_comp.split(' ', 1)[0])
411 node = xmlvfs.get_tag_node(node, i)
412 elif path_comp in ('attributes', 'namespaces', 'text'):
415 xml_error('Unknown file')
417 if path_comp == 'attributes':
418 if xmlvfs.getattrs(node):
419 text = xmlvfs.attrs2text(node)
421 xml_error('There are no attributes')
423 elif path_comp == 'namespaces':
424 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
425 text = xmlvfs.ns2text(node)
427 xml_error('There are no namespaces')
429 elif path_comp == 'text':
430 if '/' in xml_filename:
431 text = xmlvfs.collect_text(node)
433 text = xmlvfs.get_root_comments()
436 xml_error('Unknown file')
438 outfile = open(real_filename, 'w')
444 """Put a file to the VFS"""
445 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
449 """Remove a file from the VFS"""
450 sys.exit("XML VFS doesn't support removing files/directories "
451 "(read-only filesystem)")
453 mcxml_rmdir = mcxml_rm
457 """Create a directory in the VFS"""
458 sys.exit("XML VFS doesn't support creating directories "
459 "(read-only filesystem)")
462 def xml_error(error_str):
463 logger.critical("Error walking XML file: %s", error_str)
466 command = sys.argv[1]
467 procname = "mcxml_" + command
470 if procname not in g:
471 logger.critical("Unknown command %s", command)
479 logger.exception("Error during run")