2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ just put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 See detailed installation instructions at
17 http://phdru.name/Software/mc/xml_INSTALL.html.
19 The VFS represents tags as directories; the directories are numbered to
20 distinguish tags with the same name; numbering also helps to sort tags by their
21 order in XML instead of sorting them by name and prevents name clash when tag
22 names coincide with the names of special files used by XML VFS. Attributes,
23 text nodes and comments are represented as text files; attributes are shown in
24 a file named "attributes", attributes are listed in the file as name=value
25 lines (I deliberately ignore a small chance of newline characters in values);
26 names and values are reencoded to the console encoding. Text nodes and comments
are collected in a file named "text", stripped and reencoded. The filesystem is
read-only.
30 Implementation based on minidom doesn't understand namespaces, it just shows
31 them among other attributes. ElementTree-based implementation doesn't show
32 namespaces at all. Implementation based on lxml.etree shows namespaces in a
33 separate file "namespaces".
35 It is useful to have a top-down view on an XML structure but it's especially
36 convenient to extract text values from tags. One can get, for example, a
37 base64-encoded image - just walk down the VFS to the tag's directory and copy
38 its text file to a real file.
40 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
45 __author__ = "Oleg Broytman <phd@phdru.name>"
46 __copyright__ = "Copyright (C) 2013-2015 PhiloSoft Design"
49 force_implementation = None # Can be None for default choice,
50 # 'lxml', 'elementtree' or 'minidom'
53 use_elementtree = False
58 import xml.dom.minidom
61 import xml.etree.ElementTree as ET
65 use_elementtree = True
68 import lxml.etree as etree
81 # Get the default charset.
83 lcAll = locale.getdefaultlocale()
84 except locale.Error, err:
85 print >>sys.stderr, "WARNING:", err
89 default_encoding = lcAll[1]
92 default_encoding = locale.getpreferredencoding()
93 except locale.Error, err:
94 print >>sys.stderr, "WARNING:", err
95 default_encoding = sys.getdefaultencoding()
97 default_encoding = sys.getdefaultencoding()
100 logger = logging.getLogger('xml-mcextfs')
101 log_err_handler = logging.StreamHandler(sys.stderr)
102 logger.addHandler(log_err_handler)
103 logger.setLevel(logging.INFO)
105 if len(sys.argv) < 3:
107 XML Virtual FileSystem for Midnight Commander version %s
111 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
112 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
113 __version__, __author__, __copyright__
118 locale.setlocale(locale.LC_ALL, '')
121 class XmlVfs(object):
122 """Abstract base class"""
124 supports_namespaces = False
130 root_comments = self.get_root_comments()
132 print "-r--r--r-- 1 user group %d Jan 1 00:00 text" % (len(root_comments))
133 self._list(self.getroot())
135 def _list(self, node, path=''):
136 n = len(self.getchildren(node))
138 width = int(math.log10(n)) + 1
139 template = "%%0%dd" % width
143 for element in self.getchildren(node):
144 if not self.istag(element):
147 tag = self.getlocalname(self.gettag(element))
149 subpath = '%s/%s %s' % (path, template % n, tag)
151 subpath = '%s %s' % (template % n, tag)
152 subpath_encoded = subpath.encode(default_encoding, "replace")
153 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
154 if self.getattrs(element):
155 attr_text = self.attrs2text(element)
156 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
157 len(attr_text), subpath_encoded)
158 if self.supports_namespaces and self.has_ns(element):
159 ns_text = self.ns2text(element)
160 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/namespaces" % (
161 len(ns_text), subpath_encoded)
162 text = self.collect_text(element)
164 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
165 len(text), subpath_encoded)
166 self._list(element, subpath)
168 def get_tag_node(self, node, i):
170 for element in self.getchildren(node):
171 if self.istag(element):
175 xml_error('There are less than %d nodes' % i)
def attrs2text(self, node):
    """Render the attributes of *node* as ``name=value`` lines.

    Each attribute name is reduced with ``getlocalname``; names and values
    are both encoded to ``default_encoding`` with unencodable characters
    replaced. The lines are joined with newlines, without a trailing one.
    """
    lines = [
        "%s=%s" % (
            self.getlocalname(attr_name).encode(default_encoding, "replace"),
            attr_value.encode(default_encoding, "replace")
        )
        for attr_name, attr_value in self.getattrs(node)
    ]
    return '\n'.join(lines)
185 def has_ns(self, node):
189 class MiniDOMXmlVfs(XmlVfs):
191 self.document = xml.dom.minidom.parse(sys.argv[2])
def getattrs(self, node):
    """Return the attributes of a DOM node as a list of (name, value) pairs.

    minidom sets ``attributes`` to None on nodes that are not elements
    (for example the document node), so such nodes yield an empty list
    instead of failing with AttributeError on ``None.length``.
    """
    attr_map = node.attributes
    if attr_map is None:
        return []
    # NamedNodeMap is walked by index, as the DOM interface defines it.
    attr_nodes = (attr_map.item(index) for index in range(attr_map.length))
    return [(attr.name, attr.value) for attr in attr_nodes]
198 def collect_text(self, node):
199 text_accumulator = []
200 for element in node.childNodes:
201 if element.localName:
203 elif element.nodeType == element.COMMENT_NODE:
204 text = u"<!--%s-->" % element.nodeValue
205 elif element.nodeType == element.TEXT_NODE:
206 text = element.nodeValue.strip()
208 xml_error("Unknown node type %d" % element.nodeType)
209 if text: text_accumulator.append(text)
210 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
def get_root_comments(self):
    """Return the text collected from the direct children of the document node."""
    document_node = self.document
    return self.collect_text(document_node)
def getchildren(self, node):
    """Return the DOM child node list of *node* (``node.childNodes``)."""
    children = node.childNodes
    return children
def gettag(self, node):
    """Return the tag name of *node*, taken from its ``localName``."""
    tag_name = node.localName
    return tag_name
def istag(self, node):
    """Tell whether *node* is a tag: True when its ``localName`` is non-empty."""
    if node.localName:
        return True
    return False
227 def getlocalname(self, name):
231 if use_elementtree or use_lxml:
232 class CommonEtreeXmlVfs(XmlVfs):
def getattrs(self, node):
    """Return the (name, value) items of the element's ``attrib`` mapping."""
    attributes = node.attrib
    return attributes.items()
236 def collect_text(self, node):
237 text_accumulator = []
239 text = node.text.strip()
240 if text: text_accumulator.append(text)
242 if not self.istag(element):
243 text = u"<!--%s-->" % element.text
244 text_accumulator.append(text)
246 text = node.tail.strip()
247 if text: text_accumulator.append(text)
248 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
250 def getchildren(self, node):
253 def gettag(self, node):
256 def istag(self, node):
257 return isinstance(node.tag, basestring)
261 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
263 # Copied from http://effbot.org/zone/element-pi.htm
265 class PIParser(ET.XMLTreeBuilder):
268 ET.XMLTreeBuilder.__init__(self)
269 # assumes ElementTree 1.2.X
270 self._parser.CommentHandler = self.handle_comment
271 self._parser.ProcessingInstructionHandler = self.handle_pi
272 self._target.start("document", {})
275 self._target.end("document")
276 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Feed a comment to the tree builder as an ``ET.Comment`` element.

    The comment text *data* becomes the character data of that element.
    """
    builder = self._target
    builder.start(ET.Comment, {})
    builder.data(data)
    builder.end(ET.Comment)
def handle_pi(self, target, data):
    """Feed a processing instruction to the tree builder as an ``ET.PI`` element.

    The element's character data is the PI target and its data separated
    by a single space.
    """
    builder = self._target
    builder.start(ET.PI, {})
    builder.data(" ".join((target, data)))
    builder.end(ET.PI)
288 self.document = ET.parse(sys.argv[2], PIParser())
291 return self.document.getroot()
def get_root_comments(self):
    """Collect the non-tag children of the root node as ``<!--...-->`` lines.

    Every child of ``getroot()`` for which ``istag`` is false is wrapped
    in comment markers; the lines are joined with newlines and encoded to
    ``default_encoding`` with unencodable characters replaced.
    """
    comment_lines = [
        u"<!--%s-->" % child.text
        for child in self.getroot()
        if not self.istag(child)
    ]
    return '\n'.join(comment_lines).encode(default_encoding, "replace")
301 def getlocalname(self, name):
302 if name.startswith('{'):
303 name = name.split('}', 1)[1] # Remove XML namespace
308 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
309 supports_namespaces = True
312 self.document = etree.parse(sys.argv[2])
315 return [self.document.getroot()]
def get_root_comments(self):
    """Collect the comments that precede the root element, in document order.

    Each comment is rendered as ``<!--...-->``; the lines are joined with
    newlines and encoded to ``default_encoding`` with unencodable
    characters replaced.
    """
    root = self.document.getroot()
    comment_lines = [
        u"<!--%s-->" % comment.text
        for comment in root.itersiblings(tag=etree.Comment, preceding=True)
    ]
    # itersiblings(preceding=True) walks backwards from the root element,
    # so the list is reversed to restore the order used in the XML file.
    comment_lines.reverse()
    return '\n'.join(comment_lines).encode(default_encoding, "replace")
def getlocalname(self, name):
    """Return the local part of *name* as computed by ``etree.QName``."""
    qualified_name = etree.QName(name)
    return qualified_name.localname
327 def _get_local_ns(self, node):
328 this_nsmap = node.nsmap
329 parent = node.getparent()
330 if parent is not None:
331 parent_nsmap = parent.nsmap
332 for key in parent_nsmap:
333 if this_nsmap[key] == parent_nsmap[key]:
def has_ns(self, node):
    """Tell whether ``_get_local_ns`` reports anything for *node*."""
    if self._get_local_ns(node):
        return True
    return False
340 def ns2text(self, node):
342 for name, value in self._get_local_ns(node).items():
344 name = name.encode(default_encoding, "replace")
347 value = value.encode(default_encoding, "replace")
348 ns_accumulator.append("%s=%s" % (name, value))
349 return '\n'.join(ns_accumulator)
353 if force_implementation is None:
355 return LxmlEtreeXmlVfs()
356 elif use_elementtree:
357 return ElementTreeXmlVfs()
359 return MiniDOMXmlVfs()
360 elif force_implementation == 'minidom':
361 return MiniDOMXmlVfs()
362 elif force_implementation == 'elementtree':
363 return ElementTreeXmlVfs()
364 elif force_implementation == 'lxml':
365 return LxmlEtreeXmlVfs()
367 raise ValueError('Unknown implementation "%s", expected "minidom", "elementtree" or "lxml"' % force_implementation)
371 """List the entire VFS"""
373 xmlvfs = build_xmlvfs()
378 """Extract a file from the VFS"""
380 xmlvfs = build_xmlvfs()
381 xml_filename = sys.argv[3]
382 real_filename = sys.argv[4]
384 node = xmlvfs.getroot()
385 for path_comp in xml_filename.split('/'):
387 i = int(path_comp.split(' ', 1)[0])
388 node = xmlvfs.get_tag_node(node, i)
389 elif path_comp in ('attributes', 'namespaces', 'text'):
392 xml_error('Unknown file')
394 if path_comp == 'attributes':
395 if xmlvfs.getattrs(node):
396 text = xmlvfs.attrs2text(node)
398 xml_error('There are no attributes')
400 elif path_comp == 'namespaces':
401 if xmlvfs.supports_namespaces and xmlvfs.has_ns(node):
402 text = xmlvfs.ns2text(node)
404 xml_error('There are no namespaces')
406 elif path_comp == 'text':
407 if '/' in xml_filename:
408 text = xmlvfs.collect_text(node)
410 text = xmlvfs.get_root_comments()
413 xml_error('Unknown file')
415 outfile = open(real_filename, 'w')
421 """Put a file to the VFS"""
422 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
425 """Remove a file from the VFS"""
426 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
428 mcxml_rmdir = mcxml_rm
431 """Create a directory in the VFS"""
432 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
435 def xml_error(error_str):
436 logger.critical("Error walking XML file: %s", error_str)
439 command = sys.argv[1]
440 procname = "mcxml_" + command
443 if not g.has_key(procname):
444 logger.critical("Unknown command %s", command)
452 logger.exception("Error during run")