2 """XML Virtual FileSystem for Midnight Commander
4 The script requires Midnight Commander 3.1+
5 (http://www.midnight-commander.org/), Python 2.4+ (http://www.python.org/).
7 For mc 4.7+ put the script in $HOME/[.local/share/].mc/extfs.d.
8 For older versions put it in /usr/[local/][lib|share]/mc/extfs
9 and add a line "xml" to the /usr/[local/][lib|share]/mc/extfs/extfs.ini.
10 Make the script executable.
12 For mc 4.7+ run this "cd" command in the Midnight Commander (in the "bindings"
13 file the command is "%cd"): cd file/xml://; in older versions it is
14 cd file#xml, where "file" is the name of your XML file.
16 The VFS represents tags as directories; the directories are numbered to
17 distinguish tags with the same name; also numbering helps to sort tags by their
18 order in XML instead of sorting them by name. Attributes, text nodes and
19 comments are represented as text files; attributes are shown in a file named
20 "attributes", attributes are listed in the file as name=value lines (I
21 deliberately ignore a small chance of newline characters in values); names and
22 values are reencoded to the console encoding. Text nodes and comments are
23 collected in a file named "text", stripped and reencoded. The filesystem is
24 read-only. ElementTree- and lxml.etree-based implementations don't show
25 namespaces as attributes.
27 It is useful to have a top-down view on an XML structure but it's especially
28 convenient to extract text values from tags. One can get, for example, a
29 base64-encoded image - just walk down the VFS to the tag's directory and copy
30 its text file to a real file.
32 The VFS was inspired by a FUSE xmlfs: https://github.com/halhen/xmlfs
37 __author__ = "Oleg Broytman <phd@phdru.name>"
38 __copyright__ = "Copyright (C) 2013 PhiloSoft Design"
41 default_implementation = None # Can be None for default choice,
42 # 'lxml', 'elementtree' or 'minidom'
45 use_elementtree = False
50 import xml.dom.minidom
53 import xml.etree.ElementTree as ET
57 use_elementtree = True
60 import lxml.etree as etree
73 # Get the default charset.
75 lcAll = locale.getdefaultlocale()
76 except locale.Error, err:
77 print >>sys.stderr, "WARNING:", err
81 default_encoding = lcAll[1]
84 default_encoding = locale.getpreferredencoding()
85 except locale.Error, err:
86 print >>sys.stderr, "WARNING:", err
87 default_encoding = sys.getdefaultencoding()
89 default_encoding = sys.getdefaultencoding()
92 logger = logging.getLogger('xml-mcextfs')
93 log_err_handler = logging.StreamHandler(sys.stderr)
94 logger.addHandler(log_err_handler)
95 logger.setLevel(logging.INFO)
99 XML Virtual FileSystem for Midnight Commander version %s
103 This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
104 /usr/[local/][lib|share]/mc/extfs. For more information read the source!""",
105 __version__, __author__, __copyright__
110 locale.setlocale(locale.LC_ALL, '')
113 class XmlVfs(object):
118 self._list(self.getroot())
120 def get_child_node(self, node, i):
122 for element in self.getchildren(node):
123 if self.istag(element):
127 xml_error('There are less than %d nodes' % i)
130 class MiniDOMXmlVfs(XmlVfs):
132 self.document = xml.dom.minidom.parse(sys.argv[2])
def hasattrs(self, node):
    """Tell whether a minidom node carries any attributes.

    The result is the truth value of ``node.attributes``, so a node
    without an attribute map and a node with an empty one both give
    False.
    """
    return bool(node.attributes)
137 def attrs2text(self, node):
138 attrs = node.attributes
139 attrs = [attrs.item(i) for i in range (attrs.length)]
140 return '\n'.join(["%s=%s" %
141 (a.name.encode(default_encoding, "replace"),
142 a.value.encode(default_encoding, "replace"))
145 def collect_text(self, node):
146 text_accumulator = []
147 for element in node.childNodes:
148 if element.localName:
150 elif element.nodeType == element.COMMENT_NODE:
151 text = u"<!--%s-->" % element.nodeValue
152 elif element.nodeType == element.TEXT_NODE:
153 text = element.nodeValue.strip()
155 xml_error("Unknown node type %d" % element.nodeType)
156 if text: text_accumulator.append(text)
157 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
159 def _list(self, node, path=''):
160 childNodes = node.childNodes
162 for element in childNodes:
163 if element.localName:
166 width = int(math.log10(n))+1
167 template = "%%0%dd" % width
171 for element in childNodes:
172 if element.localName:
175 subpath = '%s/%s %s' % (path, template % n, element.localName)
177 subpath = '%s %s' % (template % n, element.localName)
178 subpath_encoded = subpath.encode(default_encoding, "replace")
179 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
180 if self.hasattrs(element):
181 attr_text = self.attrs2text(element)
182 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
183 len(attr_text), subpath_encoded)
184 text = self.collect_text(element)
186 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
187 len(text), subpath_encoded)
188 self._list(element, subpath)
def getchildren(self, node):
    """Return the children of a minidom node (its ``childNodes``)."""
    return node.childNodes
def istag(self, node):
    """Tell whether a minidom node is a tag.

    The check is the truth value of ``node.localName``.
    """
    return bool(node.localName)
200 if use_elementtree or use_lxml:
201 class CommonEtreeXmlVfs(XmlVfs):
def hasattrs(self, node):
    """Tell whether an etree element has at least one attribute.

    The result is the truth value of ``node.attrib``.
    """
    return bool(node.attrib)
205 def collect_text(self, node):
206 text_accumulator = []
208 text = node.text.strip()
209 if text: text_accumulator.append(text)
211 if not self.istag(element):
212 text = u"<!--%s-->" % text
213 text_accumulator.append(text)
215 text = node.tail.strip()
216 if text: text_accumulator.append(text)
217 return '\n'.join(text_accumulator).encode(default_encoding, "replace")
220 return self.document.getroot()
222 def getchildren(self, node):
def istag(self, node):
    """Tell whether an etree node is a real tag.

    A node counts as a tag when ``node.tag`` is a string
    (``basestring``, i.e. ``str`` or ``unicode`` on Python 2); any
    other kind of ``tag`` value yields False.
    """
    return isinstance(node.tag, basestring)
230 class ElementTreeXmlVfs(CommonEtreeXmlVfs):
232 # Copied from http://effbot.org/zone/element-pi.htm
234 class PIParser(ET.XMLTreeBuilder):
237 ET.XMLTreeBuilder.__init__(self)
238 # assumes ElementTree 1.2.X
239 self._parser.CommentHandler = self.handle_comment
240 self._parser.ProcessingInstructionHandler = self.handle_pi
241 self._target.start("document", {})
244 self._target.end("document")
245 return ET.XMLTreeBuilder.close(self)
def handle_comment(self, data):
    """Record an XML comment in the tree being built.

    The comment is pushed to ``self._target`` as an element whose tag
    is ``ET.Comment`` and whose text is ``data``.
    """
    self._target.start(ET.Comment, {})
    self._target.data(data)
    self._target.end(ET.Comment)
def handle_pi(self, target, data):
    """Record a processing instruction in the tree being built.

    The instruction is pushed to ``self._target`` as an element whose
    tag is ``ET.PI`` and whose text is ``target``, a single space and
    ``data``.
    """
    self._target.start(ET.PI, {})
    self._target.data(target + " " + data)
    self._target.end(ET.PI)
257 self.document = ET.parse(sys.argv[2], PIParser())
def attrs2text(self, node):
    """Render the attributes of an ElementTree element as text.

    Returns one ``name=value`` line per attribute, joined with
    newlines. A leading ``{namespace}`` part is removed from each name,
    and names and values are encoded to ``default_encoding`` with
    unencodable characters replaced.
    """
    attr_accumulator = []
    for name, value in node.attrib.items():
        # Strip the namespace while the name is still text: looking for
        # '{' in already-encoded bytes breaks for console encodings that
        # are not ASCII-compatible.
        if name.startswith('{'):
            name = name.split('}', 1)[1]  # Remove XML namespace
        name = name.encode(default_encoding, "replace")
        value = value.encode(default_encoding, "replace")
        attr_accumulator.append("%s=%s" % (name, value))
    return '\n'.join(attr_accumulator)
269 def _list(self, node, path=''):
272 width = int(math.log10(n))+1
273 template = "%%0%dd" % width
278 if not isinstance(element.tag, basestring):
282 if tag.startswith('{'):
283 tag = tag.split('}', 1)[1] # Remove XML namespace
285 subpath = '%s/%s %s' % (path, template % n, tag)
287 subpath = '%s %s' % (template % n, tag)
288 subpath_encoded = subpath.encode(default_encoding, "replace")
289 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
290 if self.hasattrs(element):
291 attr_text = self.attrs2text(element)
292 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
293 len(attr_text), subpath_encoded)
294 text = self.collect_text(element)
296 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
297 len(text), subpath_encoded)
298 self._list(element, subpath)
302 class LxmlEtreeXmlVfs(CommonEtreeXmlVfs):
304 self.document = etree.parse(sys.argv[2])
def attrs2text(self, node):
    """Render the attributes of an lxml element as text.

    Returns one ``name=value`` line per attribute, joined with
    newlines. Only the local part of each name is kept (taken from
    ``etree.QName``), and names and values are encoded to
    ``default_encoding`` with unencodable characters replaced.
    """
    attr_accumulator = []
    for name, value in node.attrib.items():
        name = etree.QName(name).localname.encode(default_encoding, "replace")
        value = value.encode(default_encoding, "replace")
        attr_accumulator.append("%s=%s" % (name, value))
    return '\n'.join(attr_accumulator)
315 self._list(self.getroot())
317 def _list(self, node, path=''):
320 width = int(math.log10(n))+1
321 template = "%%0%dd" % width
326 if not isinstance(element.tag, basestring):
329 tag = etree.QName(element.tag).localname
331 subpath = '%s/%s %s' % (path, template % n, tag)
333 subpath = '%s %s' % (template % n, tag)
334 subpath_encoded = subpath.encode(default_encoding, "replace")
335 print "dr-xr-xr-x 1 user group 0 Jan 1 00:00 %s" % subpath_encoded
336 if self.hasattrs(element):
337 attr_text = self.attrs2text(element)
338 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/attributes" % (
339 len(attr_text), subpath_encoded)
340 text = self.collect_text(element)
342 print "-r--r--r-- 1 user group %d Jan 1 00:00 %s/text" % (
343 len(text), subpath_encoded)
344 self._list(element, subpath)
347 return [self.document.getroot()]
351 if default_implementation is None:
353 return LxmlEtreeXmlVfs()
354 elif use_elementtree:
355 return ElementTreeXmlVfs()
357 return MiniDOMXmlVfs()
358 elif default_implementation == 'minidom':
359 return MiniDOMXmlVfs()
360 elif default_implementation == 'elementtree':
361 return ElementTreeXmlVfs()
362 elif default_implementation == 'lxml':
363 return LxmlEtreeXmlVfs()
367 """List the entire VFS"""
369 xmlvfs = build_xmlvfs()
374 """Extract a file from the VFS"""
376 xmlvfs = build_xmlvfs()
377 xml_filename = sys.argv[3]
378 real_filename = sys.argv[4]
380 node = xmlvfs.getroot()
381 for path_comp in xml_filename.split('/'):
383 i = int(path_comp.split(' ', 1)[0])
384 node = xmlvfs.get_child_node(node, i)
385 elif path_comp in ('attributes', 'text'):
388 xml_error('Unknown file')
390 if path_comp == 'attributes':
391 if xmlvfs.hasattrs(node):
392 text = xmlvfs.attrs2text(node)
394 xml_error('There are no attributes')
396 if path_comp == 'text':
397 text = xmlvfs.collect_text(node)
399 outfile = open(real_filename, 'w')
405 """Put a file to the VFS"""
406 sys.exit("XML VFS doesn't support adding files (read-only filesystem)")
409 """Remove a file from the VFS"""
410 sys.exit("XML VFS doesn't support removing files/directories (read-only filesystem)")
412 mcxml_rmdir = mcxml_rm
415 """Create a directory in the VFS"""
416 sys.exit("XML VFS doesn't support creating directories (read-only filesystem)")
419 def xml_error(error_str):
420 logger.critical("Error walking XML file: %s", error_str)
423 command = sys.argv[1]
424 procname = "mcxml_" + command
427 if not g.has_key(procname):
428 logger.critical("Unknown command %s", command)
436 logger.exception("Error during run")