1 #! /usr/local/bin/python -O
2 """Decode MIME message"""
# Module metadata.
# "$Revision$", "$Date$" and "$Id$" look like version-control keyword
# placeholders (presumably expanded on checkout -- confirm).  The slices are
# sized to drop a leading "$Revision: " / "$Date: " / "$Id: " and the
# trailing " $".  With the placeholders unexpanded, as written here, each
# slice evaluates to an empty string.
6 __version__ = "$Revision$"[11:-2]
7 __date__ = "$Date$"[7:-2]
8 __revision__ = "$Id$"[5:-2]
9 __author__ = "Oleg Broytmann <phd@phd.pp.ru>"
10 __copyright__ = "Copyright (C) 2001-2006 PhiloSoft Design"
11 __license__ = "GNU GPL"
24 from cStringIO import StringIO
26 from StringIO import StringIO
# Name of the machine running the script; it is embedded in the
# X-MIME-Autoconverted headers written by the decoding functions.
30 host_name = socket.gethostname()
# Base name of the running script; used as the converter id in the
# X-MIME-Autoconverted headers.
32 me = os.path.basename(sys.argv[0])
37 BroytMann mimedecode.py version %s, %s
38 """ % (_version, __copyright__))
45 Usage: %s [-h|--help] [-V|--version] [-cCDP] [-f charset] [-d header] [-p header:param] [-beit mask] [filename]
# Output helper: callers pass the text and, optionally, the target file.
# NOTE: the default for outfile is evaluated once, when this def statement
# runs, so it is whatever object sys.stdout referred to at that moment; a
# later reassignment of sys.stdout is not picked up by the default.
50 def output(s, outfile = sys.stdout):
def output_headers(msg, outfile=sys.stdout):
    """Write the envelope line (if any) and all headers of msg to outfile.

    msg is an email message object; outfile is the file-like object handed
    to output() (sys.stdout by default).  Headers are written one per line as
    "Name: value", followed by an empty line that ends the header section.
    """
    unix_from = msg.get_unixfrom()
    if unix_from:
        # Pass outfile explicitly: without it the "From " envelope line would
        # always go to sys.stdout, even when the headers are written elsewhere.
        output(unix_from + os.linesep, outfile)
    for key, value in msg.items():
        output("%s: %s\n" % (key, value), outfile)
    output("\n", outfile) # End of headers
def recode(s, charset):
    """Return s converted from charset to GlobalOptions.default_charset.

    Bytes that cannot be decoded, and characters that cannot be encoded in
    the target charset, are substituted ("replace" error handling) instead of
    raising an error.
    """
    decoded = unicode(s, charset, "replace")
    return decoded.encode(GlobalOptions.default_charset, "replace")
# Conditional wrapper around recode(): s is recoded only when a charset is
# given and it differs from GlobalOptions.default_charset.
# NOTE(review): `<>` is the Python 2 spelling of `!=`.
66 def recode2(s, charset):
67 if charset and charset <> GlobalOptions.default_charset:
68 s = recode(s, charset)
# Helper that decodes an RFC 2047 encoded header value and passes every
# decoded chunk through recode2().
# NOTE(review): types.ListType and email.Header are Python 2 names.
72 def _decode_header(s):
73 """Return a decoded string according to RFC 2047.
74 NOTE: This is almost the same as email.Utils.decode.
76 from types import ListType
# The result is iterated below as (text, charset) pairs; a result that is
# not a list is handled separately by the isinstance check.
79 L = email.Header.decode_header(s)
80 if not isinstance(L, ListType):
# Each chunk goes through recode2() together with the charset it was
# encoded in.
85 for atom, charset in L:
89 rtn.append(recode2(atom, charset))
91 del rtn[-1] # remove the last space
93 # Now that we've decoded everything, we just need to join all the parts
94 # together into the final string.
98 def decode_header(msg, header):
99 "Decode mail header (if exists) and put it back, if it was encoded"
# NOTE(review): has_key() and the `<>` operator are Python 2 idioms.
101 if msg.has_key(header):
# The decoded text is compared with the current value so that the message
# is rewritten (via set_header) only when decoding changed something.
103 new_value = _decode_header(value)
104 if new_value <> value: # do not bother to touch msg if not changed
105 set_header(msg, header, new_value)
def _decode_header_param(s):
    """Recode a header parameter given as an indexable triple.

    Element 2 of s is taken as the text and element 0 as its charset; both
    are handed to recode2() and its result is returned.
    """
    text = s[2]
    charset = s[0]
    return recode2(text, charset)
112 def decode_header_param(msg, header, param):
113 "Decode mail header's parameter (if exists) and put it back, if it was encoded"
# NOTE(review): has_key(), types.TupleType and `<>` are Python 2 idioms.
115 if msg.has_key(header):
116 value = msg.get_param(param, header=header)
# A tuple value is handed to _decode_header_param(), which reads element 0
# as the charset and element 2 as the text; other values go through
# _decode_header() (presumably in the else branch -- confirm).
118 from types import TupleType
119 if isinstance(value, TupleType):
120 new_value = _decode_header_param(value)
122 new_value = _decode_header(value)
123 if new_value <> value: # do not bother to touch msg if not changed
124 msg.set_param(param, new_value, header)
def decode_headers(msg):
    """Decode the headers and header parameters selected in GlobalOptions.

    Every name in GlobalOptions.decode_headers is passed to decode_header(),
    then every (header, param) pair in GlobalOptions.decode_header_params is
    passed to decode_header_param().
    """
    header_names = GlobalOptions.decode_headers
    for name in header_names:
        decode_header(msg, name)

    param_specs = GlobalOptions.decode_header_params
    for spec in param_specs:
        header, param = spec
        decode_header_param(msg, header, param)
# Store value under header in msg.  When the header is already present it
# is replaced in place with replace_header().
# NOTE(review): the case of a header that is not yet present is presumably
# handled by a separate branch -- confirm.
137 def set_header(msg, header, value):
140 if msg.has_key(header):
141 msg.replace_header(header, value)
# Set the content type of msg to newtype and write the "charset" parameter
# on its Content-Type header.
# NOTE(review): charset defaults to None; presumably the parameter is only
# written when a charset was actually given -- confirm.
146 def set_content_type(msg, newtype, charset=None):
147 msg.set_type(newtype)
150 msg.set_param("charset", charset, "Content-Type")
154 caps = None # Globally stored mailcap database; initialized only if needed
156 def decode_body(msg, s):
157 "Decode body to plain text using first copiousoutput filter from mailcap"
159 import mailcap, tempfile
163 caps = mailcap.getcaps()
165 content_type = msg.get_content_type()
# NOTE(review): tempfile.mktemp() only picks a name and does not create the
# file, so the path can be taken by another process before the open() call
# further down; tempfile.mkstemp() would avoid that race.
166 filename = tempfile.mktemp()
# Walk the mailcap "view" entries for this content type, looking for one
# that carries the copiousoutput flag.
169 entries = mailcap.lookup(caps, content_type, "view")
170 for entry in entries:
171 if entry.has_key('copiousoutput'):
172 if entry.has_key('test'):
# The optional "test" command is run through the shell with os.system();
# a non-zero exit status presumably disqualifies this entry -- confirm.
173 test = mailcap.subst(entry['test'], content_type, filename)
174 if test and os.system(test) != 0:
176 command = mailcap.subst(entry["view"], content_type, filename)
182 file = open(filename, 'w')
# The viewer command is started through the shell with a pipe opened for
# reading.
186 pipe = os.popen(command, 'r')
# Relabel the part as text/plain and record the conversion; the id is the
# first word of the viewer command.
191 set_content_type(msg, "text/plain")
192 msg["X-MIME-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, host_name, command.split()[0])
197 def recode_charset(msg, s):
198 "Recode charset of the message to the default charset"
# save_charset holds the same value as charset; it is the "from" name in the
# X-MIME-Autoconverted header written below.
200 save_charset = charset = msg.get_content_charset()
# s is recoded only when the message declares a charset and that charset
# differs from the default one (`<>` is the Python 2 spelling of `!=`).
201 if charset and charset <> GlobalOptions.default_charset:
202 s = recode2(s, charset)
# The content type itself is kept; it is set again so that the charset
# parameter becomes the default charset.
203 content_type = msg.get_content_type()
204 set_content_type(msg, content_type, GlobalOptions.default_charset)
205 msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, GlobalOptions.default_charset, host_name, me)
209 def totext(msg, instring):
210 "Convert instring content to text"
# A multipart message is serialised with str(msg) into an in-memory file
# (presumably so it can be fed back through the decoder -- confirm).
212 if msg.is_multipart(): # Recursively decode all parts of the multipart message
213 newfile = StringIO(str(msg))
218 # Decode body and recode charset
219 s = decode_body(msg, instring)
# Charset recoding is optional and controlled by GlobalOptions.recode_charset.
220 if GlobalOptions.recode_charset:
221 s = recode_charset(msg, s)
227 def decode_part(msg):
228 "Decode one part of the message"
231 encoding = msg["Content-Transfer-Encoding"]
# A missing or empty header, or one of the identity encodings, means the
# payload is taken as is.
233 if encoding in (None, '', '7bit', '8bit', 'binary'):
234 outstring = str(msg.get_payload())
235 else: # Decode from transfer encoding to text or binary form
236 outstring = str(msg.get_payload(decode=1))
# The payload is now decoded, so the header is rewritten to 8bit and the
# conversion is recorded together with the original encoding name.
237 set_header(msg, "Content-Transfer-Encoding", "8bit")
238 msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, host_name, me)
240 # Test all mask lists and find what to do with this content type
242 ctype = msg.get_content_type()
245 mtype = msg.get_content_maintype()
# "maintype/*" is the wildcard form of the main type that is looked up in
# the mask lists below.
247 masks.append(mtype + '/*')
# For each candidate the mask lists are consulted in this order:
# totext, binary, ignore, error.
250 for content_type in masks:
251 if content_type in GlobalOptions.totext_mask:
252 totext(msg, outstring)
254 elif content_type in GlobalOptions.binary_mask:
258 elif content_type in GlobalOptions.ignore_mask:
260 output("\nMessage body of type `%s' skipped.\n" % content_type)
262 elif content_type in GlobalOptions.error_mask:
# NOTE(review): `raise Class, "message"` is Python 2 only syntax.
263 raise ValueError, "content type `%s' prohibited" % content_type
265 # Neither content type nor masks were listed - decode by default
266 totext(msg, outstring)
269 def decode_file(infile):
270 "Decode the entire message"
# Parse the whole input into a message object; the boundary string is
# reused below when the multipart structure is written back out.
272 msg = email.message_from_file(infile)
273 boundary = msg.get_boundary()
275 if msg.is_multipart():
279 if msg.preamble: # Preserve the first part, it is probably not a RFC822-message
280 output(msg.preamble) # Usually it is just a few lines of text (MIME warning)
# Each subpart is preceded by a "--boundary" line; the closing delimiter
# carries an extra trailing "--".
282 for subpart in msg.get_payload():
283 output("\n--%s\n" % boundary)
286 output("\n--%s--\n" % boundary)
# Non-multipart input: the presence of a Content-Type header decides whether
# it is treated as a one-part message or as plain text.
292 if msg.has_key("Content-Type"): # Simple one-part message - decode it
295 else: # Not a message, just text - copy it literally
301 # Get the default charset.
303 lcAll = locale.setlocale(locale.LC_ALL, '').split('.')
304 except locale.Error, err:
305 print >> sys.stderr, "WARNING:", err
309 default_charset = lcAll[1]
312 default_charset = locale.getpreferredencoding()
313 except locale.Error, err:
314 print >> sys.stderr, "WARNING:", err
315 default_charset = sys.getdefaultencoding()
317 default_charset = sys.getdefaultencoding()
319 recode_charset = 1 # recode charset of message body
321 decode_headers = ["Subject", "From"] # A list of headers to decode
322 decode_header_params = [("Content-Type", "name"),
323 ("Content-Disposition", "filename")
324 ] # A list of headers' parameters to decode
326 totext_mask = [] # A list of content-types to decode
327 binary_mask = [] # A list to pass through
328 ignore_mask = [] # Ignore (skip, do not decode and do not include into output)
329 error_mask = [] # Raise error if encounter one of these
333 from getopt import getopt, GetoptError
336 options, arguments = getopt(sys.argv[1:], 'hVcCDPf:d:p:b:e:i:t:',
341 for option, value in options:
344 elif option == '--help':
348 elif option == '--version':
351 GlobalOptions.recode_charset = 1
353 GlobalOptions.recode_charset = 0
355 GlobalOptions.default_charset = value
357 GlobalOptions.decode_headers.append(value)
359 GlobalOptions.decode_headers = []
361 GlobalOptions.decode_header_params.append(value.split(':', 1))
363 GlobalOptions.decode_header_params = []
365 GlobalOptions.totext_mask.append(value)
367 GlobalOptions.binary_mask.append(value)
369 GlobalOptions.ignore_mask.append(value)
371 GlobalOptions.error_mask.append(value)
378 if __name__ == "__main__":
386 elif arguments[0] == '-':
389 infile = open(arguments[0], 'r')