1 #! /usr/local/bin/python -O
2 """Decode MIME message.
4 Author: Oleg Broytmann <phd@phd.pp.ru>
5 Copyright: (C) 2001-2002 PhiloSoft Design
16 from cStringIO import StringIO
18 from StringIO import StringIO
22 host_name = socket.gethostname()
24 me = os.path.basename(sys.argv[0])
29 Usage: %s [-h|--help] [-V|--version] [-cCDP] [-f charset] [-d header] [-p header:param] [-beit mask] [filename]
36 BroytMann mimedecode.py version %s
44 def output_headers(msg):
def recode(s, charset):
    """Convert the byte string `s` from `charset` to the default charset.

    The text is first decoded from `charset` and then encoded into
    GlobalOptions.default_charset.  Both steps use the "replace" error
    handler, so characters that cannot be converted are substituted
    instead of raising an error.
    """
    decoded = unicode(s, charset, "replace")
    target_charset = GlobalOptions.default_charset
    return decoded.encode(target_charset, "replace")
def recode2(s, charset):
    """Recode `s` to the default charset unless it is already in it.

    `s` is returned unchanged when `charset` is empty/None or names the
    same charset as GlobalOptions.default_charset.  Otherwise the string
    is converted with recode() using the lower-cased charset name.

    The (possibly recoded) string is always returned; callers rely on
    ``s = recode2(s, charset)``.
    """
    if charset:
        charset = charset.lower()
        # Charset names are case-insensitive, so compare them normalized;
        # otherwise "KOI8-R" vs "koi8-r" would trigger a pointless recode.
        if charset != GlobalOptions.default_charset.lower():
            s = recode(s, charset)
    return s
61 def getparam(msg, header, param):
62 "Get parameter from the header; return the header without the parameter, parameter itself and rfc2231 flag"
64 if not msg.has_key(header):
68 parts = [part.strip() for part in header.split(';')]
70 new_parts = [parts[0]] # The header itself
77 rfc2231_continuation = re.compile("^%s\\*[0-9]+\\*?$" % param)
81 name, value = part.split('=', 1)
# The code is incomplete. Continuations in rfc2231-encoded parameters
# (header*1, header*2, etc.) are not yet supported
84 if (name == param) or (name == param + '*'):
85 new_value = rfc822.unquote(value)
86 rfc2231_encoded += (name <> param)
87 elif rfc2231_continuation.match(name):
88 rfc2231_header.append(rfc822.unquote(value))
91 new_parts.append(part)
94 new_value = ''.join(rfc2231_header)
96 if new_value is not None:
97 return "; ".join(new_parts), new_value, rfc2231_encoded
def decode_header(msg, header):
    """Decode mail header (if it exists) and put it back, if it was encoded.

    The header value is passed through decode_rfc2047(); `msg` is only
    modified when decoding actually changed the value.  Nothing happens
    if `msg` has no such header.
    """
    if not msg.has_key(header):
        return

    value = msg[header]
    new_value = decode_rfc2047(value)
    if new_value != value:  # do not bother to touch msg if not changed
        msg[header] = new_value
def decode_header_param(msg, header, param):
    """Decode mail header's parameter (if it exists) and put it back, if it was encoded.

    getparam() yields the header without the parameter, the raw parameter
    value and a flag telling whether the value was rfc2231-encoded.  The
    flag selects the decoder: decode_rfc2231() when set, decode_rfc2047()
    otherwise.  `msg` is only modified when decoding changed the value.
    """
    if not msg.has_key(header):
        return

    new_value, pstr, rfc2231_encoded = getparam(msg, header, param)
    # NOTE(review): presumably getparam() reports "no such parameter" with
    # a None value - confirm against getparam().
    if pstr is None:
        return

    if rfc2231_encoded:
        new_str = decode_rfc2231(pstr)
    else:
        new_str = decode_rfc2047(pstr)

    if new_str != pstr:  # do not bother to touch msg if not changed
        msg[header] = "%s; %s=\"%s\"" % (new_value, param, new_str)
126 def decode_rfc2047(s):
127 "Decode string according to rfc2047"
129 parts = s.split() # by whitespaces
136 if l[0] <> '=' or l[4] <> '=': # assert correct format
137 new_parts.append(' ')
138 new_parts.append(s) # if not encoded - just put it into output
143 new_parts.append(' ') # no space between encoded parts, one space otherwise
146 charset = l[1].lower()
147 encoding = l[2].lower()
151 charset, language = charset.split('*', 1) # language ignored
157 from base64 import decode
158 elif encoding == "q":
159 from quopri import decode
161 raise ValueError, "wrong encoding `%s' (expected 'b' or 'q')" % encoding
163 decode(infile, outfile)
164 s = outfile.getvalue()
166 if charset == GlobalOptions.default_charset:
167 new_parts.append(s) # do not recode
170 s = recode(s, charset)
173 if new_parts and new_parts[0] == ' ':
175 return ''.join(new_parts)
178 def decode_rfc2231(s):
179 "Decode string according to rfc2231"
181 charset, language, s = s.split("'", 2) # language ignored
190 c = chr(int(s[i:i+2], 16))
196 s = recode2(s, charset)
def decode_headers(msg):
    """Decode the headers and header parameters selected by the global options.

    Every header named in GlobalOptions.decode_headers goes through
    decode_header(); every (header, parameter) pair in
    GlobalOptions.decode_header_params goes through decode_header_param().
    """
    for name in GlobalOptions.decode_headers:
        decode_header(msg, name)

    for name, param in GlobalOptions.decode_header_params:
        decode_header_param(msg, name, param)
        if name.lower() != "content-type" or not msg.has_key(name):
            continue
        # Content-Type was just processed: refresh the parsed type state
        # that msg keeps alongside the raw header.
        msg.typeheader = msg[name]
        msg.parsetype()   # required for plist...
        msg.parseplist()  # ... and reparse decoded plist
215 def set_content_type(msg, newtype, charset=None):
216 plist = msg.getplist()
221 if p.split('=')[0] == "charset":
222 p = "charset=%s" % charset
227 plist = ["charset=%s" % charset]
232 if plist and plist[0]: plist.insert(0, '')
233 msg["Content-Type"] = "%s%s" % (newtype, ";\n ".join(plist))
236 caps = None # Globally stored mailcap database; initialized only if needed
238 def decode_body(msg, s):
239 "Decode body to plain text using first copiousoutput filter from mailcap"
241 import mailcap, tempfile
245 caps = mailcap.getcaps()
247 content_type = msg.gettype()
248 filename = tempfile.mktemp()
251 entries = mailcap.lookup(caps, content_type, "view")
252 for entry in entries:
253 if entry.has_key('copiousoutput'):
254 if entry.has_key('test'):
255 test = mailcap.subst(entry['test'], content_type, filename)
256 if test and os.system(test) != 0:
258 command = mailcap.subst(entry["view"], content_type, filename)
264 file = open(filename, 'w')
268 pipe = os.popen(command, 'r')
273 set_content_type(msg, "text/plain")
274 msg["X-MIME-Body-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, host_name, command.split()[0])
276 msg.maintype = "text"
277 msg.subtype = "plain"
278 msg.type = "text/plain"
283 def recode_charset(msg, s):
284 "Recode charset of the message to the default charset"
286 save_charset = charset = msg.getparam("charset")
287 if charset and charset <> GlobalOptions.default_charset:
288 s = recode2(s, charset)
289 content_type = msg.gettype()
290 set_content_type(msg, content_type, GlobalOptions.default_charset)
291 msg["X-MIME-Charset-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, GlobalOptions.default_charset, host_name, me)
295 def totext(msg, infile):
296 "Convert infile (StringIO) content to text"
298 if msg.getmaintype() == "multipart": # Recursively decode all parts of the multipart message
299 newfile = StringIO("%s\n%s" % (msg, infile.getvalue()))
303 # Decode body and recode charset
304 s = decode_body(msg, infile.getvalue())
305 if GlobalOptions.recode_charset:
306 s = recode_charset(msg, s)
312 def decode_part(msg, infile):
313 "Decode one part of the message"
315 encoding = msg.getencoding()
318 if encoding in ('', '7bit', '8bit', 'binary'):
319 mimetools.copyliteral(infile, outfile)
320 else: # Decode from transfer ecoding to text or binary form
321 mimetools.decode(infile, outfile, encoding)
322 msg["Content-Transfer-Encoding"] = "8bit"
323 msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, host_name, me)
327 # Test all mask lists and find what to do with this content type
329 for content_type in msg.gettype(), msg.getmaintype()+"/*", "*/*":
330 if content_type in GlobalOptions.totext_mask:
333 elif content_type in GlobalOptions.binary_mask:
335 output(outfile.getvalue())
337 elif content_type in GlobalOptions.ignore_mask:
339 output("\nMessage body of type `%s' skipped.\n" % content_type)
341 elif content_type in GlobalOptions.error_mask:
342 raise ValueError, "content type `%s' prohibited" % content_type
344 # Neither content type nor masks were listed - decode by default
348 def decode_file(infile, seekable=1):
349 "Decode the entire message"
351 m = mimetools.Message(infile)
352 boundary = m.getparam("boundary")
355 if not m.getheader("Content-Type"): # Not a message, just text - copy it literally
356 output(infile.read())
358 else: # Simple one-part message - decode it
359 decode_part(m, infile)
361 else: # MIME message - decode all parts; may be recursive
366 mf = multifile.MultiFile(infile, seekable)
369 if not seekable: # Preserve the first part, it is probably not a RFC822-message
370 output(mf.read()) # Usually it is just a few lines of text (MIME warning)
373 m = mimetools.Message(mf)
378 output("\n--%s\n" % boundary)
381 output("\n--%s--\n" % boundary)
385 default_charset = sys.getdefaultencoding()
386 recode_charset = 1 # recode charset of message body
388 decode_headers = ["Subject", "From"] # A list of headers to decode
389 decode_header_params = [("Content-Type", "name"),
390 ("Content-Disposition", "filename")
391 ] # A list of headers' parameters to decode
393 totext_mask = [] # A list of content-types to decode
394 binary_mask = [] # A list to pass through
395 ignore_mask = [] # Ignore (skip, do not decode and do not include into output)
396 error_mask = [] # Raise error if encounter one of these
400 from getopt import getopt, GetoptError
403 options, arguments = getopt(sys.argv[1:], 'hVcCDPf:d:p:b:e:i:t:',
408 for option, value in options:
411 elif option == '--help':
415 elif option == '--version':
418 GlobalOptions.recode_charset = 1
420 GlobalOptions.recode_charset = 0
422 GlobalOptions.default_charset = value
424 GlobalOptions.decode_headers.append(value)
426 GlobalOptions.decode_headers = []
428 GlobalOptions.decode_header_params.append(value.split(':', 1))
430 GlobalOptions.decode_header_params = []
432 GlobalOptions.totext_mask.append(value)
434 GlobalOptions.binary_mask.append(value)
436 GlobalOptions.ignore_mask.append(value)
438 GlobalOptions.error_mask.append(value)
445 if __name__ == "__main__":
449 if len(arguments) == 0:
451 elif len(arguments) <> 1:
453 elif arguments[0] == '-':
456 infile = open(arguments[0], 'r')
459 decode_file(infile, seekable)