2 """Decode MIME message"""
6 from mimedecode_version import __version__, \
7 __author__, __copyright__, __license__
8 if sys.version_info[0] >= 3:
9 # Replace email.message._formatparam with _formatparam from Python 2.7
10 # to avoid re-encoding non-ascii params.
13 me = os.path.basename(sys.argv[0])
18 Broytman mimedecode.py version %s, %s
19 """ % (__version__, __copyright__))
23 def usage(code=0, errormsg=''):
26 Usage: %s [-h|--help] [-V|--version] [-cCDP] [-H|--host=hostname] [-f charset] [-d header1[,h2,...]|*[,-h1,...]] [-p header1[,h2,h3,...]:param1[,p2,p3,...]] [-r header1[,h2,...]|*[,-h1,...]] [-R header1[,h2,h3,...]:param1[,p2,p3,...]] [--set-header header:value] [--set-param header:param=value] [-Bbeit mask] [--save-headers|body|message mask] [-O dest_dir] [-o output_file] [input_file [output_file]]
29 sys.stderr.write(errormsg + os.linesep)
33 def output_headers(msg):
34 unix_from = msg.get_unixfrom()
38 for key, value in msg.items():
41 value = value.split(';', 1)
45 output(_decode_header(value[1], strip=False))
47 output(os.linesep) # End of headers
50 def recode_if_needed(s, charset):
51 if bytes is str: # Python2
52 if isinstance(s, bytes) and \
53 charset and charset.lower() != g.default_encoding:
54 s = s.decode(charset, "replace").\
55 encode(g.default_encoding, "replace")
57 if isinstance(s, bytes):
58 s = s.decode(charset, "replace")
62 def _decode_header(s, strip=True):
63 """Return a decoded string according to RFC 2047.
64 NOTE: This is almost the same as email.Utils.decode.
68 L = email.header.decode_header(s)
69 if not isinstance(L, list):
74 for atom, charset in L:
75 atom = recode_if_needed(atom, charset or g.default_encoding)
80 # Now that we've decoded everything, we just need to join all the parts
81 # together into the final string.
85 def decode_header(msg, header):
86 "Decode mail header (if exists) and put it back, if it was encoded"
90 new_value = _decode_header(value)
91 if new_value != value: # do not bother to touch msg if not changed
92 set_header(msg, header, new_value)
95 def decode_header_param(msg, header, param):
96 "Decode mail header's parameter (if exists) and put it back, if it was encoded"
99 value = msg.get_param(param, header=header)
101 if isinstance(value, tuple):
102 new_value = recode_if_needed(value[2], value[0])
104 new_value = _decode_header(value)
105 if new_value != value: # do not bother to touch msg if not changed
106 msg.set_param(param, new_value, header)
109 def _get_exceptions(list):
110 return [x[1:].lower() for x in list[1:] if x[0] == '-']
113 def _decode_headers_params(msg, header, decode_all_params, param_list):
114 if decode_all_params:
115 params = msg.get_params(header=header)
117 for param, value in params:
118 if param not in param_list:
119 decode_header_param(msg, header, param)
121 for param in param_list:
122 decode_header_param(msg, header, param)
125 def _remove_headers_params(msg, header, remove_all_params, param_list):
126 if remove_all_params:
127 params = msg.get_params(header=header)
130 for param, value in params:
131 if param not in param_list:
132 msg.del_param(param, header)
135 if value is None: # No such header
137 if ';' not in value: # There are no parameters
139 del msg[header] # Delete all such headers
140 # Get the value without parameters and set it back
141 msg[header] = value.split(';')[0].strip()
143 for param in param_list:
144 msg.del_param(param, header)
147 def decode_headers(msg):
148 "Decode message headers according to global options"
# Four passes over msg, in this order:
#   1. remove headers            (g.remove_headers)
#   2. remove header parameters  (g.remove_headers_params)
#   3. decode headers            (g.decode_headers)
#   4. decode header parameters  (g.decode_header_params)
# Every option value is a comma-separated list.  A leading '*' item means
# "everything except": the '-name' items after it are the exceptions,
# which _get_exceptions() returns lowercased -- hence header.lower() below.

# Pass 1: remove headers
150 for header_list in g.remove_headers:
151 header_list = header_list.split(',')
152 if header_list[0] == '*': # Remove all headers except listed
153 header_list = _get_exceptions(header_list)
154 for header in msg.keys():
155 if header.lower() not in header_list:
157 else: # Remove listed headers
158 for header in header_list:
# Pass 2: remove header parameters; each entry is a (headers, params) pair
161 for header_list, param_list in g.remove_headers_params:
162 header_list = header_list.split(',')
163 param_list = param_list.split(',')
164 remove_all_params = param_list[0] == '*' # Remove all params except listed
165 if remove_all_params:
166 param_list = _get_exceptions(param_list)
167 if header_list[0] == '*': # Remove for all headers except listed
168 header_list = _get_exceptions(header_list)
169 for header in msg.keys():
170 if header.lower() not in header_list:
171 _remove_headers_params(msg, header, remove_all_params, param_list)
172 else: # Remove for listed headers
173 for header in header_list:
174 _remove_headers_params(msg, header, remove_all_params, param_list)
# Pass 3: decode headers
176 for header_list in g.decode_headers:
177 header_list = header_list.split(',')
178 if header_list[0] == '*': # Decode all headers except listed
179 header_list = _get_exceptions(header_list)
180 for header in msg.keys():
181 if header.lower() not in header_list:
182 decode_header(msg, header)
183 else: # Decode listed headers
184 for header in header_list:
185 decode_header(msg, header)
# Pass 4: decode header parameters; each entry is a (headers, params) pair
187 for header_list, param_list in g.decode_header_params:
188 header_list = header_list.split(',')
189 param_list = param_list.split(',')
190 decode_all_params = param_list[0] == '*' # Decode all params except listed
191 if decode_all_params:
192 param_list = _get_exceptions(param_list)
193 if header_list[0] == '*': # Decode for all headers except listed
194 header_list = _get_exceptions(header_list)
195 for header in msg.keys():
196 if header.lower() not in header_list:
197 _decode_headers_params(msg, header, decode_all_params, param_list)
198 else: # Decode for listed headers
199 for header in header_list:
200 _decode_headers_params(msg, header, decode_all_params, param_list)
203 def set_header(msg, header, value):
207 msg.replace_header(header, value)
212 def set_content_type(msg, newtype, charset=None):
213 msg.set_type(newtype)
216 msg.set_param("charset", charset, "Content-Type")
219 caps = None # Globally stored mailcap database; initialized only if needed
222 def decode_body(msg, s):
223 "Decode body to plain text using first copiousoutput filter from mailcap"
# NOTE(review): the stdlib mailcap module is deprecated since Python 3.11
# and removed in 3.13 (PEP 594) -- this import fails on newer interpreters.
225 import mailcap, tempfile
229 caps = mailcap.getcaps()
231 content_type = msg.get_content_type()
232 if content_type.startswith('text/'):
233 charset = msg.get_content_charset()
# NOTE(review): tempfile.mktemp() only returns a name and is deprecated:
# another process can create that path before it is opened below.
# tempfile.mkstemp() / NamedTemporaryFile(delete=False) avoid the race.
236 filename = tempfile.mktemp()
239 entries = mailcap.lookup(caps, content_type, "view")
240 for entry in entries:
241 if 'copiousoutput' in entry:
# The entry's "test" command line is expanded for this file and run through
# the shell by os.system(); a non-zero status means the test failed.
243 test = mailcap.subst(entry['test'], content_type, filename)
244 if test and os.system(test) != 0:
246 command = mailcap.subst(entry["view"], content_type, filename)
# The body goes to the temporary file as bytes: text is first decoded from
# the part's charset (Python 3) and then encoded to g.default_encoding.
252 outfile = open(filename, 'wb')
253 if charset and bytes is not str and isinstance(s, bytes): # Python3
254 s = s.decode(charset, "replace")
255 if not isinstance(s, bytes):
256 s = s.encode(g.default_encoding, "replace")
# NOTE(review): the view command comes from the mailcap entry and is run
# with shell=True; only its stdout is captured.
260 pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
261 new_s = pipe.stdout.read()
263 if pipe.wait() == 0: # result=0, Ok
265 if bytes is not str and isinstance(s, bytes): # Python3
266 s = s.decode(g.default_encoding, "replace")
267 if charset and not isinstance(s, bytes):
268 s = s.encode(charset, "replace")
269 set_content_type(msg, "text/plain")
# Record the outcome in X-MIME-Autoconverted; command.split()[0] is the
# first word of the filter command line.
270 msg["X-MIME-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, g.host_name, command.split()[0])
272 msg["X-MIME-Autoconverted"] = "failed conversion from %s to text/plain by %s id %s" % (content_type, g.host_name, command.split()[0])
278 def recode_charset(msg, s):
279 "Recode charset of the message to the default charset"
281 save_charset = charset = msg.get_content_charset()
282 if charset and charset.lower() != g.default_encoding:
283 s = recode_if_needed(s, charset)
284 content_type = msg.get_content_type()
285 set_content_type(msg, content_type, g.default_encoding)
286 msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, g.default_encoding, g.host_name, me)
290 def totext(msg, instring):
291 "Convert instring content to text"
293 # Decode body and recode charset
294 s = decode_body(msg, instring)
296 s = recode_charset(msg, s)
306 def _guess_extension(ctype):
308 if mimetypes is None:
311 user_mime_type = os.path.expanduser('~/.mime.types')
312 if os.path.exists(user_mime_type):
313 mimetypes._db.read(user_mime_type)
314 return mimetypes.guess_extension(ctype)
317 def _save_message(msg, outstring, save_headers=False, save_body=False):
318 for header, param in (
319 ("Content-Disposition", "filename"),
320 ("Content-Type", "name"),
322 fname = msg.get_param(param, header=header)
324 if isinstance(fname, tuple):
325 fname = fname[2] # Do not recode if it isn't recoded yet
327 for forbidden in chr(0), '/', '\\':
328 if forbidden in fname:
337 fname = str(g.save_counter) + fname
339 ext = _guess_extension(msg.get_content_type())
344 outfile = open_output_file(fname)
345 def _output_bytes(s):
346 if not isinstance(s, bytes):
347 s = s.encode(g.default_encoding, "replace")
349 output = _output_bytes
358 def decode_part(msg):
359 "Decode one part of the message"
363 # Test all mask lists and find what to do with this content type
365 ctype = msg.get_content_type()
# Besides the exact type, the "maintype/*" wildcard is also tried as a mask.
368 mtype = ctype.split('/')[0]
369 masks.append(mtype + '/*')
373 for content_type in masks:
374 if content_type in g.totext_mask or \
375 content_type in g.decoded_binary_mask:
377 elif content_type in g.binary_mask:
380 elif content_type in g.fully_ignore_mask:
# Parts left binary, or with no/identity transfer encoding, keep their
# payload as is; anything else is transfer-decoded and relabelled 8bit.
383 encoding = msg["Content-Transfer-Encoding"]
384 if left_binary or encoding in (None, '', '7bit', '8bit', 'binary'):
385 outstring = msg.get_payload()
386 else: # Decode from transfer encoding to text or binary form
387 outstring = msg.get_payload(decode=1)
388 set_header(msg, "Content-Transfer-Encoding", "8bit")
389 msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, g.host_name, me)
# Choose how to process the body according to the configured masks.
391 for content_type in masks:
392 if content_type in g.totext_mask:
393 outstring = totext(msg, outstring)
395 elif content_type in g.binary_mask or \
396 content_type in g.decoded_binary_mask:
400 elif content_type in g.ignore_mask:
402 output("%sMessage body of type %s skipped.%s" % (os.linesep, ctype, os.linesep))
404 elif content_type in g.error_mask:
407 # Neither content type nor masks were listed - decode by default
408 outstring = totext(msg, outstring)
# Save headers, body or the whole message for types in the save masks.
410 for content_type in masks:
411 if content_type in g.save_headers_mask:
412 _save_message(msg, outstring, save_headers=True, save_body=False)
413 if content_type in g.save_body_mask:
414 _save_message(msg, outstring, save_headers=False, save_body=True)
415 if content_type in g.save_message_mask:
416 _save_message(msg, outstring, save_headers=True, save_body=True)
# Content types listed in g.error_mask are rejected with ValueError.
418 for content_type in masks:
419 if content_type in g.error_mask:
420 raise ValueError("content type %s prohibited" % ctype)
423 def decode_multipart(msg):
427 boundary = msg.get_boundary()
430 ctype = msg.get_content_type()
433 mtype = ctype.split('/')[0]
434 masks.append(mtype + '/*')
437 for content_type in masks:
438 if content_type in g.fully_ignore_mask:
440 elif content_type in g.ignore_mask:
442 output("%sMessage body of type %s skipped.%s" % (os.linesep, ctype, os.linesep))
444 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
447 for content_type in masks:
448 if content_type in g.save_body_mask or \
449 content_type in g.save_message_mask:
452 for subpart in msg.get_payload():
454 first_subpart = False
456 _out_l.append(os.linesep)
457 _out_l.append("--%s%s" % (boundary, os.linesep))
458 _out_l.append(subpart.as_string())
459 _out_l.append("%s--%s--%s" % (os.linesep, boundary, os.linesep))
460 outstring = ''.join(_out_l)
465 for content_type in masks:
466 if content_type in g.save_headers_mask:
467 _save_message(msg, outstring, save_headers=True, save_body=False)
468 if content_type in g.save_body_mask:
469 _save_message(msg, outstring, save_headers=False, save_body=True)
470 if content_type in g.save_message_mask:
471 _save_message(msg, outstring, save_headers=True, save_body=True)
473 for content_type in masks:
474 if content_type in g.error_mask:
475 raise ValueError("content type %s prohibited" % ctype)
479 if msg.preamble: # Preserve the first part, it is probably not a RFC822-message
480 output(msg.preamble) # Usually it is just a few lines of text (MIME warning)
481 if msg.preamble is not None:
485 for subpart in msg.get_payload():
488 first_subpart = False
491 output("--%s%s" % (boundary, os.linesep))
493 # Recursively decode all parts of the subpart
494 decode_message(subpart)
497 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
503 def decode_message(msg):
506 if msg.is_multipart():
507 decode_multipart(msg)
508 elif len(msg): # Simple one-part message (there are headers) - decode it
510 else: # Not a message, just text - copy it literally
511 output(msg.as_string())
514 def open_output_file(filename):
515 fullpath = os.path.abspath(os.path.join(g.destination_dir, filename))
516 full_dir = os.path.dirname(fullpath)
517 create = not os.path.isdir(full_dir)
519 os.makedirs(full_dir)
521 return open(fullpath, 'wb')
524 os.removedirs(full_dir)
528 from m_lib.defenc import default_encoding
529 recode_charset = 1 # recode charset of message body
533 # A list of headers to decode
534 decode_headers = ["From", "To", "Cc", "Reply-To", "Mail-Followup-To",
537 # A list of headers parameters to decode
538 decode_header_params = [
539 ("Content-Type", "name"),
540 ("Content-Disposition", "filename"),
543 # A list of headers to remove
545 # A list of headers parameters to remove
546 remove_headers_params = []
548 # A list of header/value pairs to set
549 set_header_value = []
550 # A list of header/parameter/value triples to set
551 set_header_param = []
553 totext_mask = [] # A list of content-types to decode
554 binary_mask = [] # A list of content-types to pass through
555 decoded_binary_mask = [] # A list of content-types to pass through (content-transfer-decoded)
556 ignore_mask = [] # Ignore (do not decode and do not include into output) but output a warning instead of the body
557 fully_ignore_mask = [] # Completely ignore - no headers, no body, no warning
558 error_mask = [] # Raise error if encounter one of these
561 save_headers_mask = []
563 save_message_mask = []
565 input_filename = None
566 output_filename = None
567 destination_dir = os.curdir
573 from getopt import getopt, GetoptError
576 options, arguments = getopt(
578 'hVcCDPH:f:d:p:r:R:b:B:e:I:i:t:O:o:',
579 ['help', 'version', 'host=',
580 'save-headers=', 'save-body=', 'save-message=',
581 'set-header=', 'set-param='])
585 for option, value in options:
586 if option in ('-h', '--help'):
588 elif option in ('-V', '--version'):
594 elif option in ('-H', '--host'):
597 g.default_encoding = value
599 if value.startswith('*'):
600 g.decode_headers = []
601 g.decode_headers.append(value)
603 g.decode_headers = []
605 g.decode_header_params.append(value.split(':', 1))
607 g.decode_header_params = []
609 g.remove_headers.append(value)
611 g.remove_headers_params.append(value.split(':', 1))
612 elif option == '--set-header':
613 g.set_header_value.append(value.split(':', 1))
614 elif option == '--set-param':
615 header, value = value.split(':', 1)
617 param, value = value.split('=', 1)
619 param, value = value.split(':', 1)
620 g.set_header_param.append((header, param, value))
622 g.totext_mask.append(value)
624 g.binary_mask.append(value)
626 g.decoded_binary_mask.append(value)
628 g.fully_ignore_mask.append(value)
630 g.ignore_mask.append(value)
632 g.error_mask.append(value)
633 elif option == '--save-headers':
634 g.save_headers_mask.append(value)
635 elif option == '--save-body':
636 g.save_body_mask.append(value)
637 elif option == '--save-message':
638 g.save_message_mask.append(value)
640 g.destination_dir = value
642 g.output_filename = value
649 if __name__ == "__main__":
650 arguments = get_opts()
654 g.input_filename = '-'
656 if g.output_filename:
657 outfile = open_output_file(g.output_filename)
659 g.output_filename = '-'
662 if (arguments[0] == '-'):
663 g.input_filename = '-'
666 g.input_filename = arguments[0]
667 infile = open(arguments[0], 'r')
669 if g.output_filename:
670 outfile = open_output_file(g.output_filename)
672 g.output_filename = '-'
675 if g.output_filename:
676 usage(1, 'Too many output filenames')
677 if (arguments[1] == '-'):
678 g.output_filename = '-'
681 g.output_filename = arguments[1]
682 outfile = open_output_file(g.output_filename)
684 usage(1, 'Too many arguments')
686 if (infile is sys.stdin) and sys.stdin.isatty():
687 if (outfile is sys.stdout) and sys.stdout.isatty():
689 usage(1, 'Filtering from console is forbidden')
693 g.host_name = socket.gethostname()
696 if hasattr(outfile, 'buffer'):
698 if not isinstance(s, bytes):
699 s = s.encode(g.default_encoding, "replace")
700 outfile.buffer.write(s)
701 output = output_bytes
703 output = outfile.write
706 msg = email.message_from_file(infile)
708 for header, value in g.set_header_value:
709 set_header(msg, header, value)
711 for header, param, value in g.set_header_param:
713 msg.set_param(param, value, header)