1 """Decode MIME message"""
7 from .__version__ import __version__, __copyright__
9 if sys.version_info[0] >= 3:
10 # Replace email.message._formatparam with _formatparam from Python 2.7
11 # to avoid re-encoding non-ascii params.
12 from mimedecode import formatparam_27 # noqa: F401: Imported for its side effect
# Base name of the running program; it is recorded as the "id" field of the
# X-MIME-Autoconverted headers written when a part is converted.
14 me = os.path.basename(sys.argv[0])
# Write the headers of *msg* to the output, terminated by an empty line.
# NOTE(review): how the unix "From " line and the header names are written
# is not evident from these lines -- confirm before relying on this summary.
17 def output_headers(msg):
# Envelope "From " line of the message, if it has one.
18 unix_from = msg.get_unixfrom()
22 for key, value in msg.items():
# Separate the main value from the parameter list at the first ';'.
25 value = value.split(';', 1)
# The parameter part is decoded with strip=False -- presumably to keep its
# whitespace intact; confirm against _decode_header.
29 output(_decode_header(value[1], strip=False))
31 output(os.linesep) # End of headers
def recode_if_needed(s, charset):
    """Recode *s* from *charset* to the default encoding when necessary.

    On Python 2 a byte string in a foreign charset is transcoded to
    ``g.default_encoding`` and stays a byte string.  On Python 3 a bytes
    object is decoded to ``str``.  Text (non-bytes) input is returned
    unchanged on both versions.

    A missing, empty or unknown *charset* (for example the ``unknown-8bit``
    pseudo-charset the email package reports for undecodable headers) no
    longer raises: the default encoding is used instead, in keeping with the
    "replace" policy already applied to undecodable bytes.
    """
    if not isinstance(s, bytes):
        return s  # Already text; nothing to recode

    if bytes is str:  # Python2
        if not charset or charset.lower() == g.default_encoding:
            return s
        try:
            text = s.decode(charset, "replace")
        except LookupError:  # Unknown charset name; leave the bytes alone
            return s
        return text.encode(g.default_encoding, "replace")

    # Python3
    try:
        return s.decode(charset or g.default_encoding, "replace")
    except LookupError:  # Unknown charset name
        return s.decode(g.default_encoding, "replace")
# Decode an RFC 2047 encoded header value *s*.
# The value is split with email.header.decode_header() and every
# (atom, charset) pair is passed through recode_if_needed(); atoms without a
# charset are treated as being in g.default_encoding.  The decoded atoms are
# then joined into the final string.
# NOTE(review): the effect of *strip* is not evident from these lines --
# presumably it controls whitespace trimming of the atoms; confirm.
46 def _decode_header(s, strip=True):
47 """Return a decoded string according to RFC 2047.
48 NOTE: This is almost the same as email.Utils.decode.
52 L = email.header.decode_header(s)
53 if not isinstance(L, list):
58 for atom, charset in L:
59 atom = recode_if_needed(atom, charset or g.default_encoding)
64 # Now that we've decoded everything, we just need to join all the parts
65 # together into the final string.
def decode_header(msg, header):
    "Decode mail header (if exists) and put it back, if it was encoded"

    if header not in msg:
        return  # Nothing to decode

    original = msg[header]
    decoded = _decode_header(original)
    # Leave the message untouched when decoding made no difference
    if decoded != original:
        set_header(msg, header, decoded)
def decode_header_param(msg, header, param):
    """Decode one parameter of a mail header.

    If *header* of *msg* carries *param*, decode the parameter's value and
    store it back, but only when decoding actually changed it.
    """
    raw = msg.get_param(param, header=header)
    if not raw:
        return  # No such header/parameter, or nothing to decode

    if isinstance(raw, tuple):
        # RFC 2231 encoded value: (charset, language, text)
        decoded = recode_if_needed(raw[2], raw[0])
    else:
        decoded = _decode_header(raw)

    # Do not bother to touch msg if nothing changed
    if decoded != raw:
        msg.set_param(param, decoded, header)
96 def _get_exceptions(list):
97 return [x[1:].lower() for x in list[1:] if x[0] == '-']
100 def _decode_headers_params(msg, header, decode_all_params, param_list):
101 if decode_all_params:
102 params = msg.get_params(header=header)
104 for param, value in params:
105 if param not in param_list:
106 decode_header_param(msg, header, param)
108 for param in param_list:
109 decode_header_param(msg, header, param)
# Remove parameters of *header* in *msg*.
# When remove_all_params is true, param_list holds the parameters to keep:
# every other parameter is deleted, or the header is rewritten as its bare
# value (the text before the first ';') so that all parameters go at once.
# Otherwise the parameters named in param_list are deleted.
# NOTE(review): which condition selects the "rewrite the bare value" path is
# not evident from these lines -- presumably an empty param_list; confirm.
112 def _remove_headers_params(msg, header, remove_all_params, param_list):
113 if remove_all_params:
114 params = msg.get_params(header=header)
# Delete every parameter that is not listed as an exception.
117 for param, value in params:
118 if param not in param_list:
119 msg.del_param(param, header)
122 if value is None: # No such header
124 if ';' not in value: # There are no parameters
# Assigning msg[header] appends a new header instead of replacing the old
# one, hence the explicit delete first.
126 del msg[header] # Delete all such headers
127 # Get the value without parameters and set it back
128 msg[header] = value.split(';')[0].strip()
# Explicit list: delete exactly the named parameters.
130 for param in param_list:
131 msg.del_param(param, header)
def _select_headers(msg, header_spec):
    """Return the header names of *msg* selected by *header_spec*.

    *header_spec* is a comma-separated list of header names.  If its first
    item is ``*`` it selects every header present in *msg* except those
    listed as ``-name``; otherwise it selects exactly the listed names.
    """
    names = header_spec.split(',')
    if names[0] != '*':
        return names
    exceptions = _get_exceptions(names)
    return [name for name in msg.keys() if name.lower() not in exceptions]


def _split_param_spec(param_spec):
    """Split *param_spec* into an ``(all_params, names)`` pair.

    If the comma-separated list starts with ``*``, ``all_params`` is true and
    ``names`` holds the ``-name`` exceptions; otherwise ``all_params`` is
    false and ``names`` holds the listed parameter names.
    """
    names = param_spec.split(',')
    if names[0] == '*':
        return True, _get_exceptions(names)
    return False, names


def decode_headers(msg):
    """Decode message headers according to global options.

    The option lists in ``g`` are applied in a fixed order: remove headers,
    remove header parameters, decode headers, decode header parameters.
    """
    # Remove headers
    for header_spec in g.remove_headers:
        for header in _select_headers(msg, header_spec):
            del msg[header]

    # Remove parameters of headers
    for header_spec, param_spec in g.remove_headers_params:
        remove_all_params, param_list = _split_param_spec(param_spec)
        for header in _select_headers(msg, header_spec):
            _remove_headers_params(
                msg, header, remove_all_params, param_list)

    # Decode headers
    for header_spec in g.decode_headers:
        for header in _select_headers(msg, header_spec):
            decode_header(msg, header)

    # Decode parameters of headers
    for header_spec, param_spec in g.decode_header_params:
        decode_all_params, param_list = _split_param_spec(param_spec)
        for header in _select_headers(msg, header_spec):
            _decode_headers_params(
                msg, header, decode_all_params, param_list)
# Store *value* under *header* in *msg*; replace_header() overwrites the
# existing header in place.
# NOTE(review): replace_header() raises KeyError for a header that is not
# present, so presumably that case is handled separately -- confirm.
196 def set_header(msg, header, value):
200 msg.replace_header(header, value)
# Change the Content-Type of *msg* to *newtype* and set its "charset"
# parameter to *charset*.
# NOTE(review): presumably the charset parameter is written only when
# *charset* is given -- confirm.
205 def set_content_type(msg, newtype, charset=None):
206 msg.set_type(newtype)
209 msg.set_param("charset", charset, "Content-Type")
# decode_body() fills this with the result of mailcap.getcaps().
212 caps = None # Globally stored mailcap database; initialized only if needed
# Convert the body *s* of *msg* to plain text with an external filter.
# The filter is taken from the mailcap "view" entries for the part's content
# type; only entries flagged "copiousoutput" are considered.  On success the
# part is re-labelled text/plain; in either case an X-MIME-Autoconverted
# header records the outcome.
# NOTE: the stdlib mailcap module is deprecated since Python 3.11 and was
# removed in Python 3.13 (PEP 594).
215 def decode_body(msg, s):
216 "Decode body to plain text using first copiousoutput filter from mailcap"
# Load the mailcap database into the module-level cache.
223 caps = mailcap.getcaps()
225 content_type = msg.get_content_type()
226 if content_type.startswith('text/'):
227 charset = msg.get_content_charset()
# The body is handed to the external command through a temporary file.
230 tmpfile = tempfile.NamedTemporaryFile()
# Candidate mailcap entries able to "view" this content type.
233 entries = mailcap.lookup(caps, content_type, "view")
234 for entry in entries:
# Only non-interactive filters, which write their result to stdout, qualify.
235 if 'copiousoutput' in entry:
# Optional applicability test of the entry; a non-zero exit status
# presumably makes the entry be skipped -- confirm.
237 test = mailcap.subst(entry['test'], content_type, tmpfile.name)
238 if test and os.system(test) != 0:
# Command line of the filter with the temporary file name substituted.
240 command = mailcap.subst(entry["view"], content_type, tmpfile.name)
# Bring the body to bytes in the default encoding -- presumably the form in
# which it is written to the temporary file; confirm.
246 if charset and bytes is not str and isinstance(s, bytes): # Python3
247 s = s.decode(charset, "replace")
248 if not isinstance(s, bytes):
249 s = s.encode(g.default_encoding, "replace")
# NOTE(review): the command string comes from the mailcap files and is run
# through the shell (shell=True).
253 pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
254 new_s = pipe.stdout.read()
256 if pipe.wait() == 0: # result=0, Ok
# Convert the filter output back: decode from the default encoding, then
# re-encode in the part's original charset when there is one.
258 if bytes is not str and isinstance(s, bytes): # Python3
259 s = s.decode(g.default_encoding, "replace")
260 if charset and not isinstance(s, bytes):
261 s = s.encode(charset, "replace")
262 set_content_type(msg, "text/plain")
263 msg["X-MIME-Autoconverted"] = \
264 "from %s to text/plain by %s id %s" \
265 % (content_type, g.host_name, command.split()[0])
# Filter failed: only record the failed attempt.
267 msg["X-MIME-Autoconverted"] = \
268 "failed conversion from %s to text/plain by %s id %s" \
269 % (content_type, g.host_name, command.split()[0])
270 tmpfile.close() # Will be removed on close
def recode_charset(msg, s):
    "Recode charset of the message to the default charset"

    source_charset = msg.get_content_charset()
    if not source_charset or source_charset.lower() == g.default_encoding:
        return s  # No charset declared, or already the default one

    s = recode_if_needed(s, source_charset)
    # Re-label the part with the new charset and leave a trace of it
    set_content_type(msg, msg.get_content_type(), g.default_encoding)
    msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (
        source_charset, g.default_encoding, g.host_name, me)
    return s
# Turn the body *instring* of *msg* into text: convert it with decode_body()
# and then recode it with recode_charset().
# NOTE(review): whether both steps are unconditional, and what is done with
# the resulting text afterwards, is not evident from these lines -- confirm.
289 def totext(msg, instring):
290 "Convert instring content to text"
292 # Decode body and recode charset
293 s = decode_body(msg, instring)
295 s = recode_charset(msg, s)
# Return a file name extension for MIME type *ctype*, i.e. whatever
# mimetypes.guess_extension() reports for it.
305 def _guess_extension(ctype):
# NOTE(review): presumably mimetypes is imported lazily on first use --
# confirm.
307 if mimetypes is None:
# Also honour the user's private type map, if there is one.
310 user_mime_type = os.path.expanduser('~/.mime.types')
311 if os.path.exists(user_mime_type):
# NOTE: _db is a private attribute of the mimetypes module.
312 mimetypes._db.read(user_mime_type)
313 return mimetypes.guess_extension(ctype)
# Save *msg* -- its headers, its body *outstring*, or both, as selected by
# save_headers / save_body -- to a file opened with open_output_file().
# NOTE(review): only fragments of the file-name selection and of the writing
# code are evident from these lines; the notes below are hedged accordingly.
316 def _save_message(msg, outstring, save_headers=False, save_body=False):
# Look for a file name suggested by the message itself.
317 for header, param in (
318 ("Content-Disposition", "filename"),
319 ("Content-Type", "name"),
321 fname = msg.get_param(param, header=header)
323 if isinstance(fname, tuple):
324 fname = fname[2] # Do not recode if it isn't recoded yet
# NUL and path separators must not appear in the name -- presumably such a
# name is rejected or sanitised; confirm.
326 for forbidden in chr(0), '/', '\\':
327 if forbidden in fname:
# The name is prefixed with the running save counter.
336 fname = str(g.save_counter) + fname
# Extension derived from the content type -- presumably used when the
# message does not suggest a usable name; confirm.
338 ext = _guess_extension(msg.get_content_type())
344 outfile = open_output_file(fname)
# Local replacement for output(): encodes text to bytes in the default
# encoding first -- presumably before writing to the binary outfile; confirm.
346 def _output_bytes(s):
347 if not isinstance(s, bytes):
348 s = s.encode(g.default_encoding, "replace")
351 output = _output_bytes
# Decode a single (non-multipart) part *msg*.
# The content type is checked against the mask lists in g to decide whether
# the body is converted to text, kept binary, skipped or rejected; the part
# is then optionally saved to files.
# Raises ValueError when the content type is listed in g.error_mask.
360 def decode_part(msg):
361 "Decode one part of the message"
365 # Test all mask lists and find what to do with this content type
367 ctype = msg.get_content_type()
# Besides other masks, the generic "maintype/*" form is tried as well.
370 mtype = ctype.split('/')[0]
371 masks.append(mtype + '/*')
# First pass: classify the part; presumably this is where left_binary (used
# below) gets its value -- confirm.
375 for content_type in masks:
376 if content_type in g.totext_mask or \
377 content_type in g.decoded_binary_mask:
379 elif content_type in g.binary_mask:
382 elif content_type in g.fully_ignore_mask:
# Undo the transfer encoding unless the part must stay as it is or the
# encoding is one of the identity encodings.
385 encoding = msg["Content-Transfer-Encoding"]
386 if left_binary or encoding in (None, '', '7bit', '8bit', 'binary'):
387 outstring = msg.get_payload()
388 else: # Decode from transfer encoding to text or binary form
389 outstring = msg.get_payload(decode=1)
390 set_header(msg, "Content-Transfer-Encoding", "8bit")
391 msg["X-MIME-Autoconverted"] = \
392 "from %s to 8bit by %s id %s" % (encoding, g.host_name, me)
# Second pass: handle the body according to the mask that matches.
394 for content_type in masks:
395 if content_type in g.totext_mask:
396 outstring = totext(msg, outstring)
398 elif content_type in g.binary_mask or \
399 content_type in g.decoded_binary_mask:
403 elif content_type in g.ignore_mask:
405 output("%sMessage body of type %s skipped.%s"
406 % (os.linesep, ctype, os.linesep))
408 elif content_type in g.error_mask:
411 # Neither content type nor masks were listed - decode by default
412 outstring = totext(msg, outstring)
# Save headers, body or the whole part to files when requested.
414 for content_type in masks:
415 if content_type in g.save_headers_mask:
416 _save_message(msg, outstring, save_headers=True, save_body=False)
417 if content_type in g.save_body_mask:
418 _save_message(msg, outstring, save_headers=False, save_body=True)
419 if content_type in g.save_message_mask:
420 _save_message(msg, outstring, save_headers=True, save_body=True)
# Prohibited content types are reported only after the part has been
# processed and saved.
422 for content_type in masks:
423 if content_type in g.error_mask:
424 raise ValueError("content type %s prohibited" % ctype)
# Decode a multipart message *msg*: apply the ignore/save/error masks to the
# container itself, then write every subpart between boundary lines,
# decoding each one recursively with decode_message().
# Raises ValueError when the content type is listed in g.error_mask.
427 def decode_multipart(msg):
431 boundary = msg.get_boundary()
434 ctype = msg.get_content_type()
# Besides other masks, the generic "maintype/*" form is tried as well.
437 mtype = ctype.split('/')[0]
438 masks.append(mtype + '/*')
# Ignored multiparts: write a "skipped" notice and, for ignore_mask, the
# closing boundary line instead of the content.
441 for content_type in masks:
442 if content_type in g.fully_ignore_mask:
444 elif content_type in g.ignore_mask:
446 output("%sMessage body of type %s skipped.%s"
447 % (os.linesep, ctype, os.linesep))
449 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
# When the body has to be saved, rebuild it as one string from the
# unmodified subparts.
452 for content_type in masks:
453 if content_type in g.save_body_mask or \
454 content_type in g.save_message_mask:
457 for subpart in msg.get_payload():
459 first_subpart = False
461 _out_l.append(os.linesep)
462 _out_l.append("--%s%s" % (boundary, os.linesep))
463 _out_l.append(subpart.as_string())
464 _out_l.append("%s--%s--%s" % (os.linesep, boundary, os.linesep))
465 outstring = ''.join(_out_l)
# Save headers, body or the whole message to files when requested.
470 for content_type in masks:
471 if content_type in g.save_headers_mask:
472 _save_message(msg, outstring, save_headers=True, save_body=False)
473 if content_type in g.save_body_mask:
474 _save_message(msg, outstring, save_headers=False, save_body=True)
475 if content_type in g.save_message_mask:
476 _save_message(msg, outstring, save_headers=True, save_body=True)
478 for content_type in masks:
479 if content_type in g.error_mask:
480 raise ValueError("content type %s prohibited" % ctype)
484 # Preserve the first part, it is probably not a RFC822-message.
486 # Usually it is just a few lines of text (MIME warning).
488 if msg.preamble is not None:
# Regular output: boundary line, then the decoded subpart.
492 for subpart in msg.get_payload():
495 first_subpart = False
498 output("--%s%s" % (boundary, os.linesep))
500 # Recursively decode all parts of the subpart
501 decode_message(subpart)
# Closing boundary of the multipart.
504 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
def decode_message(msg):
    """Decode *msg* and write the result to the output.

    Multipart messages are handed to decode_multipart(), one-part messages
    that have headers to decode_part(); input without any header is not a
    message at all and is copied to the output literally.
    """
    if msg.is_multipart():
        decode_multipart(msg)
        return

    has_headers = len(msg)
    if has_headers:  # Simple one-part message - decode it
        decode_part(msg)
        return

    # Not a message, just text - copy it literally
    output(msg.as_string())
521 def open_output_file(filename):
522 fullpath = os.path.abspath(os.path.join(g.destination_dir, filename))
523 full_dir = os.path.dirname(fullpath)
524 create = not os.path.isdir(full_dir)
526 os.makedirs(full_dir)
528 return open(fullpath, 'wb')
531 os.removedirs(full_dir)