1 """Decode MIME message"""
8 from .__version__ import __version__, __copyright__
10 if sys.version_info[0] >= 3:
11 # Replace email.message._formatparam with _formatparam from Python 2.7
12 # to avoid re-encoding non-ascii params.
13 from mimedecode import formatparam_27 # noqa: F401: Imported for its side effect
15 me = os.path.basename(sys.argv[0])
18 def output_headers(msg):
19 unix_from = msg.get_unixfrom()
23 for key, value in msg.items():
26 value = value.split(';', 1)
30 output(_decode_header(value[1], strip=False))
32 output(os.linesep) # End of headers
35 def recode_if_needed(s, charset):
36 if bytes is str: # Python2
37 if isinstance(s, bytes) and \
38 charset and charset.lower() != g.default_encoding:
39 s = s.decode(charset, "replace").\
40 encode(g.default_encoding, "replace")
42 if isinstance(s, bytes):
43 s = s.decode(charset, "replace")
47 def _decode_header(s, strip=True):
48 """Return a decoded string according to RFC 2047.
49 NOTE: This is almost the same as email.Utils.decode.
53 L = email.header.decode_header(s)
54 if not isinstance(L, list):
59 for atom, charset in L:
60 atom = recode_if_needed(atom, charset or g.default_encoding)
65 # Now that we've decoded everything, we just need to join all the parts
66 # together into the final string.
70 def decode_header(msg, header):
71 "Decode mail header (if exists) and put it back, if it was encoded"
75 new_value = _decode_header(value)
76 if new_value != value: # do not bother to touch msg if not changed
77 set_header(msg, header, new_value)
80 def decode_header_param(msg, header, param):
81 """Decode mail header's parameter
83 Decode mail header's parameter (if exists)
84 and put it back if it was encoded.
87 value = msg.get_param(param, header=header)
89 if isinstance(value, tuple):
90 new_value = recode_if_needed(value[2], value[0])
92 new_value = _decode_header(value)
93 if new_value != value: # do not bother to touch msg if not changed
94 msg.set_param(param, new_value, header)
97 def _get_exceptions(list):
98 return [x[1:].lower() for x in list[1:] if x[0] == '-']
101 def _decode_headers_params(msg, header, decode_all_params, param_list):
102 if decode_all_params:
103 params = msg.get_params(header=header)
105 for param, value in params:
106 if param not in param_list:
107 decode_header_param(msg, header, param)
109 for param in param_list:
110 decode_header_param(msg, header, param)
113 def _remove_headers_params(msg, header, remove_all_params, param_list):
114 if remove_all_params:
115 params = msg.get_params(header=header)
118 for param, value in params:
119 if param not in param_list:
120 msg.del_param(param, header)
123 if value is None: # No such header
125 if ';' not in value: # There are no parameters
127 del msg[header] # Delete all such headers
128 # Get the value without parameters and set it back
129 msg[header] = value.split(';')[0].strip()
131 for param in param_list:
132 msg.del_param(param, header)
135 def decode_headers(msg):
136 "Decode message headers according to global options"
138 for header_list in g.remove_headers:
139 header_list = header_list.split(',')
140 if header_list[0] == '*': # Remove all headers except listed
141 header_list = _get_exceptions(header_list)
142 for header in msg.keys():
143 if header.lower() not in header_list:
145 else: # Remove listed headers
146 for header in header_list:
149 for header_list, param_list in g.remove_headers_params:
150 header_list = header_list.split(',')
151 param_list = param_list.split(',')
152 # Remove all params except listed.
153 remove_all_params = param_list[0] == '*'
154 if remove_all_params:
155 param_list = _get_exceptions(param_list)
156 if header_list[0] == '*': # Remove for all headers except listed
157 header_list = _get_exceptions(header_list)
158 for header in msg.keys():
159 if header.lower() not in header_list:
160 _remove_headers_params(
161 msg, header, remove_all_params, param_list)
162 else: # Remove for listed headers
163 for header in header_list:
164 _remove_headers_params(
165 msg, header, remove_all_params, param_list)
167 for header_list in g.decode_headers:
168 header_list = header_list.split(',')
169 if header_list[0] == '*': # Decode all headers except listed
170 header_list = _get_exceptions(header_list)
171 for header in msg.keys():
172 if header.lower() not in header_list:
173 decode_header(msg, header)
174 else: # Decode listed headers
175 for header in header_list:
176 decode_header(msg, header)
178 for header_list, param_list in g.decode_header_params:
179 header_list = header_list.split(',')
180 param_list = param_list.split(',')
181 # Decode all params except listed.
182 decode_all_params = param_list[0] == '*'
183 if decode_all_params:
184 param_list = _get_exceptions(param_list)
185 if header_list[0] == '*': # Decode for all headers except listed
186 header_list = _get_exceptions(header_list)
187 for header in msg.keys():
188 if header.lower() not in header_list:
189 _decode_headers_params(
190 msg, header, decode_all_params, param_list)
191 else: # Decode for listed headers
192 for header in header_list:
193 _decode_headers_params(
194 msg, header, decode_all_params, param_list)
197 def set_header(msg, header, value):
201 msg.replace_header(header, value)
206 def set_content_type(msg, newtype, charset=None):
207 msg.set_type(newtype)
210 msg.set_param("charset", charset, "Content-Type")
213 caps = None # Globally stored mailcap database; initialized only if needed
216 def decode_body(msg, s):
217 "Decode body to plain text using first copiousoutput filter from mailcap"
224 caps = mailcap.getcaps()
226 content_type = msg.get_content_type()
227 if content_type.startswith('text/'):
228 charset = msg.get_content_charset()
231 tmpfile = tempfile.NamedTemporaryFile()
234 entries = mailcap.lookup(caps, content_type, "view")
235 for entry in entries:
236 if 'copiousoutput' in entry:
238 test = mailcap.subst(entry['test'], content_type, tmpfile.name)
239 if test and os.system(test) != 0:
241 command = mailcap.subst(entry["view"], content_type, tmpfile.name)
247 if charset and bytes is not str and isinstance(s, bytes): # Python3
248 s = s.decode(charset, "replace")
249 if not isinstance(s, bytes):
250 s = s.encode(g.default_encoding, "replace")
254 pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
255 new_s = pipe.stdout.read()
257 if pipe.wait() == 0: # result=0, Ok
259 if bytes is not str and isinstance(s, bytes): # Python3
260 s = s.decode(g.default_encoding, "replace")
261 if charset and not isinstance(s, bytes):
262 s = s.encode(charset, "replace")
263 set_content_type(msg, "text/plain")
264 msg["X-MIME-Autoconverted"] = \
265 "from %s to text/plain by %s id %s" \
266 % (content_type, g.host_name, command.split()[0])
268 msg["X-MIME-Autoconverted"] = \
269 "failed conversion from %s to text/plain by %s id %s" \
270 % (content_type, g.host_name, command.split()[0])
271 tmpfile.close() # Will be removed on close
276 def recode_charset(msg, s):
277 "Recode charset of the message to the default charset"
279 save_charset = charset = msg.get_content_charset()
280 if charset and charset.lower() != g.default_encoding:
281 s = recode_if_needed(s, charset)
282 content_type = msg.get_content_type()
283 set_content_type(msg, content_type, g.default_encoding)
284 msg["X-MIME-Autoconverted"] = \
285 "from %s to %s by %s id %s" \
286 % (save_charset, g.default_encoding, g.host_name, me)
290 def totext(msg, instring):
291 "Convert instring content to text"
293 # Decode body and recode charset
294 s = decode_body(msg, instring)
296 s = recode_charset(msg, s)
306 def _guess_extension(ctype):
308 if mimetypes is None:
311 user_mime_type = os.path.expanduser('~/.mime.types')
312 if os.path.exists(user_mime_type):
313 mimetypes._db.read(user_mime_type)
314 return mimetypes.guess_extension(ctype)
317 def _save_message(msg, outstring, save_headers=False, save_body=False):
318 for header, param in (
319 ("Content-Disposition", "filename"),
320 ("Content-Type", "name"),
322 fname = msg.get_param(param, header=header)
324 if isinstance(fname, tuple):
325 fname = fname[2] # Do not recode if it isn't recoded yet
327 for forbidden in chr(0), '/', '\\':
328 if forbidden in fname:
337 fname = str(g.save_counter) + fname
339 ext = _guess_extension(msg.get_content_type())
345 outfile = open_output_file(fname)
347 def _output_bytes(s):
348 if not isinstance(s, bytes):
349 s = s.encode(g.default_encoding, "replace")
352 output = _output_bytes
361 def decode_part(msg):
362 "Decode one part of the message"
366 # Test all mask lists and find what to do with this content type
368 ctype = msg.get_content_type()
371 mtype = ctype.split('/')[0]
372 masks.append(mtype + '/*')
376 for content_type in masks:
377 if content_type in g.totext_mask or \
378 content_type in g.decoded_binary_mask:
380 elif content_type in g.binary_mask:
383 elif content_type in g.fully_ignore_mask:
386 encoding = msg["Content-Transfer-Encoding"]
387 if left_binary or encoding in (None, '', '7bit', '8bit', 'binary'):
388 outstring = msg.get_payload()
389 else: # Decode from transfer encoding to text or binary form
390 outstring = msg.get_payload(decode=1)
391 set_header(msg, "Content-Transfer-Encoding", "8bit")
392 msg["X-MIME-Autoconverted"] = \
393 "from %s to 8bit by %s id %s" % (encoding, g.host_name, me)
395 for content_type in masks:
396 if content_type in g.totext_mask:
397 outstring = totext(msg, outstring)
399 elif content_type in g.binary_mask or \
400 content_type in g.decoded_binary_mask:
404 elif content_type in g.ignore_mask:
406 output("%sMessage body of type %s skipped.%s"
407 % (os.linesep, ctype, os.linesep))
409 elif content_type in g.error_mask:
412 # Neither content type nor masks were listed - decode by default
413 outstring = totext(msg, outstring)
415 for content_type in masks:
416 if content_type in g.save_headers_mask:
417 _save_message(msg, outstring, save_headers=True, save_body=False)
418 if content_type in g.save_body_mask:
419 _save_message(msg, outstring, save_headers=False, save_body=True)
420 if content_type in g.save_message_mask:
421 _save_message(msg, outstring, save_headers=True, save_body=True)
423 for content_type in masks:
424 if content_type in g.error_mask:
425 raise ValueError("content type %s prohibited" % ctype)
428 def decode_multipart(msg):
432 boundary = msg.get_boundary()
435 ctype = msg.get_content_type()
438 mtype = ctype.split('/')[0]
439 masks.append(mtype + '/*')
442 for content_type in masks:
443 if content_type in g.fully_ignore_mask:
445 elif content_type in g.ignore_mask:
447 output("%sMessage body of type %s skipped.%s"
448 % (os.linesep, ctype, os.linesep))
450 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
453 for content_type in masks:
454 if content_type in g.save_body_mask or \
455 content_type in g.save_message_mask:
458 for subpart in msg.get_payload():
460 first_subpart = False
462 _out_l.append(os.linesep)
463 _out_l.append("--%s%s" % (boundary, os.linesep))
464 _out_l.append(subpart.as_string())
465 _out_l.append("%s--%s--%s" % (os.linesep, boundary, os.linesep))
466 outstring = ''.join(_out_l)
471 for content_type in masks:
472 if content_type in g.save_headers_mask:
473 _save_message(msg, outstring, save_headers=True, save_body=False)
474 if content_type in g.save_body_mask:
475 _save_message(msg, outstring, save_headers=False, save_body=True)
476 if content_type in g.save_message_mask:
477 _save_message(msg, outstring, save_headers=True, save_body=True)
479 for content_type in masks:
480 if content_type in g.error_mask:
481 raise ValueError("content type %s prohibited" % ctype)
485 # Preserve the first part, it is probably not a RFC822-message.
487 # Usually it is just a few lines of text (MIME warning).
489 if msg.preamble is not None:
493 for subpart in msg.get_payload():
496 first_subpart = False
499 output("--%s%s" % (boundary, os.linesep))
501 # Recursively decode all parts of the subpart
502 decode_message(subpart)
505 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
511 def decode_message(msg):
514 if msg.is_multipart():
515 decode_multipart(msg)
516 elif len(msg): # Simple one-part message (there are headers) - decode it
518 else: # Not a message, just text - copy it literally
519 output(msg.as_string())
522 def open_output_file(filename):
523 fullpath = os.path.abspath(os.path.join(g.destination_dir, filename))
524 full_dir = os.path.dirname(fullpath)
525 create = not os.path.isdir(full_dir)
527 os.makedirs(full_dir)
529 return open(fullpath, 'wb')
532 shutil.rmtree(full_dir)