1 """Decode MIME message"""
7 from .__version__ import __version__, __copyright__
9 if sys.version_info[0] >= 3:
10 # Replace email.message._formatparam with _formatparam from Python 2.7
11 # to avoid re-encoding non-ascii params.
12 from mimedecode import formatparam_27 # noqa: F401: Imported for its side effect
14 me = os.path.basename(sys.argv[0])
17 def output_headers(msg):
18 unix_from = msg.get_unixfrom()
22 for key, value in msg.items():
25 value = value.split(';', 1)
29 output(_decode_header(value[1], strip=False))
31 output(os.linesep) # End of headers
def recode_if_needed(s, charset):
    """Recode ``s`` from ``charset`` into the form the program works with.

    On Python 2 a byte string is re-encoded from ``charset`` to
    ``g.default_encoding``; nothing is done when ``charset`` is empty or
    already equal to the default encoding.  On Python 3 a bytes object is
    decoded to ``str``.  Characters that cannot be converted are replaced
    rather than raising.  A value that is not a byte string is returned
    unchanged.

    On Python 3 an empty ``charset`` (``None`` or ``''``) falls back to
    ``g.default_encoding`` instead of failing inside ``bytes.decode``.
    """
    if not isinstance(s, bytes):
        return s
    if bytes is str:  # Python2
        if charset and charset.lower() != g.default_encoding:
            s = s.decode(charset, "replace").\
                encode(g.default_encoding, "replace")
        return s
    # Python3
    return s.decode(charset or g.default_encoding, "replace")
46 def _decode_header(s, strip=True):
47 """Return a decoded string according to RFC 2047.
48 NOTE: This is almost the same as email.Utils.decode.
52 L = email.header.decode_header(s)
53 if not isinstance(L, list):
58 for atom, charset in L:
59 atom = recode_if_needed(atom, charset or g.default_encoding)
64 # Now that we've decoded everything, we just need to join all the parts
65 # together into the final string.
69 def decode_header(msg, header):
70 "Decode mail header (if exists) and put it back, if it was encoded"
74 new_value = _decode_header(value)
75 if new_value != value: # do not bother to touch msg if not changed
76 set_header(msg, header, new_value)
def decode_header_param(msg, header, param):
    """Decode one parameter of a mail header in place.

    The parameter is looked up with ``msg.get_param``; nothing happens when
    it is absent or empty.  A tuple value is recoded from the charset held
    in its first item, using the text held in its third item; a plain
    string is decoded as an RFC 2047 header value.  The parameter is
    written back only when decoding actually changed it.
    """
    value = msg.get_param(param, header=header)
    if not value:
        return

    if isinstance(value, tuple):
        decoded = recode_if_needed(value[2], value[0])
    else:
        decoded = _decode_header(value)

    if decoded != value:  # do not bother to touch msg if not changed
        msg.set_param(param, decoded, header)
96 def _get_exceptions(list):
97 return [x[1:].lower() for x in list[1:] if x[0] == '-']
100 def _decode_headers_params(msg, header, decode_all_params, param_list):
101 if decode_all_params:
102 params = msg.get_params(header=header)
104 for param, value in params:
105 if param not in param_list:
106 decode_header_param(msg, header, param)
108 for param in param_list:
109 decode_header_param(msg, header, param)
112 def _remove_headers_params(msg, header, remove_all_params, param_list):
113 if remove_all_params:
114 params = msg.get_params(header=header)
117 for param, value in params:
118 if param not in param_list:
119 msg.del_param(param, header)
122 if value is None: # No such header
124 if ';' not in value: # There are no parameters
126 del msg[header] # Delete all such headers
127 # Get the value without parameters and set it back
128 msg[header] = value.split(';')[0].strip()
130 for param in param_list:
131 msg.del_param(param, header)
def _split_wildcard(spec):
    """Split a comma-separated spec into ``(is_wildcard, names)``.

    A spec whose first item is ``*`` means "everything except the ``-name``
    items that follow"; ``names`` is then the lowercased exception list.
    Otherwise ``names`` is the list of items exactly as written.
    """
    names = spec.split(',')
    if names[0] == '*':
        return True, _get_exceptions(names)
    return False, names


def _select_headers(msg, spec):
    """Return the header names that the header spec applies to."""
    wildcard, names = _split_wildcard(spec)
    if wildcard:
        # The key list is built once here, before the caller starts
        # modifying msg.
        return [header for header in msg.keys()
                if header.lower() not in names]
    return names


def decode_headers(msg):
    """Decode message headers according to global options.

    Four passes are made over ``msg``, in this order:

    1. ``g.remove_headers`` - delete whole headers;
    2. ``g.remove_headers_params`` - delete header parameters;
    3. ``g.decode_headers`` - decode header values;
    4. ``g.decode_header_params`` - decode header parameters.

    Header and parameter specs are comma-separated lists.  A list that
    starts with ``*`` selects everything except the ``-name`` items after
    it; any other list selects exactly the listed names.
    """
    # Remove headers.
    for header_spec in g.remove_headers:
        for header in _select_headers(msg, header_spec):
            del msg[header]

    # Remove parameters of headers.
    for header_spec, param_spec in g.remove_headers_params:
        remove_all_params, param_list = _split_wildcard(param_spec)
        for header in _select_headers(msg, header_spec):
            _remove_headers_params(
                msg, header, remove_all_params, param_list)

    # Decode headers.
    for header_spec in g.decode_headers:
        for header in _select_headers(msg, header_spec):
            decode_header(msg, header)

    # Decode parameters of headers.
    for header_spec, param_spec in g.decode_header_params:
        decode_all_params, param_list = _split_wildcard(param_spec)
        for header in _select_headers(msg, header_spec):
            _decode_headers_params(
                msg, header, decode_all_params, param_list)
196 def set_header(msg, header, value):
200 msg.replace_header(header, value)
def set_content_type(msg, newtype, charset=None):
    """Set the content type of ``msg`` and, optionally, its charset.

    The ``charset`` parameter of the Content-Type header is only touched
    when a non-empty ``charset`` is given.
    """
    msg.set_type(newtype)
    if not charset:
        return
    msg.set_param("charset", charset, header="Content-Type")
212 caps = None # Globally stored mailcap database; initialized only if needed
215 def decode_body(msg, s):
216 "Decode body to plain text using first copiousoutput filter from mailcap"
223 caps = mailcap.getcaps()
225 content_type = msg.get_content_type()
226 if content_type.startswith('text/'):
227 charset = msg.get_content_charset()
230 filename = tempfile.mktemp()
233 entries = mailcap.lookup(caps, content_type, "view")
234 for entry in entries:
235 if 'copiousoutput' in entry:
237 test = mailcap.subst(entry['test'], content_type, filename)
238 if test and os.system(test) != 0:
240 command = mailcap.subst(entry["view"], content_type, filename)
246 outfile = open(filename, 'wb')
247 if charset and bytes is not str and isinstance(s, bytes): # Python3
248 s = s.decode(charset, "replace")
249 if not isinstance(s, bytes):
250 s = s.encode(g.default_encoding, "replace")
254 pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
255 new_s = pipe.stdout.read()
257 if pipe.wait() == 0: # result=0, Ok
259 if bytes is not str and isinstance(s, bytes): # Python3
260 s = s.decode(g.default_encoding, "replace")
261 if charset and not isinstance(s, bytes):
262 s = s.encode(charset, "replace")
263 set_content_type(msg, "text/plain")
264 msg["X-MIME-Autoconverted"] = \
265 "from %s to text/plain by %s id %s" \
266 % (content_type, g.host_name, command.split()[0])
268 msg["X-MIME-Autoconverted"] = \
269 "failed conversion from %s to text/plain by %s id %s" \
270 % (content_type, g.host_name, command.split()[0])
def recode_charset(msg, s):
    """Recode charset of the message to the default charset.

    If the message declares a charset other than ``g.default_encoding``,
    the body ``s`` is recoded, the Content-Type charset is updated and an
    ``X-MIME-Autoconverted`` header recording the conversion is added.
    The (possibly recoded) body is returned.
    """
    charset = msg.get_content_charset()
    if not charset or charset.lower() == g.default_encoding:
        return s  # Nothing declared, or already in the default charset

    target = g.default_encoding
    s = recode_if_needed(s, charset)
    set_content_type(msg, msg.get_content_type(), target)
    msg["X-MIME-Autoconverted"] = \
        "from %s to %s by %s id %s" % (charset, target, g.host_name, me)
    return s
290 def totext(msg, instring):
291 "Convert instring content to text"
293 # Decode body and recode charset
294 s = decode_body(msg, instring)
296 s = recode_charset(msg, s)
306 def _guess_extension(ctype):
308 if mimetypes is None:
311 user_mime_type = os.path.expanduser('~/.mime.types')
312 if os.path.exists(user_mime_type):
313 mimetypes._db.read(user_mime_type)
314 return mimetypes.guess_extension(ctype)
317 def _save_message(msg, outstring, save_headers=False, save_body=False):
318 for header, param in (
319 ("Content-Disposition", "filename"),
320 ("Content-Type", "name"),
322 fname = msg.get_param(param, header=header)
324 if isinstance(fname, tuple):
325 fname = fname[2] # Do not recode if it isn't recoded yet
327 for forbidden in chr(0), '/', '\\':
328 if forbidden in fname:
337 fname = str(g.save_counter) + fname
339 ext = _guess_extension(msg.get_content_type())
345 outfile = open_output_file(fname)
347 def _output_bytes(s):
348 if not isinstance(s, bytes):
349 s = s.encode(g.default_encoding, "replace")
352 output = _output_bytes
361 def decode_part(msg):
362 "Decode one part of the message"
366 # Test all mask lists and find what to do with this content type
368 ctype = msg.get_content_type()
371 mtype = ctype.split('/')[0]
372 masks.append(mtype + '/*')
376 for content_type in masks:
377 if content_type in g.totext_mask or \
378 content_type in g.decoded_binary_mask:
380 elif content_type in g.binary_mask:
383 elif content_type in g.fully_ignore_mask:
386 encoding = msg["Content-Transfer-Encoding"]
387 if left_binary or encoding in (None, '', '7bit', '8bit', 'binary'):
388 outstring = msg.get_payload()
389 else: # Decode from transfer ecoding to text or binary form
390 outstring = msg.get_payload(decode=1)
391 set_header(msg, "Content-Transfer-Encoding", "8bit")
392 msg["X-MIME-Autoconverted"] = \
393 "from %s to 8bit by %s id %s" % (encoding, g.host_name, me)
395 for content_type in masks:
396 if content_type in g.totext_mask:
397 outstring = totext(msg, outstring)
399 elif content_type in g.binary_mask or \
400 content_type in g.decoded_binary_mask:
404 elif content_type in g.ignore_mask:
406 output("%sMessage body of type %s skipped.%s"
407 % (os.linesep, ctype, os.linesep))
409 elif content_type in g.error_mask:
412 # Neither content type nor masks were listed - decode by default
413 outstring = totext(msg, outstring)
415 for content_type in masks:
416 if content_type in g.save_headers_mask:
417 _save_message(msg, outstring, save_headers=True, save_body=False)
418 if content_type in g.save_body_mask:
419 _save_message(msg, outstring, save_headers=False, save_body=True)
420 if content_type in g.save_message_mask:
421 _save_message(msg, outstring, save_headers=True, save_body=True)
423 for content_type in masks:
424 if content_type in g.error_mask:
425 raise ValueError("content type %s prohibited" % ctype)
428 def decode_multipart(msg):
432 boundary = msg.get_boundary()
435 ctype = msg.get_content_type()
438 mtype = ctype.split('/')[0]
439 masks.append(mtype + '/*')
442 for content_type in masks:
443 if content_type in g.fully_ignore_mask:
445 elif content_type in g.ignore_mask:
447 output("%sMessage body of type %s skipped.%s"
448 % (os.linesep, ctype, os.linesep))
450 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
453 for content_type in masks:
454 if content_type in g.save_body_mask or \
455 content_type in g.save_message_mask:
458 for subpart in msg.get_payload():
460 first_subpart = False
462 _out_l.append(os.linesep)
463 _out_l.append("--%s%s" % (boundary, os.linesep))
464 _out_l.append(subpart.as_string())
465 _out_l.append("%s--%s--%s" % (os.linesep, boundary, os.linesep))
466 outstring = ''.join(_out_l)
471 for content_type in masks:
472 if content_type in g.save_headers_mask:
473 _save_message(msg, outstring, save_headers=True, save_body=False)
474 if content_type in g.save_body_mask:
475 _save_message(msg, outstring, save_headers=False, save_body=True)
476 if content_type in g.save_message_mask:
477 _save_message(msg, outstring, save_headers=True, save_body=True)
479 for content_type in masks:
480 if content_type in g.error_mask:
481 raise ValueError("content type %s prohibited" % ctype)
485 # Preserve the first part, it is probably not a RFC822-message.
487 # Usually it is just a few lines of text (MIME warning).
489 if msg.preamble is not None:
493 for subpart in msg.get_payload():
496 first_subpart = False
499 output("--%s%s" % (boundary, os.linesep))
501 # Recursively decode all parts of the subpart
502 decode_message(subpart)
505 output("%s--%s--%s" % (os.linesep, boundary, os.linesep))
def decode_message(msg):
    """Decode a message, dispatching on its structure.

    Multipart messages go to ``decode_multipart``, one-part messages that
    have headers go to ``decode_part``, and anything else is written out
    unchanged.
    """
    if msg.is_multipart():
        decode_multipart(msg)
        return
    if len(msg):  # Simple one-part message (there are headers) - decode it
        decode_part(msg)
        return
    # Not a message, just text - copy it literally
    output(msg.as_string())
522 def open_output_file(filename):
523 fullpath = os.path.abspath(os.path.join(g.destination_dir, filename))
524 full_dir = os.path.dirname(fullpath)
525 create = not os.path.isdir(full_dir)
527 os.makedirs(full_dir)
529 return open(fullpath, 'wb')
532 os.removedirs(full_dir)