#! /usr/bin/env python
"""Decode MIME message"""
-
from mimedecode_version import __version__, __author__, __copyright__, __license__
-
import sys, os
import email
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
-
-
-import socket
-host_name = socket.gethostname()
-
me = os.path.basename(sys.argv[0])
""" % (__version__, __copyright__))
if exit: sys.exit(0)
-
-def usage(code=0):
+def usage(code=0, errormsg=''):
+ "Print version and usage to stdout, errormsg (if any) to stderr, then exit with code"
version(0)
sys.stdout.write("""\
-Usage: %s [-h|--help] [-V|--version] [-cCDP] [-f charset] [-d header] [-p header:param] [-beit mask] [filename]
+Usage: %s [-h|--help] [-V|--version] [-cCDP] [-H|--host=hostname] [-f charset] [-d header] [-p header:param] [-r header] [-beit mask] [-o output_file] [input_file [output_file]]
""" % me)
+ if errormsg:
+ sys.stderr.write(errormsg + '\n')
sys.exit(code)
-def output(s, outfile=sys.stdout):
- outfile.write(s)
-
-def output_headers(msg, outfile=sys.stdout):
+def output_headers(msg):
unix_from = msg.get_unixfrom()
if unix_from:
- output(unix_from + os.linesep)
+ output(unix_from + '\n')
for key, value in msg.items():
- output("%s: %s\n" % (key, value), outfile)
- output("\n", outfile) # End of headers
+ output("%s: %s\n" % (key, value))
+ output("\n") # End of headers
def recode(s, charset):
- return unicode(s, charset, "replace").encode(GlobalOptions.default_encoding, "replace")
-
+ return unicode(s, charset, "replace").encode(gopts.default_encoding, "replace")
-def recode2(s, charset):
- if charset and charset.lower() <> GlobalOptions.default_encoding:
+def recode_if_needed(s, charset):
+ if charset and charset.lower() <> gopts.default_encoding:
s = recode(s, charset)
return s
"""Return a decoded string according to RFC 2047.
NOTE: This is almost the same as email.Utils.decode.
"""
- from types import ListType
import email.Header
L = email.Header.decode_header(s)
- if not isinstance(L, ListType):
+ if not isinstance(L, list):
# s wasn't decoded
return s
if charset is None:
rtn.append(atom)
else:
- rtn.append(recode2(atom, charset))
+ rtn.append(recode_if_needed(atom, charset))
rtn.append(' ')
del rtn[-1] # remove the last space
# together into the final string.
return ''.join(rtn)
-
def decode_header(msg, header):
"Decode mail header (if exists) and put it back, if it was encoded"
def _decode_header_param(s):
- return recode2(s[2], s[0])
-
+ return recode_if_needed(s[2], s[0])
def decode_header_param(msg, header, param):
"Decode mail header's parameter (if exists) and put it back, if it was encoded"
if msg.has_key(header):
value = msg.get_param(param, header=header)
if value:
- from types import TupleType
- if isinstance(value, TupleType):
+ if isinstance(value, tuple):
new_value = _decode_header_param(value)
else:
new_value = _decode_header(value)
def decode_headers(msg):
"Decode message headers according to global options"
- for header in GlobalOptions.decode_headers:
+ for header in gopts.decode_headers:
decode_header(msg, header)
- for header, param in GlobalOptions.decode_header_params:
+ for header, param in gopts.decode_header_params:
decode_header_param(msg, header, param)
+ for header in gopts.remove_headers:
+ del msg[header]
+
def set_header(msg, header, value):
"Replace header"
msg.set_param("charset", charset, "Content-Type")
-
caps = None # Globally stored mailcap database; initialized only if needed
def decode_body(msg, s):
os.remove(filename)
set_content_type(msg, "text/plain")
- msg["X-MIME-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, host_name, command.split()[0])
+ msg["X-MIME-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, gopts.host_name, command.split()[0])
return s
"Recode charset of the message to the default charset"
save_charset = charset = msg.get_content_charset()
- if charset and charset.lower() <> GlobalOptions.default_encoding:
- s = recode2(s, charset)
+ if charset and charset.lower() <> gopts.default_encoding:
+ s = recode_if_needed(s, charset)
content_type = msg.get_content_type()
- set_content_type(msg, content_type, GlobalOptions.default_encoding)
- msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, GlobalOptions.default_encoding, host_name, me)
+ set_content_type(msg, content_type, gopts.default_encoding)
+ msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, gopts.default_encoding, gopts.host_name, me)
return s
def totext(msg, instring):
"Convert instring content to text"
- if msg.is_multipart(): # Recursively decode all parts of the multipart message
- newfile = StringIO(str(msg))
- newfile.seek(0)
- decode_file(newfile)
- return
-
# Decode body and recode charset
s = decode_body(msg, instring)
- if GlobalOptions.recode_charset:
+ if gopts.recode_charset:
s = recode_charset(msg, s)
output_headers(msg)
else: # Decode from transfer ecoding to text or binary form
outstring = str(msg.get_payload(decode=1))
set_header(msg, "Content-Transfer-Encoding", "8bit")
- msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, host_name, me)
+ msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, gopts.host_name, me)
# Test all mask lists and find what to do with this content type
masks = []
masks.append('*/*')
for content_type in masks:
- if content_type in GlobalOptions.totext_mask:
+ if content_type in gopts.totext_mask:
totext(msg, outstring)
return
- elif content_type in GlobalOptions.binary_mask:
+ elif content_type in gopts.binary_mask:
output_headers(msg)
output(outstring)
return
- elif content_type in GlobalOptions.ignore_mask:
+ elif content_type in gopts.ignore_mask:
output_headers(msg)
output("\nMessage body of type `%s' skipped.\n" % content_type)
return
- elif content_type in GlobalOptions.error_mask:
+ elif content_type in gopts.error_mask:
raise ValueError, "content type `%s' prohibited" % content_type
# Neither content type nor masks were listed - decode by default
totext(msg, outstring)
-def decode_file(infile):
- "Decode the entire message"
+def decode_multipart(msg):
+ "Decode multipart"
- msg = email.message_from_file(infile)
- boundary = msg.get_boundary()
+ decode_headers(msg)
+ output_headers(msg)
- if msg.is_multipart():
- decode_headers(msg)
- output_headers(msg)
+ if msg.preamble: # Preserve the first part, it is probably not a RFC822-message
+ output(msg.preamble) # Usually it is just a few lines of text (MIME warning)
- if msg.preamble: # Preserve the first part, it is probably not a RFC822-message
- output(msg.preamble) # Usually it is just a few lines of text (MIME warning)
+ boundary = msg.get_boundary()
- for subpart in msg.get_payload():
+ for subpart in msg.get_payload():
+ if boundary:
output("\n--%s\n" % boundary)
- decode_part(subpart)
+ # Recursively decode all parts of the subpart
+ decode_message(subpart)
+
+ if boundary:
output("\n--%s--\n" % boundary)
- if msg.epilogue:
- output(msg.epilogue)
+ if msg.epilogue:
+ output(msg.epilogue)
- else:
- if msg.has_key("Content-Type"): # Simple one-part message - decode it
- decode_part(msg)
- else: # Not a message, just text - copy it literally
- output(str(msg))
+def decode_message(msg):
+ "Decode message"
+
+ if msg.is_multipart():
+ decode_multipart(msg)
+ elif len(msg): # Simple one-part message (there are headers) - decode it
+ decode_part(msg)
+ else: # Not a message, just text - copy it literally
+ output(msg.as_string())
class GlobalOptions:
from m_lib.defenc import default_encoding
recode_charset = 1 # recode charset of message body
- decode_headers = ["From", "Subject"] # A list of headers to decode
+ host_name = None
+
+ # A list of headers to decode
+ decode_headers = ["From", "To", "Cc", "Reply-To", "Mail-Followup-To",
+ "Subject"]
+
+ # A list of headers' parameters to decode
decode_header_params = [
("Content-Type", "name"),
("Content-Disposition", "filename"),
- ] # A list of headers' parameters to decode
+ ]
+
+ # A list of headers to remove
+ remove_headers = []
totext_mask = [] # A list of content-types to decode
binary_mask = [] # A list to pass through
ignore_mask = [] # Ignore (skip, do not decode and do not include into output)
error_mask = [] # Raise error if encounter one of these
+ input_filename = None
+ output_filename = None
-def init():
+gopts = GlobalOptions
+
+
+def get_opt():
+ "Parse sys.argv[1:] with getopt and store the recognized options in gopts"
from getopt import getopt, GetoptError
try:
- options, arguments = getopt(sys.argv[1:], 'hVcCDPf:d:p:b:e:i:t:',
- ['help', 'version'])
+ # 'host=' needs the trailing '=': --host takes a value (see usage text);
+ # without it getopt rejects --host=name with GetoptError
+ options, arguments = getopt(sys.argv[1:], 'hVcCDPH:f:d:p:r:b:e:i:t:o:',
+ ['help', 'version', 'host='])
except GetoptError:
usage(1)
for option, value in options:
- if option == '-h':
- usage()
- elif option == '--help':
+ if option in ('-h', '--help'):
usage()
- elif option == '-V':
- version()
- elif option == '--version':
+ elif option in ('-V', '--version'):
version()
elif option == '-c':
- GlobalOptions.recode_charset = 1
+ gopts.recode_charset = 1
elif option == '-C':
- GlobalOptions.recode_charset = 0
+ gopts.recode_charset = 0
+ elif option in ('-H', '--host'):
+ gopts.host_name = value
elif option == '-f':
- GlobalOptions.default_encoding = value
+ gopts.default_encoding = value
elif option == '-d':
- GlobalOptions.decode_headers.append(value)
+ gopts.decode_headers.append(value)
elif option == '-D':
- GlobalOptions.decode_headers = []
+ gopts.decode_headers = []
elif option == '-p':
- GlobalOptions.decode_header_params.append(value.split(':', 1))
+ gopts.decode_header_params.append(value.split(':', 1))
elif option == '-P':
- GlobalOptions.decode_header_params = []
+ gopts.decode_header_params = []
+ elif option == '-r':
+ gopts.remove_headers.append(value)
elif option == '-t':
- GlobalOptions.totext_mask.append(value)
+ gopts.totext_mask.append(value)
elif option == '-b':
- GlobalOptions.binary_mask.append(value)
+ gopts.binary_mask.append(value)
elif option == '-i':
- GlobalOptions.ignore_mask.append(value)
+ gopts.ignore_mask.append(value)
elif option == '-e':
- GlobalOptions.error_mask.append(value)
+ gopts.error_mask.append(value)
+ elif option == '-o':
+ gopts.output_filename = value
else:
usage(1)
if __name__ == "__main__":
- arguments = init()
+ arguments = get_opt()
la = len(arguments)
- if la >= 2:
- usage(1)
- if (la == 0) or (arguments[0] == '-'):
+ if la == 0:
+ gopts.input_filename = '-'
infile = sys.stdin
+ if gopts.output_filename:
+ outfile = open(gopts.output_filename, 'w')
+ else:
+ gopts.output_filename = '-'
+ outfile = sys.stdout
+ elif la in (1, 2):
+ if (arguments[0] == '-'):
+ gopts.input_filename = '-'
+ infile = sys.stdin
+ else:
+ gopts.input_filename = arguments[0]
+ infile = open(arguments[0], 'r')
+ if la == 1:
+ if gopts.output_filename:
+ outfile = open(gopts.output_filename, 'w')
+ else:
+ gopts.output_filename = '-'
+ outfile = sys.stdout
+ elif la == 2:
+ if gopts.output_filename:
+ usage(1, 'Too many output filenames')
+ if (arguments[1] == '-'):
+ gopts.output_filename = '-'
+ outfile = sys.stdout
+ else:
+ gopts.output_filename = arguments[1]
+ outfile = open(arguments[1], 'w')
else:
- infile = open(arguments[0], 'r')
+ usage(1, 'Too many arguments')
- decode_file(infile)
+ if (infile is sys.stdin) and sys.stdin.isatty():
+ if (outfile is sys.stdout) and sys.stdout.isatty():
+ usage()
+ usage(1, 'Filtering from console is forbidden')
+
+ if not gopts.host_name:
+ import socket
+ gopts.host_name = socket.gethostname()
+
+ gopts.outfile = outfile
+ output = outfile.write
+
+ try:
+ decode_message(email.message_from_file(infile))
+ finally:
+ infile.close()
+ outfile.close()