mimedecode.py version 2.0.0d.

author Oleg Broytman <phd@phdru.name>

Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)

committer Oleg Broytman <phd@phdru.name>

Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)
author Oleg Broytman <phd@phdru.name>
Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)
committer Oleg Broytman <phd@phdru.name>
Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)
diff --git a/ANNOUNCE b/ANNOUNCE

new file mode 100644 (file)

index 0000000..731ee2c
--- /dev/null
+++ b/ANNOUNCE
@@ -0,0 +1,59 @@
+
+                                mimedecode.py
+
+WHAT IS IT
+
+   Mail users, especially in non-English countries, often find that mail
+messages arrive in different formats, with different content types, in
+different encodings and charsets. Usually this is good because it allows us to
+use an appropriate format/encoding/whatever. Sometimes, though, some unification
+is desirable. For example, one may want to put mail messages into an archive,
+make HTML indices, run a search indexer, etc. In such situations converting
+messages to text in one character set and skipping some binary attachments is
+highly desirable.
+
+   Here is the solution - mimedecode.py.
+
+   This is a program to decode MIME messages. The program expects one input
+file (either on the command line or on stdin) which is treated as an RFC822
+message and decoded to stdout. If the file is not an RFC822 message it is just
+piped to stdout one-to-one. If the file is a simple RFC822 message it is just
+decoded as one part. If it is a MIME message with multiple parts ("attachments")
+all parts are decoded. Decoding can be controlled by command-line options.
+
+
+WHAT'S NEW in version 2.0.0d (2004-08-26)
+   Documentation update. Changed and corrected documentation, especially
+docbook source file. Changed Makefiles to use sgml-tools or 4xslt (from
+4Suite.org).
+
+
+WHAT'S NEW in version 2.0.0
+   Major rewrite to use the Python email package. The old version is still
+included in the archive; the latest bug-fix version is 1.1.7.
+
+
+WHERE TO GET
+   Master site:    http://phd.pp.ru/Software/Python/#mimedecode
+
+   Faster mirrors: http://phd.by.ru/Software/Python/#mimedecode
+                http://phd2.chat.ru/Software/Python/#mimedecode
+
+   Requires: Python 2.2.2+
+
+   Recommends: configured mailcap database.
+
+   Documentation (also included in the package):
+         http://phd.pp.ru/Software/Python/mimedecode.txt
+         http://phd.by.ru/Software/Python/mimedecode.txt
+      http://phd2.chat.ru/Software/Python/mimedecode.txt
+
+
+AUTHOR
+   Oleg Broytmann <phd@phd.pp.ru>
+
+COPYRIGHT
+   Copyright (C) 2001-2004 PhiloSoft Design
+
+LICENSE
+   GPL
diff --git a/MANIFEST.in b/MANIFEST.in

new file mode 100644 (file)

index 0000000..946c283
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include mimedecode.py mimedecode1.py
+include Makefile* MANIFEST.in ANNOUNCE
+include mimedecode.docbook mimedecode.html mimedecode.txt mimedecode.man
diff --git a/Makefile b/Makefile

new file mode 100644 (file)

index 0000000..a2c2b78
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,34 @@
+
+all: docs sdist
+
+DISTFILES = mimedecode.py \
+mimedecode1.py \
+setup.py \
+Makefile \
+Makefile.sgmlt \
+Makefile.4xslt \
+MANIFEST.in \
+ANNOUNCE \
+mimedecode.docbook \
+mimedecode.html \
+mimedecode.man \
+mimedecode.txt
+
+sdist: dist/mimedecode-2.0.0d.tar.gz
+       touch sdist
+
+dist/mimedecode-2.0.0d.tar.gz: $(DISTFILES)
+       umask 022 && python setup.py sdist
+
+docs: mimedecode.man mimedecode.txt mimedecode.html
+
+include Makefile.4xslt
+
+
+CLEANFILES = mimedecode.pyc MANIFEST sdist
+
+clean:
+       rm -f $(CLEANFILES)
+
+distclean: clean
+       rm -rf dist sdist
diff --git a/Makefile.4xslt b/Makefile.4xslt

new file mode 100644 (file)

index 0000000..6029e4a
--- /dev/null
+++ b/Makefile.4xslt
@@ -0,0 +1,11 @@
+DOCBOOK_XSL=/usr/local/src/DocBook/docbook-xsl
+
+mimedecode.html: mimedecode.docbook Makefile.4xslt
+       4xslt $< $(DOCBOOK_XSL)/html/docbook.xsl >$@
+
+mimedecode.txt: mimedecode.html Makefile.4xslt
+       elinks -dump $< >$@
+
+mimedecode.man: mimedecode.docbook Makefile.4xslt
+       4xslt $< $(DOCBOOK_XSL)/manpages/docbook.xsl
+       mv mimedecode.py.1 $@
diff --git a/Makefile.sgmlt b/Makefile.sgmlt

new file mode 100644 (file)

index 0000000..eedda06
--- /dev/null
+++ b/Makefile.sgmlt
@@ -0,0 +1,10 @@
+mimedecode.html: mimedecode.docbook Makefile.sgmlt
+       sgmltools -b html $<
+       mv mimedecode/mimedecode.py.html mimedecode.html
+       rmdir mimedecode
+
+mimedecode.txt: mimedecode.docbook Makefile.sgmlt
+       sgmltools -b txt $<
+
+mimedecode.man: mimedecode.docbook Makefile.sgmlt
+       docbook-to-man $< >$@
diff --git a/mimedecode.docbook b/mimedecode.docbook

new file mode 100644 (file)

index 0000000..1e6567a
--- /dev/null
+++ b/mimedecode.docbook
@@ -0,0 +1,342 @@
+<?xml version="1.0" standalone="no"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1//EN"
+   "http://www.oasis-open.org/docbook/xml/4.1/docbook.dtd">
+
+<refentry id="mimedecode.py">
+
+<refmeta>
+   <refentrytitle>mimedecode.py</refentrytitle>
+   <manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+   <refname>mimedecode.py</refname>
+   <refpurpose>decode MIME message</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+   <cmdsynopsis>
+      <command>mimedecode.py</command>
+      <arg choice="opt">
+         <option>-h|--help</option>
+      </arg>
+      <arg choice="opt">
+         <option>-V|--version</option>
+      </arg>
+      <arg choice="opt">
+         <option>-cCDP</option>
+      </arg>
+      <arg choice="opt">
+         <option>-f charset</option>
+      </arg>
+      <arg choice="opt">
+         <option>-d header</option>
+      </arg>
+      <arg choice="opt">
+         <option>-p header:param</option>
+      </arg>
+      <arg choice="opt">
+         <option>-beit mask</option>
+      </arg>
+      <arg choice="opt">filename</arg>
+   </cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1>
+<title>DESCRIPTION</title>
+<para>
+   Mail users, especially in non-English countries, often find that mail
+messages arrive in different formats, with different content types, in
+different encodings and charsets. Usually this is good because it allows one to
+use an appropriate format/encoding/whatever. Sometimes, though, some unification
+is desirable. For example, one may want to put mail messages into an archive,
+make HTML indices, run a search indexer, etc. In such situations converting
+messages to text in one character set and skipping some binary attachments is
+highly desirable.
+</para>
+
+<para>
+   Here is the solution - mimedecode.py!
+</para>
+
+<para>
+   It is a program to decode MIME messages. The program expects one input file
+(either on the command line or on stdin) which is treated as an RFC822 message,
+and decoded to stdout. If the file is not an RFC822 message it is just piped to
+stdout as is. If the file is a simple RFC822 message it is just decoded as one
+part. If it is a MIME message with multiple parts ("attachments") all parts are
+decoded recursively. Decoding can be controlled by the command-line options.
+</para>
+
+<para>
+   First, Subject and Content-Disposition headers are examined. If any of those
+exists, it is decoded according to RFC2047. The Content-Disposition header
+itself is not decoded - only its "filename" parameter. Encoded header parameters
+violate the RFC, but are widely deployed anyway, especially in the M$ Ophice GUI
+(often referred to as "Windoze") world, where programmers are often ignorant
+lamers who never even heard about RFCs. Correct parameter encoding is specified
+by RFC2231. This program decodes RFC2231-encoded parameters, too.
+</para>
+
+<para>
+   Then the body of the message (or the current part) is decoded. Decoding
+starts with looking at the Content-Transfer-Encoding header. If the header
+specifies a non-8bit encoding (usually base64 or quoted-printable), the body is
+converted to 8bit. Then, if its content type is multipart (multipart/related or
+multipart/mixed, e.g.) every part is recursively decoded. If it is not
+multipart, the mailcap database is consulted to find a way to convert the body
+to plain text. (I have no idea how mailcap could be configured on said M$ Ophice
+GUI, please don't ask me; real OS users can consult my example at
+http://phd.pp.ru/Software/dotfiles/mailcap.html). The decoding process uses the
+first copiousoutput filter it can find. If there is no filter the body is just
+passed unconverted.
+</para>
+
+<para>
+   Then the Content-Type header is consulted for the charset. If it is not equal
+to the current default charset the body text is recoded. Finally the message
+headers and body are flushed to stdout.
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>OPTIONS</title>
+<variablelist>
+   <varlistentry>
+      <term>-h</term>
+      <term>--help</term>
+      <listitem>
+         <para>
+            Print brief usage help and exit.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-V</term>
+      <term>--version</term>
+      <listitem>
+         <para>
+            Print version and exit.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-c</term>
+      <listitem>
+         <para>
+            Recode different character sets in message body to current default
+            charset; this is the default.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-C</term>
+      <listitem>
+         <para>
+            Do not recode character sets in message body.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-f charset</term>
+      <listitem>
+         <para>
+            Force this charset to be the current default charset instead of
+            sys.getdefaultencoding().
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-d header</term>
+      <listitem>
+         <para>
+            Add the header to a list of headers to decode; initially the list
+            contains headers "From" and "Subject".
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-D</term>
+      <listitem>
+         <para>
+            Clear the list of headers to decode (make it empty).
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-p header:param</term>
+      <listitem>
+         <para>
+            Add the (header, param) pair to a list of headers' parameters to
+            decode; initially the list contains header "Content-Disposition",
+            parameter "filename".
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-P</term>
+      <listitem>
+         <para>
+            Clear the list of headers' parameters to decode (make it empty).
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-b mask</term>
+      <listitem>
+         <para>
+            Append mask to the list of binary content types; if the message to
+            decode has a part of this type the program will pass the part as is,
+            without any additional processing.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-e mask</term>
+      <listitem>
+         <para>
+            Append mask to the list of error content types; if the message to
+            decode has a part of this type the program will raise ValueError.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-i mask</term>
+      <listitem>
+         <para>
+            Append mask to the list of content types to ignore; if the message to
+            decode has a part of this type the program will not pass it, instead
+            a line "Message body of type `%s' skipped." will be issued.
+         </para>
+      </listitem>
+   </varlistentry>
+
+   <varlistentry>
+      <term>-t mask</term>
+      <listitem>
+         <para>
+            Append mask to the list of content types to convert to text; if the
+            message to decode has a part of this type the program will consult
+            mailcap database, find first copiousoutput filter and convert the
+            part.
+         </para>
+      </listitem>
+   </varlistentry>
+</variablelist>
+
+<para>
+   The last 4 options (-beit) require more explanation. They allow a user
+to control body decoding with great flexibility. Think about said mail
+archive; for example, its maintainer wants to store only text there, convert
+Postscript/PDF to text, pass HTML and images as is, and ignore everything
+else. Easy:
+</para>
+
+<para>
+<code language="shell">
+   mimedecode.py -t application/postscript -t application/pdf -b text/html
+         -b 'image/*' -i '*/*'
+</code>
+</para>
+
+<para>
+   When the program decodes a message (or its part), it consults the
+Content-Type header. The content type is searched in all 4 lists, in order
+"text-binary-ignore-error". If found, the appropriate action is performed. If
+not found, the program searches the same lists for the "type/*" mask (the type
+of "text/html" is just "text"). If found, the appropriate action is performed.
+If not found, the program searches the same lists for the "*/*" mask. If found,
+the appropriate action is performed. If not found, the program uses the default
+action, which is to decode everything to text (if mailcap specifies
+a filter).
+</para>
+
+<para>
+   Initially all 4 lists are empty, so without any additional parameters
+the program always uses the default decoding.
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>ENVIRONMENT</title>
+<para>
+   LANG
+   LC_ALL
+   LC_CTYPE
+      Define current locale settings. Used to determine current default
+      charset (if your Python is properly installed and configured).
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>BUGS</title>
+<para>
+   The program may produce an incorrect MIME message. The purpose of the program
+is to decode whatever it is possible to decode, not to produce absolutely
+correct MIME output. The incorrect parts are obvious - decoded Subject headers
+and filenames. Other than that the output is a correct MIME message. The program
+does not try to guess whether the headers are correct. For example, if a message
+header states that the charset is iso8859-5, but the body is actually in koi8-r,
+the program will recode the message to the wrong charset.
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>AUTHOR</title>
+<para>
+   Oleg Broytmann &lt;phd@phd.pp.ru&gt;
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>COPYRIGHT</title>
+<para>
+   Copyright (C) 2001-2004 PhiloSoft Design
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>LICENSE</title>
+<para>
+   GNU GPL
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>NO WARRANTIES</title>
+<para>
+   This program is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+   more details.
+</para>
+</refsect1>
+
+
+<refsect1>
+<title>SEE ALSO</title>
+<para>
+   mimedecode.py home page: http://phd.pp.ru/Software/Python/#mimedecode
+</para>
+</refsect1>
+
+</refentry>
diff --git a/mimedecode.py b/mimedecode.py

index 5da188ae6081e5396c7bbccad89d92a911a18405..c0a9ef3ac76371defeff55e2f53d4c3ab047c3c7 100755 (executable)
--- a/mimedecode.py
+++ b/mimedecode.py
@@ -6,11 +6,12 @@ Copyright: (C) 2001-2002 PhiloSoft Design
  License: GPL
  """
  
  License: GPL
  """
  
-__version__ = "1.1.7"
+__version__ = "2.0.0"
+__copyright__ = "Copyright (C) 2001-2002 PhiloSoft Design"
  
  
  import sys, os
  
  
  import sys, os
-import mimetools
+import email
  
  try:
     from cStringIO import StringIO
  
  try:
     from cStringIO import StringIO
@@ -24,27 +25,31 @@ host_name = socket.gethostname()
  me = os.path.basename(sys.argv[0])
  
  
  me = os.path.basename(sys.argv[0])
  
  
+def version(exit=1):
+   sys.stdout.write("""\
+BroytMann mimedecode.py version %s, %s
+""" % (__version__, __copyright__))
+   if exit: sys.exit(0)
+
+
  def usage(code=0):
  def usage(code=0):
+   version(0)
     sys.stdout.write("""\
  Usage: %s [-h|--help] [-V|--version] [-cCDP] [-f charset] [-d header] [-p header:param] [-beit mask] [filename]
  """ % me)
     sys.exit(code)
  
  
     sys.stdout.write("""\
  Usage: %s [-h|--help] [-V|--version] [-cCDP] [-f charset] [-d header] [-p header:param] [-beit mask] [filename]
  """ % me)
     sys.exit(code)
  
  
-def version():
-   sys.stdout.write("""\
-BroytMann mimedecode.py version %s
-""" % __version__)
-   sys.exit(0)
+def output(s, outfile = sys.stdout):
+   outfile.write(s)
  
  
-
-def output(s):
-   sys.stdout.write(s)
-
-def output_headers(msg):
-   if msg.unixfrom:
-      output(msg.unixfrom)
-   output("%s\n" % msg)
+def output_headers(msg, outfile = sys.stdout):
+   unix_from = msg.get_unixfrom()
+   if unix_from:
+      output(unix_from + os.linesep)
+   for key, value in msg.items():
+      output("%s: %s\n" % (key, value), outfile)
+   output("\n", outfile) # End of headers
  
  
  def recode(s, charset):
  
  
  def recode(s, charset):
@@ -53,50 +58,34 @@ def recode(s, charset):
  
  def recode2(s, charset):
     if charset and charset <> GlobalOptions.default_charset:
  
  def recode2(s, charset):
     if charset and charset <> GlobalOptions.default_charset:
-      charset = charset.lower()
        s = recode(s, charset)
     return s
  
  
        s = recode(s, charset)
     return s
  
  
-def getparam(msg, header, param):
-   "Get parameter from the header; return the header without the parameter, parameter itself and rfc2231 flag"
-
-   if not msg.has_key(header):
-      return None, None, 0
+def _decode_header(s):
+   """Return a decoded string according to RFC 2047.
+   NOTE: This is almost the same as email.Utils.decode.
+   """
+   from types import ListType
+   import email.Header
  
  
-   header = msg[header]
-   parts = [part.strip() for part in header.split(';')]
-
-   new_parts = [parts[0]] # The header itself
-   del parts[0]
-
-   new_value = None
-   rfc2231_encoded = 0
-
-   import re, rfc822
-   rfc2231_continuation = re.compile("^%s\\*[0-9]+\\*?$" % param)
-   rfc2231_header = []
+   L = email.Header.decode_header(s)
+   if not isinstance(L, ListType):
+      # s wasn't decoded
+      return s
  
  
-   for part in parts:
-      name, value = part.split('=', 1)
-      # The code is incomplete. Continuations in rfc2231-encoded paramters
-      # (header*1, header*2, etc) are not yet supported
-      if (name == param) or (name == param + '*'):
-         new_value = rfc822.unquote(value)
-         rfc2231_encoded += (name <> param)
-      elif rfc2231_continuation.match(name):
-         rfc2231_header.append(rfc822.unquote(value))
-         rfc2231_encoded = 1
+   rtn = []
+   for atom, charset in L:
+      if charset is None:
+         rtn.append(atom)
        else:
        else:
-         new_parts.append(part)
+         rtn.append(recode2(atom, charset))
+      rtn.append(' ')
+   del rtn[-1] # remove the last space
  
  
-   if rfc2231_header:
-      new_value = ''.join(rfc2231_header)
-
-   if new_value is not None:
-      return "; ".join(new_parts), new_value, rfc2231_encoded
-
-   return None, None, 0
+   # Now that we've decoded everything, we just need to join all the parts
+   # together into the final string.
+   return ''.join(rtn)
  
  
  def decode_header(msg, header):
  
  
  def decode_header(msg, header):
@@ -104,97 +93,28 @@ def decode_header(msg, header):
  
     if msg.has_key(header):
        value = msg[header]
  
     if msg.has_key(header):
        value = msg[header]
-      new_value = decode_rfc2047(value)
-      if value <> new_value: # do not bother to touch msg if not changed
-         msg[header] = new_value
+      new_value = _decode_header(value)
+      if new_value <> value: # do not bother to touch msg if not changed
+         set_header(msg, header, new_value)
+
+
+def _decode_header_param(s):
+   return recode2(s[2], s[0])
  
  
  def decode_header_param(msg, header, param):
     "Decode mail header's parameter (if exists) and put it back, if it was encoded"
  
     if msg.has_key(header):
  
  
  def decode_header_param(msg, header, param):
     "Decode mail header's parameter (if exists) and put it back, if it was encoded"
  
     if msg.has_key(header):
-      new_value, pstr, rfc2231_encoded = getparam(msg, header, param)
-      if pstr is not None:
-         if rfc2231_encoded:
-            new_str = decode_rfc2231(pstr)
+      value = msg.get_param(param, header=header)
+      if value:
+         from types import TupleType
+         if isinstance(value, TupleType):
+            new_value = _decode_header_param(value)
           else:
           else:
-            new_str = decode_rfc2047(pstr)
-         if pstr <> new_str: # do not bother to touch msg if not changed
-            msg[header] = "%s; %s=\"%s\"" % (new_value, param, new_str)
-
-
-def decode_rfc2047(s):
-   "Decode string according to rfc2047"
-
-   parts = s.split() # by whitespaces
-   new_parts = []
-   got_encoded = 0
-
-   for s in parts:
-      l = s.split('?')
-
-      if l[0] <> '=' or l[4] <> '=': # assert correct format
-         new_parts.append(' ')
-         new_parts.append(s) # if not encoded - just put it into output
-         got_encoded = 0
-         continue
-
-      if not got_encoded:
-         new_parts.append(' ') # no space between encoded parts, one space otherwise
-         got_encoded = 1
-
-      charset = l[1].lower()
-      encoding = l[2].lower()
-      s = l[3]
-
-      if '*' in charset:
-         charset, language = charset.split('*', 1) # language ignored
-
-      infile = StringIO(s)
-      outfile = StringIO()
-
-      if encoding == "b":
-         from base64 import decode
-      elif encoding == "q":
-         from quopri import decode
-      else:
-         raise ValueError, "wrong encoding `%s' (expected 'b' or 'q')" % encoding
-
-      decode(infile, outfile)
-      s = outfile.getvalue()
-
-      if charset == GlobalOptions.default_charset:
-         new_parts.append(s) # do not recode
-         continue
-
-      s = recode(s, charset)
-      new_parts.append(s)
-
-   if new_parts and new_parts[0] == ' ':
-      del new_parts[0]
-   return ''.join(new_parts)
-
-
-def decode_rfc2231(s):
-   "Decode string according to rfc2231"
-
-   charset, language, s = s.split("'", 2) # language ignored
-
-   i = 0
-   result = []
-
-   while i < len(s):
-      c = s[i]
-      if c == '%': # hex
-         i += 1
-         c = chr(int(s[i:i+2], 16))
-         i += 1
-      result.append(c)
-      i += 1
-
-   s = ''.join(result)
-   s = recode2(s, charset)
-   return s
+            new_value = _decode_header(value)
+         if new_value <> value: # do not bother to touch msg if not changed
+            msg.set_param(param, new_value, header)
  
  
  def decode_headers(msg):
  
  
  def decode_headers(msg):
@@ -205,32 +125,23 @@ def decode_headers(msg):
  
     for header, param in GlobalOptions.decode_header_params:
        decode_header_param(msg, header, param)
  
     for header, param in GlobalOptions.decode_header_params:
        decode_header_param(msg, header, param)
-      if header.lower() == "content-type" and msg.has_key(header):
-         # reparse type
-         msg.typeheader = msg[header]
-         msg.parsetype() # required for plist...
-         msg.parseplist() #... and reparse decoded plist
  
  
  
  
-def set_content_type(msg, newtype, charset=None):
-   plist = msg.getplist()
-   if plist:
-      if charset:
-         newplist = []
-         for p in plist:
-            if p.split('=')[0] == "charset":
-               p = "charset=%s" % charset
-            newplist.append(p)
-         plist = newplist
-
-   elif charset:
-      plist = ["charset=%s" % charset]
+def set_header(msg, header, value):
+   "Replace header"
  
  
+   if msg.has_key(header):
+      msg.replace_header(header, value)
     else:
     else:
-      plist = []
+      msg[header] = value
+
+
+def set_content_type(msg, newtype, charset=None):
+   msg.set_type(newtype)
+
+   if charset:
+      msg.set_param("charset", charset, "Content-Type")
  
  
-   if plist and plist[0]: plist.insert(0, '')
-   msg["Content-Type"] = "%s%s" % (newtype, ";\n ".join(plist))
  
  
  caps = None # Globally stored mailcap database; initialized only if needed
  
  
  caps = None # Globally stored mailcap database; initialized only if needed
@@ -244,7 +155,7 @@ def decode_body(msg, s):
     if caps is None:
        caps = mailcap.getcaps()
  
     if caps is None:
        caps = mailcap.getcaps()
  
-   content_type = msg.gettype()
+   content_type = msg.get_content_type()
     filename = tempfile.mktemp()
     command = None
  
     filename = tempfile.mktemp()
     command = None
  
@@ -271,11 +182,7 @@ def decode_body(msg, s):
     os.remove(filename)
  
     set_content_type(msg, "text/plain")
     os.remove(filename)
  
     set_content_type(msg, "text/plain")
-   msg["X-MIME-Body-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, host_name, command.split()[0])
-
-   msg.maintype = "text"
-   msg.subtype = "plain"
-   msg.type = "text/plain"
+   msg["X-MIME-Autoconverted"] = "from %s to text/plain by %s id %s" % (content_type, host_name, command.split()[0])
  
     return s
  
  
     return s
  
@@ -283,25 +190,26 @@ def decode_body(msg, s):
  def recode_charset(msg, s):
     "Recode charset of the message to the default charset"
  
  def recode_charset(msg, s):
     "Recode charset of the message to the default charset"
  
-   save_charset = charset = msg.getparam("charset")
+   save_charset = charset = msg.get_content_charset()
     if charset and charset <> GlobalOptions.default_charset:
        s = recode2(s, charset)
     if charset and charset <> GlobalOptions.default_charset:
        s = recode2(s, charset)
-      content_type = msg.gettype()
+      content_type = msg.get_content_type()
        set_content_type(msg, content_type, GlobalOptions.default_charset)
        set_content_type(msg, content_type, GlobalOptions.default_charset)
-      msg["X-MIME-Charset-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, GlobalOptions.default_charset, host_name, me)
+      msg["X-MIME-Autoconverted"] = "from %s to %s by %s id %s" % (save_charset, GlobalOptions.default_charset, host_name, me)
     return s
  
  
     return s
  
  
-def totext(msg, infile):
-   "Convert infile (StringIO) content to text"
+def totext(msg, instring):
+   "Convert instring content to text"
  
  
-   if msg.getmaintype() == "multipart": # Recursively decode all parts of the multipart message
-      newfile = StringIO("%s\n%s" % (msg, infile.getvalue()))
+   if msg.is_multipart(): # Recursively decode all parts of the multipart message
+      newfile = StringIO(str(msg))
+      newfile.seek(0)
        decode_file(newfile)
        return
  
     # Decode body and recode charset
        decode_file(newfile)
        return
  
     # Decode body and recode charset
-   s = decode_body(msg, infile.getvalue())
+   s = decode_body(msg, instring)
     if GlobalOptions.recode_charset:
        s = recode_charset(msg, s)
  
     if GlobalOptions.recode_charset:
        s = recode_charset(msg, s)
  
@@ -309,30 +217,36 @@ def totext(msg, infile):
     output(s)
  
  
     output(s)
  
  
-def decode_part(msg, infile):
+def decode_part(msg):
     "Decode one part of the message"
  
     "Decode one part of the message"
  
-   encoding = msg.getencoding()
-   outfile = StringIO()
+   decode_headers(msg)
+   encoding = msg["Content-Transfer-Encoding"]
  
  
-   if encoding in ('', '7bit', '8bit', 'binary'):
-      mimetools.copyliteral(infile, outfile)
+   if encoding in (None, '', '7bit', '8bit', 'binary'):
+      outstring = str(msg.get_payload())
     else: # Decode from transfer ecoding to text or binary form
     else: # Decode from transfer ecoding to text or binary form
-      mimetools.decode(infile, outfile, encoding)
-      msg["Content-Transfer-Encoding"] = "8bit"
+      outstring = str(msg.get_payload(decode=1))
+      set_header(msg, "Content-Transfer-Encoding", "8bit")
        msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, host_name, me)
  
        msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, host_name, me)
  
-   decode_headers(msg)
-
     # Test all mask lists and find what to do with this content type
     # Test all mask lists and find what to do with this content type
-
-   for content_type in msg.gettype(), msg.getmaintype()+"/*", "*/*":
+   masks = []
+   ctype = msg.get_content_type()
+   if ctype:
+      masks.append(ctype)
+   mtype = msg.get_content_maintype()
+   if mtype:
+      masks.append(mtype + '/*')
+   masks.append('*/*')
+
+   for content_type in masks:
        if content_type in GlobalOptions.totext_mask:
        if content_type in GlobalOptions.totext_mask:
-         totext(msg, outfile)
+         totext(msg, outstring)
           return
        elif content_type in GlobalOptions.binary_mask:
           output_headers(msg)
           return
        elif content_type in GlobalOptions.binary_mask:
           output_headers(msg)
-         output(outfile.getvalue())
+         output(outstring)
           return
        elif content_type in GlobalOptions.ignore_mask:
           output_headers(msg)
           return
        elif content_type in GlobalOptions.ignore_mask:
           output_headers(msg)
@@ -342,43 +256,37 @@ def decode_part(msg, infile):
           raise ValueError, "content type `%s' prohibited" % content_type
  
     # Neither content type nor masks were listed - decode by default
           raise ValueError, "content type `%s' prohibited" % content_type
  
     # Neither content type nor masks were listed - decode by default
-   totext(msg, outfile)
+   totext(msg, outstring)
  
  
  
  
-def decode_file(infile, seekable=1):
+def decode_file(infile):
     "Decode the entire message"
  
     "Decode the entire message"
  
-   m = mimetools.Message(infile)
-   boundary = m.getparam("boundary")
+   msg = email.message_from_file(infile)
+   boundary = msg.get_boundary()
  
  
-   if not boundary:
-      if not m.getheader("Content-Type"): # Not a message, just text - copy it literally
-         output(infile.read())
+   if msg.is_multipart():
+      decode_headers(msg)
+      output_headers(msg)
  
  
-      else: # Simple one-part message - decode it
-         decode_part(m, infile)
+      if msg.preamble: # Preserve the first part, it is probably not a RFC822-message
+         output(msg.preamble) # Usually it is just a few lines of text (MIME warning)
  
  
-   else: # MIME message - decode all parts; may be recursive
-      decode_headers(m)
-      output_headers(m)
-
-      import multifile
-      mf = multifile.MultiFile(infile, seekable)
-      mf.push(boundary)
+      for subpart in msg.get_payload():
+         output("\n--%s\n" % boundary)
+         decode_part(subpart)
  
  
-      if not seekable: # Preserve the first part, it is probably not a RFC822-message
-         output(mf.read()) # Usually it is just a few lines of text (MIME warning)
+      output("\n--%s--\n" % boundary)
  
  
-      while 1:
-         m = mimetools.Message(mf)
-         decode_part(m, mf)
+      if msg.epilogue:
+         output(msg.epilogue)
  
  
-         if not mf.next():
-            break
-         output("\n--%s\n" % boundary)
+   else:
+      if msg.has_key("Content-Type"): # Simple one-part message - decode it
+         decode_part(msg)
  
  
-      mf.pop()
-      output("\n--%s--\n" % boundary)
+      else: # Not a message, just text - copy it literally
+         output(str(msg))
  
  
  class GlobalOptions:
  
  
  class GlobalOptions:
@@ -445,7 +353,6 @@ def init():
  if __name__ == "__main__":
     arguments = init()
  
  if __name__ == "__main__":
     arguments = init()
  
-   seekable = 0
     if len(arguments) == 0:
        infile = sys.stdin
     elif len(arguments) <> 1:
     if len(arguments) == 0:
        infile = sys.stdin
     elif len(arguments) <> 1:
@@ -454,6 +361,5 @@ if __name__ == "__main__":
        infile = sys.stdin
     else:
        infile = open(arguments[0], 'r')
        infile = sys.stdin
     else:
        infile = open(arguments[0], 'r')
-      seekable = 1
  
  
-   decode_file(infile, seekable)
+   decode_file(infile)
diff --git a/setup.py b/setup.py

new file mode 100755 (executable)

index 0000000..f5c7e42
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,18 @@
+#! /usr/bin/env python
+
+
+from distutils.core import setup
+from mimedecode import __version__ as version
+
+
+setup(name = "mimedecode",
+   version = version + 'd',
+   description = "BroytMann mimedecode.py",
+   long_description = "BroytMann mimedecode.py, Copyright (C) 2001-2004 PhiloSoft Design",
+   author = "Oleg Broytmann",
+   author_email = "phd@phd.pp.ru",
+   url = "http://phd.pp.ru/Software/Python/#mimedecode",
+   license = "GPL",
+   platforms = "All",
+   scripts = ['mimedecode.py']
+)
author	Oleg Broytman <phd@phdru.name>
	Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)
committer	Oleg Broytman <phd@phdru.name>
	Mon, 24 Apr 2006 16:09:52 +0000 (16:09 +0000)
ANNOUNCE	[new file with mode: 0644]	patch \| blob
MANIFEST.in	[new file with mode: 0644]	patch \| blob
Makefile	[new file with mode: 0644]	patch \| blob
Makefile.4xslt	[new file with mode: 0644]	patch \| blob
Makefile.sgmlt	[new file with mode: 0644]	patch \| blob
mimedecode.docbook	[new file with mode: 0644]	patch \| blob
mimedecode.py		patch \| blob \| history
setup.py	[new file with mode: 0755]	patch \| blob