From 607e68504d32244e26d4a4c01532f5168f03f4e8 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Mon, 10 Mar 2014 20:06:14 +0400 Subject: [PATCH] Add option -B to skip content-transfer-decoding --- ANNOUNCE | 2 + TODO | 8 +--- mimedecode.docbook | 14 +++++- mimedecode.py | 36 ++++++++------ test/expected/msg_13-1.txt | 96 ++++++++++++++++++++++++++++++++++++++ test/test_all | 1 + 6 files changed, 136 insertions(+), 21 deletions(-) create mode 100644 test/expected/msg_13-1.txt diff --git a/ANNOUNCE b/ANNOUNCE index cf2f308..58e552e 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -31,6 +31,8 @@ level). Add option --set-param=header:param=value to set header parameter's value (only at the top level). The header must exist. + Add option -B to skip content-transfer-decoding binary attachments. + WHAT'S NEW in version 2.4.0 (2014-03-08) Change option -d to accept a comma-separated list of headers: diff --git a/TODO b/TODO index 53d09af..29cc0a7 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,7 @@ -Add options --save-headers, --save-body and --save-message to save -decoded headers/bodies/messages of parts to files. - Add option -O to set the destination directory. - -Add an option -B to skip content-transfer-decoding binary attachments (leave it -as base64 or such). +Add options --save-headers, --save-body and --save-message to save +decoded headers/bodies/messages of parts to files. Release 2.5.0. diff --git a/mimedecode.docbook b/mimedecode.docbook index 8a939f6..4011979 100644 --- a/mimedecode.docbook +++ b/mimedecode.docbook @@ -90,7 +90,7 @@ - + @@ -394,6 +394,16 @@ + + -B mask + + + Append mask to the list of binary content types that will not be + content-transfer-decoded (will be left as base64 or such). + + + + -e mask @@ -440,7 +450,7 @@ cat input_file | mimedecode.py -o output_file - The 4 list options (-beit) require more explanation. They allow a user to + The 5 list options (-Bbeit) require more explanation. 
They allow a user to control body decoding with great flexibility. Think about said mail archive; for example, its maintainer wants to put there only texts, convert Postscript/PDF to text, pass HTML and images as is, and ignore everything diff --git a/mimedecode.py b/mimedecode.py index 50bdcd4..c85bb8d 100755 --- a/mimedecode.py +++ b/mimedecode.py @@ -18,7 +18,7 @@ Broytman mimedecode.py version %s, %s def usage(code=0, errormsg=''): version(0) sys.stdout.write("""\ - Usage: %s [-h|--help] [-V|--version] [-cCDP] [-H|--host=hostname] [-f charset] [-d header1[,h2,...]|*[,-h1,...]] [-p header1[,h2,h3,...]:param1[,p2,p3,...]] [-r header1[,h2,...]|*[,-h1,...]] [-R header1[,h2,h3,...]:param1[,p2,p3,...]] [--set-header header:value] [--set-param header:param=value] [-beit mask] [-o output_file] [input_file [output_file]] + Usage: %s [-h|--help] [-V|--version] [-cCDP] [-H|--host=hostname] [-f charset] [-d header1[,h2,...]|*[,-h1,...]] [-p header1[,h2,h3,...]:param1[,p2,p3,...]] [-r header1[,h2,...]|*[,-h1,...]] [-R header1[,h2,h3,...]:param1[,p2,p3,...]] [--set-header header:value] [--set-param header:param=value] [-Bbeit mask] [-o output_file] [input_file [output_file]] """ % me) if errormsg: sys.stderr.write(errormsg + '\n') @@ -272,14 +272,6 @@ def decode_part(msg): "Decode one part of the message" decode_headers(msg) - encoding = msg["Content-Transfer-Encoding"] - - if encoding in (None, '', '7bit', '8bit', 'binary'): - outstring = str(msg.get_payload()) - else: # Decode from transfer ecoding to text or binary form - outstring = str(msg.get_payload(decode=1)) - set_header(msg, "Content-Transfer-Encoding", "8bit") - msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, gopts.host_name, me) # Test all mask lists and find what to do with this content type masks = [] @@ -291,11 +283,26 @@ def decode_part(msg): masks.append(mtype + '/*') masks.append('*/*') + left_binary = False + for content_type in masks: + if content_type in gopts.binary_mask: + 
left_binary = True + break + + encoding = msg["Content-Transfer-Encoding"] + if left_binary or encoding in (None, '', '7bit', '8bit', 'binary'): + outstring = str(msg.get_payload()) + else: # Decode from transfer ecoding to text or binary form + outstring = str(msg.get_payload(decode=1)) + set_header(msg, "Content-Transfer-Encoding", "8bit") + msg["X-MIME-Autoconverted"] = "from %s to 8bit by %s id %s" % (encoding, gopts.host_name, me) + for content_type in masks: if content_type in gopts.totext_mask: totext(msg, outstring) return - elif content_type in gopts.binary_mask: + elif content_type in gopts.binary_mask or \ + content_type in gopts.decoded_binary_mask: output_headers(msg) output(outstring) return @@ -373,7 +380,8 @@ class GlobalOptions: set_header_param = [] totext_mask = [] # A list of content-types to decode - binary_mask = [] # A list to pass through + binary_mask = [] # A list of content-types to pass through + decoded_binary_mask = [] # A list of content-types to pass through (content-transfer-decoded) ignore_mask = [] # Ignore (skip, do not decode and do not include into output) error_mask = [] # Raise error if encounter one of these @@ -388,7 +396,7 @@ def get_opt(): try: options, arguments = getopt(sys.argv[1:], - 'hVcCDPH:f:d:p:r:R:b:e:i:t:o:', + 'hVcCDPH:f:d:p:r:R:b:B:e:i:t:o:', ['help', 'version', 'host=', 'set-header=', 'set-param=']) except GetoptError: usage(1) @@ -431,8 +439,10 @@ def get_opt(): gopts.set_header_param.append((header, param, value)) elif option == '-t': gopts.totext_mask.append(value) - elif option == '-b': + elif option == '-B': gopts.binary_mask.append(value) + elif option == '-b': + gopts.decoded_binary_mask.append(value) elif option == '-i': gopts.ignore_mask.append(value) elif option == '-e': diff --git a/test/expected/msg_13-1.txt b/test/expected/msg_13-1.txt new file mode 100644 index 0000000..e5728bb --- /dev/null +++ b/test/expected/msg_13-1.txt @@ -0,0 +1,96 @@ +From test Sat Feb 1 00:00:00 2014 +MIME-Version: 1.0 
+From: Barry +To: Dingus Lovers +Subject: Here is your dingus fish +Date: Fri, 20 Apr 2001 19:35:02 -0400 +Content-Type: multipart/mixed; boundary="OUTER" + + +--OUTER +Content-Type: text/plain; charset="us-ascii" + +A text/plain part + +--OUTER +Content-Type: multipart/mixed; boundary=BOUNDARY + + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" + +Hi there, + +This is the dingus fish. + +--BOUNDARY +Content-Type: image/gif; name="dingusfish.gif" +Content-Transfer-Encoding: base64 +content-disposition: attachment; filename="dingusfish.gif" + +R0lGODdhAAEAAfAAAP///wAAACwAAAAAAAEAAQAC/oSPqcvtD6OctNqLs968+w+G4kiW5omm6sq2 +7gvH8kzX9o3n+s73/g8MCofEovGITGICTKbyCV0FDNOo9SqpQqpOrJfXzTQj2vD3TGtqL+NtGQ2f +qTXmxzuOd7WXdcc9DyjU53ewFni4s0fGhdiYaEhGBelICTNoV1j5NUnFcrmUqemjNifJVWpaOqaI +oFq3SspZsSraE7sHq3jr1MZqWvi662vxV4tD+pvKW6aLDOCLyur8PDwbanyDeq0N3DctbQYeLDvR +RY6t95m6UB0d3mwIrV7e2VGNvjjffukeJp4w7F65KecGFsTHQGAygOrgrWs1jt28Rc88KESYcGLA +/obvTkH6p+CinWJiJmIMqXGQwH/y4qk0SYjgQTczT3ajKZGfuI0uJ4kkVI/DT5s3/ejkxI0aT4Y+ +YTYgWbImUaXk9nlLmnSh1qJiJFl0OpUqRK4oOy7NyRQtHWofhoYVxkwWXKUSn0YsS+fUV6lhqfYb +6ayd3Z5qQdG1B7bvQzaJjwUV2lixMUZ7JVsOlfjWVr/3NB/uFvnySBN6Dcb6rGwaRM3wsormw5cC +M9NxWy/bWdufudCvy8bOAjXjVVwta/uO21sE5RHBCzNFXtgq9ORtH4eYjVP4Yryo026nvkFmCeyA +B29efV6ravCMK5JwWd5897Qrx7ll38o6iHDZ/rXPR//feevhF4l7wjUGX3xq1eeRfM4RSJGBIV1D +z1gKPkfWag3mVBVvva1RlX5bAJTPR/2YqNtw/FkIYYEi/pIZiAdpcxpoHtmnYYoZtvhUftzdx5ZX +JSKDW405zkGcZzzGZ6KEv4FI224oDmijlEf+xp6MJK5ojY/ASeVUR+wsKRuJ+XFZ5o7ZeEime8t1 +ouUsU6YjF5ZtUihhkGfCdFQLWQFJ3UXxmElfhQnR+eCdcDbkFZp6vTRmj56ApCihn5QGpaToNZmR +n3NVSpZcQpZ2KEONusaiCsKAug0wkQbJSFO+PTSjneGxOuFjPlUk3ovWvdIerjUg9ZGIOtGq/qeX +eCYrrCX+1UPsgTKGGRSbzd5q156d/gpfbJxe66eD5iQKrXj7RGgruGxs62qebBHUKS32CKluCiqZ +qh+pmehmEb71noAUoe5e9Zm17S7773V10pjrtG4CmuurCV/n6zLK5turWNhqOvFXbjhZrMD0YhKe +wR0zOyuvsh6MWrGoIuzvyWu5y1WIFAqmJselypxXh6dKLNOKEB98L88bS2rkNqqlKzCNJp9c0G0j +Gzh0iRrCbHSXmPR643QS+4rWhgFmnSbSuXCjS0xAOWkU2UdLqyuUNfHSFdUouy3bm5i5GnDM3tG8 
+doJ4r5tqu3pPbRSVfvs8uJzeNXhp3n4j/tZ42SwH7eaWUUOjc3qFV9453UHTXZfcLH+OeNs5g36x +lBnHvTm7EbMbLeuaLncao8vWCXimfo1o+843Ak6y4ChNeGntvAYvfLK4ezmoyNIbNCLTCXO9ZV3A +E8/s88RczPzDwI4Ob7XZyl7+9Miban29h+tJZPrE21wgvBphDfrrfPdCTPKJD/y98L1rZwHcV6Jq +Zab0metpuNIX/qAFPoz171WUaUb4HAhBSzHuHfjzHb3kha/2Cctis/ORArVHNYfFyYRH2pYIRzic +isVOfPWD1b6mRTqpCRBozzof6UZVvFXRxWIr3GGrEviGYgyPMfahheiSaLs/9QeFu7oZ/ndSY8DD +ya9x+uPed+7mxN2IzIISBOMLFYWVqC3Pew1T2nFuuCiwZS5/v6II10i4t1OJcUH2U9zxKodHsGGv +Oa+zkvNUYUOa/TCCRutF9MzDwdlUMJADTCGSbDQ5OV4PTamDoPEi6Ecc/RF5RWwkcdSXvSOaDWSn +I9LlvubFTQpuc6JKXLcKeb+xdbKRBnwREemXyjg6ME65aJiOuBgrktzykfPLJBKR9ClMavJ62/Ff +BlNIyod9yX9wcSXexnXFpvkrbXk64xsx5Db7wXKP5fSgsvwIMM/9631VLBfkmtbHRXpqmtei52hG +pUwSlo+BASQoeILDOBgREECxBBh5/iYmNsQ9dIv5+OI++QkqdsJPc3uykz5fkM+OraeekcQF7X4n +B5S67za5U967PmooGQhUXfF7afXyCD7ONdRe17QogYjVx38uLwtrS6nhTnm15LQUnu9E2uK6CNI/ +1HOABj0ESwOjut4FEpFQpdNAm4K2LHnDWHNcmKB2ioKBogysVZtMO2nSxUdZ8Yk2kJc7URioLVI0 +YgmtIwZj4LoeKemgnOnbUdGnzZ4Oa6scqiolBGqS6RgWNLu0RMhcaE6rhhU4hiuqFXPAG8fGwTPW +FKeLMtdVmXLSs5YJGF/YeVm7rREMlY3UYE+yCxbaMXX8y15m5zVHq6GOKDMynzII/jdUHdyVqIy0 +ifX2+r/EgtZcvRzSb72gU9ui87M2VecjKildW/aFqaYhKoryUjfB/g4qtyVuc60xFDGmCxwjW+qu +zjuwl2GkOWn66+3QiiEctvd04OVvcCVzjgT7lrkvjVGKKHmmlDUKowSeikb5kK/mJReuWOxONx+s +ULsl+Lqb0CVn0SrVyJ6wt4t6yTeSCafhPhAf0OXn6L60UMxiLolFAtmN35S2Ob1lZpQ1r/n0Qb5D +oQ1zJiRVDgF8N3Q8TYfbi3DyWCy3lT1nxyBs6FT3S2GOzWRlxwKvlRP0RPJA9SjxEy0UoEnkA+M4 +cnzLMJrBGWLFEaaUb5lvpqbq/loOaU5+DFuHPxo82/OZuM8FXG3oVNZhtWpMpb/0Xu5m/LfLhHZQ +7yuVI0MqZ7NE43imC8jH3IwGZlbPm0xkJYs7+2U48hXTsFSMqgGDvai0kLxyynKNT/waj+q1c1tz +GjOpPBgdCSq3UKZxCSsqFIY+O6JbAWGWcV1pwqLyj5sGqCF1xb1F3varUWqrJv6cN3PrUXzijtfZ +FshpBL3Xwr4GIPvU2N8EjrJgS1zl21rbXQMXeXc5jjFyrhpCzijSv/RQtyPSzHCFMhlME95fHglt +pRsX+dfSQjUeHAlpWzJ5iOo79Ldnaxai6bXTcGO3fp07ri7HLEmXXPlYi8bv/qVxvNcdra6m7Rlb +6JBTb5fd66VhFRjGArh2n7R1rDW4P5NOT9K0I183T2scYkeZ3q/VFyLb09U9ajzXBS8Kgkhc4mBS +kYY9cy3Vy9lUnuNJH8HGIclUilwnBtjUOH0gteGOZ4c/XNrhXLSYDyxfnD8z1pDy7rYRvDolhnbe 
+UMzxCZUs40s6s7UIvBnLgc0+vKuOkIXeOrDymlp+Zxra4MZLBbVrqD/jTJ597pDmnw5c4+DbyB88 +9Cg9DodYcSuMZT/114pptqc/EuTjRPvH/z5slzI3tluOEBBLqOXLOX+0I5929tO97wkvl/atCz+y +xJrdwteW2FNW/NSmBP+f/maYtVs/bYyBC7Ox3jsYZHL05CIrBa/nS+b3bHfiYm4Ueil1YZZSgAUI +fFZ1dxUmeA2oQRQ3RuGXNGLFV9/XbGFGPV6kfzk1TBBCd+izc7q1H+OHMJwmaBX2IQNYVAKHYepV +SSGCe6CnbYHHETKGNe43EDvFgZr0gB/nVHPHZ80VV1ojOiI3XDvYIkl4ayo4bxQIgrFXWTvBI0nH +VElWMuw2aLUWCRHHf8ymVCHjFlJnOSojfevCYyyyZDH0IcvHhrsnQ5O1OsWzONuVVKIxSxiFZ/tR +fKDAf6xFTnw4O9Qig2VCfW2hJQrmMOuHW0W3dLQmCMO2ccdUd/xyfflH/olTiHZVdGwb8nIwRzSE +J15jFlOJuBZBZ4CiyHyd2IFylFlB+HgHhYabhWOGwYO1ZH/Og1dtQlFMk352CGRSIFTapnWQEUtN +l4zv8S0aaCFDyGCBqDUxZYpxGHX01y/JuH1xhn7TOCnNCI4eKDs5WGX4R425F4vF1o3BJ4vO0otq +I3rimI7jJY1jISqnBxknCIvruF83mF5wN4X7qGLIhR8A2Vg0yFERSIXn9Vv3GHy3Vj/WIkKddlYi +yIMv2I/VMjTLpW7pt05SWIZR0RPyxpB4SIUM9lBPGBl0GC7oSEEwRYLe4pJpZY2P0zbI1n+Oc44w +qY3PUnmF0ixjVpDD/mJ9wpOBGTVgXlaCaZiPcIWK5NiKBIiPdGaQ0TWGvAiG7nMchdZb7Vgf8zNi +MuMyzRdy/lePe9iC4TRx7WhhOQI/QiSVNAmAa2lT/piFbuh7ofJoYSZzrSZ1bvmWw3eN2nKUPVky +uPN5/VRfohRd0VYZoqhKIlU6TXYhJxmPUIloAwc1bPmHEpaZYZORHNlXUJM07hATwHR8MJYqkwWR +WaIezFhxSFlc8/Fq82hEnpeRozg3ULhhr9lAGtVEkCg5ZNRuuVleBPaZadhG0ZgkyPmDOTOKzViM +YgOcpukKqQcbjAWS0IleQ2ROjdh6A+md1qWdBRSX7iSYgFRTtRmBpJioieXJiHfJiMGIR9fJOn8I +MSfXYhspn4ooSa2mSAj4n+8Bmg03fBJZoPOJgsVZRxu1oOMRPXYYjdqjihFaEoZpXBREanuJoRI6 +cibFinq4ngUKh/wQd/H5ofYCZ0HJXR62opZFaAT0iFIZo4DIiUojkjeqKiuoZirKo5Y1a7AWckGa +BkuYoD5lpDK6eUs6CkDqpETwl1EqpfhJpVeKpVl6EgUAADs= + +--BOUNDARY-- + +--OUTER-- diff --git a/test/test_all b/test/test_all index 4401053..52b0313 100755 --- a/test/test_all +++ b/test/test_all @@ -58,6 +58,7 @@ test_file msg_01.txt msg_01-2.txt --set-header=X-Test:set --set-param=X-Test:tes test_file msg_02.txt msg_02.txt --set-param=X-Test:test:set test_file msg_02.txt msg_02-1.txt --set-param=X-Mailer:test=set test_file msg_02.txt msg_02-1.txt --set-param=X-Mailer:test:set +test_file msg_13.txt msg_13-1.txt -B '*/*' if [ "$RC" -eq 0 ]; then echo "All tests passed!" -- 2.39.5