#! /usr/bin/env python
"""Merge several spam/nonspam counter files into a single output file.

Every input file is expected to start with three header lines:

1. a version line, which is copied verbatim to the output;
2. a counter line, and
3. another counter line,

where each counter line has the form ``v<TAB><integer><TAB><tag>`` and
``<tag>`` is either ``num_spam`` or ``num_nonspam``.  All remaining lines
are copied to the output unchanged, after a header carrying the summed
counters.
"""

import argparse

# Tags accepted on the counter lines, mapped to the total they contribute to.
COUNTER_TAGS = {'num_spam': 'spam', 'num_nonspam': 'nonspam'}


def read_header(fname, infile):
    """Consume and parse the three header lines of an open input file.

    Args:
        fname: Name of the file, used only in error messages.
        infile: Open text file positioned at its first line.

    Returns:
        A ``(version_line, counts)`` tuple.  ``version_line`` is the raw
        first line (including its newline, if any).  ``counts`` maps
        ``'spam'`` / ``'nonspam'`` to the integer found for that tag; a key
        is absent when its tag did not appear on either counter line.

    Raises:
        ValueError: If a counter line does not have three tab-separated
            fields, does not start with ``v``, carries an unknown tag, or
            has a non-integer counter.
    """
    version_line = infile.readline()
    counts = {}
    # Line 1 is the version line, so the counters live on lines 2 and 3.
    for line_no in (2, 3):
        counter_line = infile.readline().strip()
        fields = counter_line.split('\t')
        if len(fields) != 3:
            raise ValueError(
                'Bad counter line in file %s line %d: expected 3 '
                'tab-separated fields, got %r'
                % (fname, line_no, counter_line))
        v, counter, tag = fields
        if v != 'v':
            raise ValueError(
                'Bad v-tag in file %s line %d: expected "v", got %r'
                % (fname, line_no, v))
        if tag not in COUNTER_TAGS:
            raise ValueError(
                'Bad tag in file %s line %d: expected "num_spam" or '
                '"num_nonspam", got %r' % (fname, line_no, tag))
        try:
            counts[COUNTER_TAGS[tag]] = int(counter)
        except ValueError:
            raise ValueError(
                'Bad counter in file %s line %d: expected an integer, '
                'got %r' % (fname, line_no, counter))
    return version_line, counts


def merge_files(input_fnames, output_fname):
    """Merge ``input_fnames`` into ``output_fname``.

    The output consists of the version line, one ``num_spam`` and one
    ``num_nonspam`` counter line holding the sums over all inputs, and then
    the remaining lines of every input in the order the inputs were given.

    Args:
        input_fnames: Sequence of input file names.  A name that occurs
            more than once is merged only once.
        output_fname: Name of the file to write.

    Returns:
        A dict with the summed ``'spam'`` and ``'nonspam'`` counters.

    Raises:
        ValueError: If no input is given or a header is malformed.
        IOError, OSError: If a file cannot be opened, read or written.
    """
    if not input_fnames:
        raise ValueError('at least one input file is required')

    opened = []  # (name, file) pairs, in first-occurrence order
    seen = set()
    try:
        for fname in input_fnames:
            if fname in seen:
                # Opening the same name twice would only leak a handle.
                continue
            seen.add(fname)
            # Text mode already translates newlines on Python 3; the old
            # 'U' flag is rejected by Python 3.11 and later.
            opened.append((fname, open(fname, 'r')))

        totals = {'spam': 0, 'nonspam': 0}
        version_line = ''
        for fname, infile in opened:
            # The version line of the last input wins; inputs are not
            # checked for agreeing versions.
            version_line, counts = read_header(fname, infile)
            for key, value in counts.items():
                totals[key] += value

        with open(output_fname, 'wt') as outfile:
            outfile.write(version_line)
            for key in ('spam', 'nonspam'):
                outfile.write('v\t%d\tnum_%s\n' % (totals[key], key))
            for _fname, infile in opened:
                last_line = '\n'
                for line in infile:
                    outfile.write(line)
                    last_line = line
                # Keep the next input from being glued onto an
                # unterminated final line.
                if not last_line.endswith('\n'):
                    outfile.write('\n')
    finally:
        # Runs on success and on every failure path alike.
        for _fname, infile in opened:
            infile.close()
    return totals


def main(argv=None):
    """Parse the command line (``argv`` or ``sys.argv``) and run the merge."""
    parser = argparse.ArgumentParser(description='Import')
    parser.add_argument('-o', '--output', required=True, help='output file')
    parser.add_argument('input_fnames', nargs='+',
                        help='input files to merge')
    args = parser.parse_args(argv)
    merge_files(args.input_fnames, args.output)


if __name__ == '__main__':
    main()