#! /usr/bin/env python3
-import sys, os
+import os
-from m_lib.defenc import default_encoding
+from recode_filenames import parse_args, build_recode
-if len(sys.argv) == 1:
- src_encoding = default_encoding
- if src_encoding == 'utf-8':
- sys.exit("Usage: %s [[src_enc] dst_enc [start_dir]]" % sys.argv[0])
- else:
- dst_encoding = 'utf-8'
- start_dir = '.'
-elif len(sys.argv) == 2:
- src_encoding = default_encoding
- dst_encoding = sys.argv[1]
- start_dir = '.'
-elif len(sys.argv) == 3:
- src_encoding = default_encoding
- dst_encoding = sys.argv[1]
- start_dir = sys.argv[2]
-elif len(sys.argv) == 4:
- src_encoding = sys.argv[1]
- dst_encoding = sys.argv[2]
- start_dir = sys.argv[3]
-else:
- sys.exit("Usage: %s [[src_enc] dst_enc [start_dir]]" % sys.argv[0])
-
-# Fake for recode_filenames.py
-sys.argv = ['', src_encoding, dst_encoding]
-from recode_filenames import _recode
+# Command-line handling is delegated to recode_filenames.parse_args();
+# default='.' is handed over as the fallback for the positional arguments,
+# which are used below as the list of start directories.
+from_encoding, to_encoding, dirnames = parse_args(default='.')
+# Name-conversion callable for the chosen encoding pair.
+_recode = build_recode(from_encoding, to_encoding)
+# Error callback handed to os.walk(onerror=...): re-raises the exception
+# it receives, so a failed directory listing stops the run.
def _onerror(exc):
raise exc
-plist = list(os.walk(start_dir, topdown=False, onerror=_onerror))
-
+# Remember the starting directory; the loop below changes directory and
+# comes back here with os.chdir(save_dir).
save_dir = os.getcwd()
-for dirname, _subdirs, fnames in plist:
- if dirname == '.':
- continue
- os.chdir(dirname)
- for filename in fnames:
- # if not exists - it was renamed already
- if os.path.exists(filename) and \
- os.path.isfile(filename):
- newname = _recode(filename)
- if newname != filename:
- os.rename(filename, newname)
- os.chdir('..')
- dirname = os.path.basename(dirname)
- newname = _recode(dirname)
- if newname != dirname:
- os.rename(dirname, newname)
- os.chdir(save_dir)
+# Each start directory is walked bottom-up (topdown=False); the walk result
+# is materialized with list() before the loop body renames anything.
+# NOTE(review): indentation is collapsed in this view, so which statements
+# the two `if dirname != '.'` guards cover cannot be confirmed from here.
+for start_dir in dirnames:
+ for dirname, _subdirs, fnames in list(
+ os.walk(start_dir, topdown=False, onerror=_onerror)
+ ):
+ if dirname != '.':
+ os.chdir(dirname)
+ for filename in fnames:
+ # if it no longer exists, it was already renamed
+ if os.path.exists(filename) and \
+ os.path.isfile(filename):
+ newname = _recode(filename)
+ # _recode() may return bytes; normalize to str before comparing
+ if not isinstance(newname, str):
+ newname = newname.decode()
+ if newname != filename:
+ os.rename(filename, newname)
+ if dirname != '.':
+ os.chdir('..')
+ # the directory itself is renamed by its basename
+ dirname = os.path.basename(dirname)
+ newname = _recode(dirname)
+ if not isinstance(newname, str):
+ newname = newname.decode()
+ if newname != dirname:
+ os.rename(dirname, newname)
+ # go back to the directory saved in save_dir
+ os.chdir(save_dir)
#! /usr/bin/env python3
# -*- coding: koi8-r -*-
+import argparse
import sys
-src_encoding = sys.argv[1]
-dst_encoding = sys.argv[2]
+from m_lib.defenc import default_encoding
-if src_encoding == "translit":
- if dst_encoding == "koi8-r":
- from m_lib.rus.lat2rus import lat2koi as _recode
- elif dst_encoding == "cp1251":
- from m_lib.rus.lat2rus import lat2win as _recode
+
+def parse_args(default=None):
+ # Parse the command line and fill in whichever encoding was not given.
+ #
+ # default: fallback entry for the positional 'filename' list. When it is
+ # truthy the positional arguments are optional (nargs='*'); otherwise at
+ # least one is required (nargs='+').
+ #
+ # Returns the tuple (from_encoding, to_encoding, args.filename).
+ # Calls sys.exit() with a message when an encoding cannot be guessed.
+ parser = argparse.ArgumentParser(description='Recode filenames')
+ parser.add_argument('-f', '--from-encoding', help='from encoding')
+ parser.add_argument('-t', '--to-encoding', help='to encoding')
+ parser.add_argument('filename', nargs='*' if default else '+',
+ default=[default], help='filenames to recode')
+ args = parser.parse_args()
+
+ from_encoding = args.from_encoding
+ to_encoding = args.to_encoding
+
+ # Guess the missing side; default_encoding is imported from m_lib.defenc.
+ if from_encoding:
+ if to_encoding:
+ pass # Everything is defined, no need to guess
+ # only -f given: utf-8 converts to default_encoding, anything else to utf-8
+ elif from_encoding == 'utf-8':
+ if default_encoding == 'utf-8':
+ sys.exit('Cannot guess to_encoding')
+ else:
+ to_encoding = default_encoding
+ else:
+ to_encoding = 'utf-8'
+ # only -t given: a target equal to default_encoding is converted from utf-8,
+ # any other target is converted from default_encoding
+ elif to_encoding:
+ if to_encoding == default_encoding:
+ if default_encoding == 'utf-8':
+ sys.exit('Cannot guess from_encoding')
+ else:
+ from_encoding = 'utf-8'
+ else:
+ from_encoding = default_encoding
+ # neither given: default_encoding -> utf-8, unless default_encoding is utf-8
else:
- raise NotImplementedError("destination encoding must be koi8-r or cp1251, not `%s'" % dst_encoding)
+ if default_encoding == 'utf-8':
+ sys.exit('Cannot guess encodings')
+ else:
+ from_encoding = default_encoding
+ to_encoding = 'utf-8'
+
+ return from_encoding, to_encoding, args.filename
+
+
+def build_recode(from_encoding, to_encoding):
+ # Return a one-argument callable that converts a name for the given
+ # encoding pair. "translit" and "url" are handled as special
+ # pseudo-encodings; any other pair takes the generic branch at the end.
+ #
+ # Raises NotImplementedError when "translit" is paired with anything
+ # other than koi8-r or cp1251.
+ if from_encoding == "translit":
+ if to_encoding == "koi8-r":
+ from m_lib.rus.lat2rus import lat2koi as _recode
+ elif to_encoding == "cp1251":
+ from m_lib.rus.lat2rus import lat2win as _recode
+ else:
+ raise NotImplementedError(
+ "destination encoding must be koi8-r or cp1251, "
+ "not `%s'" % to_encoding)
+
+ elif to_encoding == "translit":
+ if from_encoding == "koi8-r":
+ from m_lib.rus.rus2lat import koi2lat as _recode
+ elif from_encoding == "cp1251":
+ from m_lib.rus.rus2lat import win2lat as _recode
+ else:
+ raise NotImplementedError(
+ "source encoding must be koi8-r or cp1251, "
+ "not `%s'" % from_encoding)
+
+ # the imported koi2lat_d mapping is modified in place
+ from m_lib.rus.rus2lat import koi2lat_d
+ koi2lat_d["Ъ"] = '' # remove apostrophes -
+ koi2lat_d["Ь"] = '' # they are not very good characters in filenames
+ koi2lat_d["ъ"] = '' # especially on Windoze
+ koi2lat_d["ь"] = '' # :-)
+
+ elif from_encoding == "url":
+ # to_encoding may be written "enc1/enc2"; if it does not split into
+ # exactly two parts, both names are set to to_encoding and the
+ # recode() step in _recode is skipped
+ try:
+ from_encoding, to_encoding = to_encoding.split('/')
+ except ValueError:
+ from_encoding = to_encoding
+ from urllib.parse import unquote
+ from m_lib.opstring import recode
+
+ def _recode(s):
+ s = unquote(s)
+ if from_encoding != to_encoding:
+ s = recode(s, from_encoding, to_encoding, "replace")
+ return s
+
+ elif to_encoding == "url":
+ # from_encoding may be written "enc1/enc2"; if it does not split into
+ # exactly two parts, both names are set to from_encoding and the
+ # recode() step in _recode is skipped
+ try:
+ from_encoding, to_encoding = from_encoding.split('/')
+ except ValueError:
+ to_encoding = from_encoding
+ from urllib.parse import quote
+ from m_lib.opstring import recode
+
+ def _recode(s):
+ if from_encoding != to_encoding:
+ s = recode(s, from_encoding, to_encoding, "replace")
+ # wget treats them as safe
+ # vvvvvvvvvvvvv
+ return quote(s, safe=";/?:@&=+$,()'")
-elif dst_encoding == "translit":
- if src_encoding == "koi8-r":
- from m_lib.rus.rus2lat import koi2lat as _recode
- elif src_encoding == "cp1251":
- from m_lib.rus.rus2lat import win2lat as _recode
else:
- raise NotImplementedError("source encoding must be koi8-r or cp1251, not `%s'" % src_encoding)
-
- from m_lib.rus.rus2lat import koi2lat_d
- koi2lat_d["Ъ"] = '' # remove apostrophs -
- koi2lat_d["Ь"] = '' # they are not very good characters in filenames
- koi2lat_d["ъ"] = '' # especially on Windoze
- koi2lat_d["ь"] = '' # :-)
-
-elif src_encoding == "url":
- try:
- src_encoding, dst_encoding = dst_encoding.split('/')
- except ValueError:
- src_encoding = dst_encoding
- from m_lib.opstring import recode
- import urllib
- def _recode(s):
- s = urllib.unquote(s)
- if src_encoding != dst_encoding:
- s = recode(s, src_encoding, dst_encoding, "replace")
- return s
-
-elif dst_encoding == "url":
- try:
- src_encoding, dst_encoding = src_encoding.split('/')
- except ValueError:
- dst_encoding = src_encoding
- from m_lib.opstring import recode
- import urllib
- def _recode(s):
- if src_encoding != dst_encoding:
- s = recode(s, src_encoding, dst_encoding, "replace")
- return urllib.quote(s, safe=";/?:@&=+$,()'") # wget treats them as safe
-
-else:
- from m_lib.opstring import recode
- def _recode(s):
- return recode(s, src_encoding, dst_encoding, "replace")
+ # Generic pair: the str is encoded with to_encoding and the resulting
+ # bytes are decoded with from_encoding, both with "surrogateescape",
+ # so the returned str may contain lone surrogates.
+ # NOTE(review): the encode/decode order looks reversed at first sight;
+ # presumably it relies on how the OS-decoded name round-trips - confirm.
+ def _recode(s):
+ return s.encode(to_encoding, "surrogateescape").\
+ decode(from_encoding, "surrogateescape")
+
+ return _recode
if __name__ == "__main__":
import os
- for filename in sys.argv[3:]:
+ # Encodings and the names to process come from the command line; at
+ # least one name is required because parse_args() gets no default here.
+ from_encoding, to_encoding, filenames = parse_args()
+ _recode = build_recode(from_encoding, to_encoding)
+ for filename in filenames:
new_name = _recode(filename)
- if type(filename) is not type(new_name):
+ # _recode() may return bytes; normalize to str before comparing
+ if not isinstance(new_name, str):
new_name = new_name.decode()
if new_name != filename:
+ # Report the rename as raw bytes on stdout. Names may carry lone
+ # surrogates (the generic _recode decodes with "surrogateescape"),
+ # which a strict encode rejects with UnicodeEncodeError, so both
+ # names are encoded with 'replace' to keep the report from
+ # aborting the run before os.rename().
+ sys.stdout.buffer.write(
+ b'%s %s / %s -> %s\n' % (
+ from_encoding.encode(), to_encoding.encode(),
+ filename.encode(default_encoding, 'replace'),
+ new_name.encode('utf-8', 'replace'))
+ )
os.rename(filename, new_name)