From 5d095fb843cd453d7e63775c8da7c6d261d33c62 Mon Sep 17 00:00:00 2001
From: Oleg Broytman
Date: Mon, 6 Nov 2023 03:37:41 +0300
Subject: [PATCH] bin/cleanup-filenames.sh: Remove accents

Convert latin1 characters to ascii.
---
Review notes (text between "---" and the diffstat is not part of the
commit message):

* strip_accents() now removes marks only from Latin letters.  Normalizing
  the whole name to NFD and dropping every 'Mn' character also rewrote
  names in other scripts, e.g. Cyrillic "й" became "и" and "ё" became "е".
* This patch text was restored from a copy collapsed onto a single line.
  The leading whitespace inside the cleanup-filenames.sh hunks is
  reconstructed; check it against the tree before applying.  The blob ids
  on the "index" lines predate the Python changes.

 bin/cleanup-filenames.sh | 15 +++++++------
 bin/unicode_norm_nfd.py  | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 7 deletions(-)
 create mode 100755 bin/unicode_norm_nfd.py

diff --git a/bin/cleanup-filenames.sh b/bin/cleanup-filenames.sh
index c977312..a517e96 100755
--- a/bin/cleanup-filenames.sh
+++ b/bin/cleanup-filenames.sh
@@ -7,7 +7,7 @@
 fi
 
 for fname in "$@"; do
-   newname=$(echo "$fname" | sed -e "
+    newname=$(echo "$fname" | sed -e "
 s/[‘“’”‘“’”″′«»\`\"]/'/g
 s/ *: */ - /g
 s/[–—]/-/g
@@ -17,10 +17,11 @@
 s/\?//g
 s/№/N/g
 ")
-   if [ "$fname" \!= "$newname" ]; then
-      echo "$fname"
-      echo "$newname"
-      mv -- "$fname" "$newname"
-      echo
-   fi
+    newname="$(unicode_norm_nfd.py "$newname")"
+    if [ "$fname" \!= "$newname" ]; then
+        echo "$fname"
+        echo "$newname"
+        mv -- "$fname" "$newname"
+        echo
+    fi
 done
diff --git a/bin/unicode_norm_nfd.py b/bin/unicode_norm_nfd.py
new file mode 100755
index 0000000..2cca0d9
--- /dev/null
+++ b/bin/unicode_norm_nfd.py
@@ -0,0 +1,60 @@
+#! /usr/bin/env python3
+"""Strip accents from Latin letters in the names given as arguments.
+
+Based on https://stackoverflow.com/a/518232/7976758
+"""
+
+import sys
+import unicodedata
+
+
+def _is_latin(c):
+    """Return True if *c* is ASCII or its Unicode name starts with LATIN."""
+    return ord(c) < 128 or unicodedata.name(c, '').startswith('LATIN ')
+
+
+def _clusters(s):
+    """Yield each character of *s* joined with the 'Mn' marks after it."""
+    cluster = ''
+    for c in s:
+        if cluster and unicodedata.category(c) != 'Mn':
+            yield cluster
+            cluster = ''
+        cluster += c
+    if cluster:
+        yield cluster
+
+
+def strip_accents(s):
+    """Return *s* with nonspacing marks removed from Latin letters.
+
+    Each cluster (a character plus the marks that follow it) is decomposed
+    with NFD and its nonspacing marks (category 'Mn') are dropped.  The
+    result is used only when nothing but ASCII and Latin letters remain;
+    any other cluster is copied through untouched, so text in other scripts
+    keeps its marks (Cyrillic 'й' must not become 'и') and its
+    original normalization form.
+    """
+    result = []
+    for cluster in _clusters(s):
+        base = ''.join(c for c in unicodedata.normalize('NFD', cluster)
+                       if unicodedata.category(c) != 'Mn')
+        result.append(base if all(map(_is_latin, base)) else cluster)
+    return ''.join(result)
+
+
+def latin1_to_ascii(uinput):
+    """Return *uinput* as text with accents stripped from Latin letters.
+
+    Bytes are decoded with the file system encoding first.
+    """
+    if isinstance(uinput, bytes):
+        uinput = uinput.decode(sys.getfilesystemencoding())
+    return strip_accents(uinput)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        sys.exit('Usage: %s name\n' % sys.argv[0])
+    for name in sys.argv[1:]:
+        print(latin1_to_ascii(name))
-- 
2.39.2