git.phdru.name Git - dotfiles.git/commitdiff
bin/cleanup-filenames.sh: Remove accents
author Oleg Broytman <phd@phdru.name>
Mon, 6 Nov 2023 00:37:41 +0000 (03:37 +0300)
committer Oleg Broytman <phd@phdru.name>
Mon, 6 Nov 2023 00:37:41 +0000 (03:37 +0300)
Convert latin1 characters to ascii.

bin/cleanup-filenames.sh
bin/unicode_norm_nfd.py [new file with mode: 0755]

diff --git a/bin/cleanup-filenames.sh b/bin/cleanup-filenames.sh
index c977312b8d06eec9a90b121868c04edfd35fcc3b..a517e966996bc62f95ef7d8111a081252c3e5553 100755 (executable)
@@ -7,7 +7,7 @@ fi
 
 for fname in "$@"; do
 
-   newname=$(echo "$fname" | sed -e "
+    newname=$(echo "$fname" | sed -e "
 s/[‘“’”‘“’”″′«»\`\"]/'/g
 s/ *: */ - /g
 s/[–—]/-/g
@@ -17,10 +17,11 @@ s/\?//g
 s/№/N/g
 ")
 
-   if [ "$fname" \!= "$newname" ]; then
-      echo "$fname"
-      echo "$newname"
-      mv -- "$fname" "$newname"
-      echo
-   fi
+    newname="$(unicode_norm_nfd.py "$newname")"
+    if [ "$fname" \!= "$newname" ]; then
+        echo "$fname"
+        echo "$newname"
+        mv -- "$fname" "$newname"
+        echo
+    fi
 done
diff --git a/bin/unicode_norm_nfd.py b/bin/unicode_norm_nfd.py
new file mode 100755 (executable)
index 0000000..2cca0d9
--- /dev/null
@@ -0,0 +1,23 @@
+#! /usr/bin/env python3
+# https://stackoverflow.com/a/518232/7976758
+
+import sys
+import unicodedata
+
+
+def strip_accents(s):
+    return ''.join(c for c in unicodedata.normalize('NFD', s)
+                   if unicodedata.category(c) != 'Mn')
+
+
+def latin1_to_ascii(uinput):
+    if isinstance(uinput, bytes):
+        uinput = uinput.decode(sys.getfilesystemencoding())
+    return strip_accents(uinput)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        sys.exit('Usage: %s name\n' % sys.argv[0])
+    for name in sys.argv[1:]:
+        print(latin1_to_ascii(name))