#! /usr/bin/env python
"""Rename files so that their names contain only plain ASCII characters.

Latin-1 punctuation and accented letters are replaced by the ASCII
approximations listed in ``xlate``; any other non-ASCII character is
dropped.

Usage: latin1_to_ascii.py filename [filename ...]
"""

# Recovered from the git blob diff of bin/latin1_to_ascii.py.
# See http://code.activestate.com/recipes/251871/

import os
import sys

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: str is already the text type

# Maps a single Latin-1 character to its ASCII replacement string.
# Keys are one-character text strings, so lookups must use the character
# itself, not its code point.
xlate = {
    u'\N{ACUTE ACCENT}': "'",
    u'\N{BROKEN BAR}': '|',
    u'\N{CEDILLA}': '{cedilla}',
    u'\N{CENT SIGN}': '{cent}',
    u'\N{COPYRIGHT SIGN}': '{C}',
    u'\N{CURRENCY SIGN}': '{currency}',
    u'\N{DEGREE SIGN}': '{degrees}',
    u'\N{DIAERESIS}': '{umlaut}',
    u'\N{DIVISION SIGN}': '/',
    u'\N{FEMININE ORDINAL INDICATOR}': '{^a}',
    u'\N{INVERTED EXCLAMATION MARK}': '!',
    u'\N{INVERTED QUESTION MARK}': '?',
    u'\N{LATIN CAPITAL LETTER A WITH ACUTE}': 'A',
    u'\N{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}': 'A',
    u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}': 'A',
    u'\N{LATIN CAPITAL LETTER A WITH GRAVE}': 'A',
    u'\N{LATIN CAPITAL LETTER A WITH RING ABOVE}': 'A',
    u'\N{LATIN CAPITAL LETTER A WITH TILDE}': 'A',
    u'\N{LATIN CAPITAL LETTER AE}': 'Ae',
    u'\N{LATIN CAPITAL LETTER C WITH CEDILLA}': 'C',
    u'\N{LATIN CAPITAL LETTER E WITH ACUTE}': 'E',
    u'\N{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}': 'E',
    u'\N{LATIN CAPITAL LETTER E WITH DIAERESIS}': 'E',
    u'\N{LATIN CAPITAL LETTER E WITH GRAVE}': 'E',
    u'\N{LATIN CAPITAL LETTER ETH}': 'Th',
    u'\N{LATIN CAPITAL LETTER I WITH ACUTE}': 'I',
    u'\N{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}': 'I',
    u'\N{LATIN CAPITAL LETTER I WITH DIAERESIS}': 'I',
    u'\N{LATIN CAPITAL LETTER I WITH GRAVE}': 'I',
    u'\N{LATIN CAPITAL LETTER N WITH TILDE}': 'N',
    u'\N{LATIN CAPITAL LETTER O WITH ACUTE}': 'O',
    u'\N{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}': 'O',
    u'\N{LATIN CAPITAL LETTER O WITH DIAERESIS}': 'O',
    u'\N{LATIN CAPITAL LETTER O WITH GRAVE}': 'O',
    u'\N{LATIN CAPITAL LETTER O WITH STROKE}': 'O',
    u'\N{LATIN CAPITAL LETTER O WITH TILDE}': 'O',
    u'\N{LATIN CAPITAL LETTER THORN}': 'th',
    u'\N{LATIN CAPITAL LETTER U WITH ACUTE}': 'U',
    u'\N{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}': 'U',
    u'\N{LATIN CAPITAL LETTER U WITH DIAERESIS}': 'U',
    u'\N{LATIN CAPITAL LETTER U WITH GRAVE}': 'U',
    u'\N{LATIN CAPITAL LETTER Y WITH ACUTE}': 'Y',
    u'\N{LATIN SMALL LETTER A WITH ACUTE}': 'a',
    u'\N{LATIN SMALL LETTER A WITH CIRCUMFLEX}': 'a',
    u'\N{LATIN SMALL LETTER A WITH DIAERESIS}': 'a',
    u'\N{LATIN SMALL LETTER A WITH GRAVE}': 'a',
    u'\N{LATIN SMALL LETTER A WITH RING ABOVE}': 'a',
    u'\N{LATIN SMALL LETTER A WITH TILDE}': 'a',
    u'\N{LATIN SMALL LETTER AE}': 'ae',
    u'\N{LATIN SMALL LETTER C WITH CEDILLA}': 'c',
    u'\N{LATIN SMALL LETTER E WITH ACUTE}': 'e',
    u'\N{LATIN SMALL LETTER E WITH CIRCUMFLEX}': 'e',
    u'\N{LATIN SMALL LETTER E WITH DIAERESIS}': 'e',
    u'\N{LATIN SMALL LETTER E WITH GRAVE}': 'e',
    u'\N{LATIN SMALL LETTER ETH}': 'th',
    u'\N{LATIN SMALL LETTER I WITH ACUTE}': 'i',
    u'\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}': 'i',
    u'\N{LATIN SMALL LETTER I WITH DIAERESIS}': 'i',
    u'\N{LATIN SMALL LETTER I WITH GRAVE}': 'i',
    u'\N{LATIN SMALL LETTER N WITH TILDE}': 'n',
    u'\N{LATIN SMALL LETTER O WITH ACUTE}': 'o',
    u'\N{LATIN SMALL LETTER O WITH CIRCUMFLEX}': 'o',
    u'\N{LATIN SMALL LETTER O WITH DIAERESIS}': 'o',
    u'\N{LATIN SMALL LETTER O WITH GRAVE}': 'o',
    u'\N{LATIN SMALL LETTER O WITH STROKE}': 'o',
    u'\N{LATIN SMALL LETTER O WITH TILDE}': 'o',
    u'\N{LATIN SMALL LETTER SHARP S}': 'ss',
    u'\N{LATIN SMALL LETTER THORN}': 'th',
    u'\N{LATIN SMALL LETTER U WITH ACUTE}': 'u',
    u'\N{LATIN SMALL LETTER U WITH CIRCUMFLEX}': 'u',
    u'\N{LATIN SMALL LETTER U WITH DIAERESIS}': 'u',
    u'\N{LATIN SMALL LETTER U WITH GRAVE}': 'u',
    u'\N{LATIN SMALL LETTER Y WITH ACUTE}': 'y',
    u'\N{LATIN SMALL LETTER Y WITH DIAERESIS}': 'y',
    u'\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}': '<<',
    u'\N{MACRON}': '_',
    u'\N{MASCULINE ORDINAL INDICATOR}': '{^o}',
    u'\N{MICRO SIGN}': '{micro}',
    u'\N{MIDDLE DOT}': '*',
    u'\N{MULTIPLICATION SIGN}': '*',
    u'\N{NOT SIGN}': '{not}',
    u'\N{PILCROW SIGN}': '{paragraph}',
    u'\N{PLUS-MINUS SIGN}': '{+/-}',
    u'\N{POUND SIGN}': '{pound}',
    u'\N{REGISTERED SIGN}': '{R}',
    u'\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}': '>>',
    u'\N{SECTION SIGN}': '{section}',
    u'\N{SOFT HYPHEN}': '-',
    u'\N{SUPERSCRIPT ONE}': '{^1}',
    u'\N{SUPERSCRIPT THREE}': '{^3}',
    u'\N{SUPERSCRIPT TWO}': '{^2}',
    u'\N{VULGAR FRACTION ONE HALF}': '{1/2}',
    u'\N{VULGAR FRACTION ONE QUARTER}': '{1/4}',
    u'\N{VULGAR FRACTION THREE QUARTERS}': '{3/4}',
    u'\N{YEN SIGN}': '{yen}',
}


def latin1_to_ascii(uinput):
    """Return an ASCII-only approximation of *uinput*.

    Args:
        uinput: Text to convert. A byte string is first decoded using
            ``sys.getfilesystemencoding()``.

    Returns:
        A native string in which every character found in ``xlate`` is
        replaced by its mapped value, ASCII characters are kept as they
        are, and all remaining non-ASCII characters are removed.

    Raises:
        TypeError: If *uinput* is neither text nor a decodable bytes-like
            object.
        UnicodeDecodeError: If a byte string cannot be decoded with the
            filesystem encoding.
    """
    if not isinstance(uinput, _text_type):
        uinput = _text_type(uinput, sys.getfilesystemencoding())
    out = []
    for c in uinput:
        # xlate is keyed by characters; looking up ord(c) could never match.
        if c in xlate:
            out.append(xlate[c])
        elif ord(c) < 0x80:
            out.append(str(c))
        # Non-ASCII characters without a mapping are dropped on purpose.
    return ''.join(out)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        sys.exit('Usage: %s filename\n' % sys.argv[0])
    for name in sys.argv[1:]:
        plain_ascii = latin1_to_ascii(name)
        # Only touch the filesystem when the name actually changes.
        if plain_ascii != name:
            os.rename(name, plain_ascii)