epcim
10/23/2015 - 12:42 PM

text-removeDiacritics.py

# -*- coding: utf-8 -*-
#!/bin/python


import os,sys
import unicodedata

def getFiles(dir,parrent=""):
    """Walk ``dir`` recursively and report entries whose name fixName changes.

    Every regular file and directory found in ``dir`` is passed through
    ``fixName``; the old and new paths are handed to ``reName`` and, when the
    name differs, an INFO/FIXE pair is printed.  Directories are descended
    into after they have been processed.

    Args:
        dir: Directory to scan.  One trailing "/" is ignored.
        parrent: Path prefix used only for the INFO/FIXE report lines and
            forwarded to ``fixName``.  NOTE: the top-level call leaves it
            empty, so top-level report lines start with "/".

    Returns:
        None.

    Raises:
        OSError: If ``dir`` cannot be listed.
    """
    # Strip one trailing slash so children can be joined with "/", but keep a
    # bare "/" intact (stripping it would leave "" and os.listdir would fail).
    if len(dir) > 1 and dir[-1:] == "/":
        dir = dir[:-1]
    prefix = dir if dir[-1:] == "/" else dir + "/"

    print("Entering dir: " + dir)

    for node in os.listdir(dir):
        oldPath = prefix + node
        isDir = os.path.isdir(oldPath)

        # Broken symlinks, sockets, FIFOs etc. are neither file nor directory.
        # They are skipped: no fixed name is computed for them, so there is
        # nothing to report (previously this hit an unbound or stale name).
        if not isDir and not os.path.isfile(oldPath):
            continue

        fixedNode = fixName(node,parrent=parrent)
        newPath = prefix + fixedNode
        reName(oldPath, newPath)

        if isDir:
            # reName may only report the rename without performing it, so
            # descend into whichever of the two paths exists afterwards.
            subDir = oldPath if os.path.isdir(oldPath) else newPath
            getFiles(subDir, parrent=subDir)

        if node != fixedNode:
            print("INFO:  " + parrent + "/" + node)
            print("FIXE:  " + parrent + "/" + fixedNode)


# Replacement table used by fixName to strip diacritics such as ěščřžýáíé.
#
# Each entry is a (search, replacement) pair.  fixName applies the pairs
# one after another, top to bottom, so ORDER MATTERS: a later entry sees
# the output of every earlier one.  Do not sort or regroup the entries.
#
# NOTE(review): the file uses Python 2 syntax, where these literals are byte
# strings.  The bytes presumably come from several different encodings of
# Czech file names -- the individual source encodings are not documented
# here, confirm before adding entries.
trtable = (
            ('\xa0','a'), ('\xd8','e'),
            ('\xac','c'), ('\xa1','i'),
            # Disabled at this position; the same mapping is active near the
            # end of the table.
            #('\x85','u'),
            ('\xe9','u'),
            ('\xfd','r'),
            ('\xa7','z'),
            ('\xbe','z'),
            ('\xa2','o'),
            ('\xe6','s'),
            ('\xec','y'),
            ('\xe1','a'),
            ('\xed','i'),
            ('\xf8','r'),
            ('\xa9','s'),
            ('\xb9','s'),
            ('\xe8','c'),
            ('\xf2','n'),
            ('\xc8','c'),
            ('\xbb','t'),
            ('\xa6','z'),
            ('\xa0','a'),  # repeat of the first entry; has no further effect
            ('\x9f','c'),
            ('\xe7','s'),
            ('\xa3','a'),
            ('\x82','y'),
            ('\xd4','z'),
            ('\xe7','s'),  # repeat of the entry a few lines up; no further effect
            ('\x9c','t'),
            ('\xe5','n'),
            ('\xe0','o'),
            ('\xf9','u'),
            ('\xd6','I'),
            ('\xef',''),   # removed outright
            ('\x93','u'),
            ('\x96','l'),
            ('\xb4',''),   # removed outright
            ('\xfa','u'),
            ('\xff','i'),
            ('\xae','z'),
            ('\xfc','r'),
            ('\xab','c'),
            ('\x87','s'),
            ('\xf3','i'),
            ('\xf5','z'),
            ('\xe2','d'),
            ('\xda','u'),
            ('\x9b','t'),
            ('\x84','?'),  # becomes '_' through the final ('?','_') entry
            ('\x92','n'),
            ('\x9d','u'),
            ('\x98','a'),
            ('\x85','u'),
            # The next two entries match '\xc3' followed by an 'i' / 's',
            # which may have been produced by an earlier entry (for example
            # '\xa1' -> 'i' or '\xa9' -> 's').  Presumably this cleans up
            # two-byte sequences that start with '\xc3' -- not verified.
            ('\xc3i','a'),
            ('\xc3s','e'),
            ('\xc3\x9a','U'),
            ('\xc3\x8d','I'),
            # Must stay last: turns every '?' (from the input or produced
            # above) into '_'.
            ('?','_'),
          )

def reName(ofn,fn,dryRun=True):
    """Report, and optionally perform, the rename of ``ofn`` to ``fn``.

    Nothing is printed or done when both names are equal.

    Args:
        ofn: Current path of the file or directory.
        fn: Path it should be renamed to.
        dryRun: When true (the default) the rename is only reported, which
            is what this function did while the ``os.rename`` call was
            commented out.  Pass ``False`` to actually rename.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 if ``os.rename`` fails (only possible when
            ``dryRun`` is false).
    """
    if ofn == fn:
        return

    print("Rename...")
    print("OldName: %s \t NewName: %s" % (ofn,fn))

    if dryRun:
        return

    # Only the call that can raise OSError sits inside the try block.
    try:
        os.rename(ofn, fn)
    except OSError as e:
        print("OSError: %s" % (e,))
        # Non-zero status so callers and shells can see that the run failed.
        sys.exit(1)

def fixName(fn,parrent="",table=None):
    """Return ``fn`` with every replacement from the table applied in order.

    Args:
        fn: Name to clean up; it is converted with ``str()`` first.
        parrent: Unused; kept so existing callers that pass it keep working.
        table: Optional iterable of ``(search, replacement)`` pairs.  Defaults
            to the module-level ``trtable``.

    Returns:
        The resulting string.  Pairs are applied sequentially, so a later
        pair also acts on text produced by an earlier one.
    """
    if table is None:
        table = trtable

    fn = str(fn)
    for search, replacement in table:
        # str.replace already substitutes every occurrence, so a single call
        # per pair is enough.
        fn = fn.replace(search, replacement)
    return fn

# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    # The directory to scan is the first command-line argument; show a usage
    # hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <directory>" % sys.argv[0])
    getFiles(sys.argv[1])