#!/bin/python
# -*- coding: utf-8 -*-
#
# epcim
# 10/23/2015 - 12:43 PM
#
# text-removeDiacritics.py.save.sh


import os,sys
import unicodedata

def getFiles(dir,parrent=""):
    """Recursively rename everything below ``dir`` to its fixName() form.

    Each directory and regular file returned by os.listdir(dir) is passed
    through fixName() and renamed with reName().  Directories are then
    descended into using their *new* path, which is also handed down as
    ``parrent`` for the messages printed at the next level.

    Entries that are neither a directory nor a regular file (for example
    broken symlinks or sockets) are left untouched.

    dir     -- directory to process; one trailing "/" is ignored.
    parrent -- prefix used only in the printed messages.

    Returns None.  Errors from os.listdir() propagate to the caller.
    """
    # Strip a single trailing slash, but never reduce a bare "/" to "".
    if len(dir) > 1 and dir[-1:] == "/":
        dir = dir[0:-1]

    for node in os.listdir(dir):
        path = dir + "/" + node

        # NOTE: os.path.isdir() follows symlinks, so a symlink pointing at a
        # directory is treated as a directory here.
        isDirectory = os.path.isdir(path)
        if not isDirectory and not os.path.isfile(path):
            # Nothing is renamed for this entry, so there is nothing to
            # report either; skipping avoids using a name computed for a
            # previous entry (or no name at all).
            continue

        fixedNode = fixName(node, parrent=parrent)
        fixedPath = dir + "/" + fixedNode
        reName(path, fixedPath)

        if isDirectory:
            # Recurse only after the rename so the path is still valid.
            getFiles(fixedPath, parrent=fixedPath)

        if node != fixedNode:
            # Single-argument print() gives the same output on Python 2 and 3.
            print("%s %s" % ("INFO: ", parrent + "/" + node))
            print("%s %s" % ("FIXE: ", parrent + "/" + fixedNode))


# ěščřžýáíé
# Ordered (pattern, replacement) pairs applied one after another by fixName().
#
# The patterns are raw byte values (this is Python 2 code, so the literals are
# byte strings).  The single-byte entries appear to come from several legacy
# code pages -- not verified; the multi-byte entries are UTF-8 sequences.
#
# ORDER MATTERS.  Some later patterns match the output of earlier ones, e.g.
#   '\xc3\xa1' (UTF-8 "a with acute") -> '\xa1' becomes 'i' -> '\xc3i' -> 'a'
#   '\xc3\xa9' (UTF-8 "e with acute") -> '\xa9' becomes 's' -> '\xc3s' -> 'e'
# so do not sort or regroup the entries.
trtable = (
    ('\xa0', 'a'),
    ('\xd8', 'e'),
    ('\xac', 'c'),
    ('\xa1', 'i'),
    ('\xe9', 'u'),
    ('\xfd', 'r'),
    ('\xa7', 'z'),
    ('\xbe', 'z'),
    ('\xa2', 'o'),
    ('\xe6', 's'),
    ('\xec', 'y'),
    ('\xe1', 'a'),
    ('\xed', 'i'),
    ('\xf8', 'r'),
    ('\xa9', 's'),
    ('\xb9', 's'),
    ('\xe8', 'c'),
    ('\xf2', 'n'),
    ('\xc8', 'c'),
    ('\xbb', 't'),
    ('\xa6', 'z'),
    ('\x9f', 'c'),
    ('\xe7', 's'),
    ('\xa3', 'a'),
    ('\x82', 'y'),
    ('\xd4', 'z'),
    ('\x9c', 't'),
    ('\xe5', 'n'),
    ('\xe0', 'o'),
    ('\xf9', 'u'),
    ('\xd6', 'I'),
    ('\xef', ''),   # dropped entirely
    ('\x93', 'u'),
    ('\x96', 'l'),
    ('\xb4', ''),   # dropped entirely
    ('\xfa', 'u'),
    ('\xff', 'i'),
    ('\xae', 'z'),
    ('\xfc', 'r'),
    ('\xab', 'c'),
    ('\x87', 's'),
    ('\xf3', 'i'),
    ('\xf5', 'z'),
    ('\xe2', 'd'),
    ('\xda', 'u'),
    ('\x9b', 't'),
    ('\x84', '?'),
    ('\x92', 'n'),
    ('\x9d', 'u'),
    ('\x98', 'a'),
    ('\x85', 'u'),
    # Two-step entries: rely on '\xa1' -> 'i' and '\xa9' -> 's' above.
    ('\xc3i', 'a'),
    ('\xc3s', 'e'),
    # UTF-8 sequences.
    ('\xc3\x9a', 'U'),
    ('\xc3\x8d', 'I'),
    # NOTE(review): collapses a whole five-byte run to a single 'c';
    # looks like a special case -- confirm it is intended.
    ('\xc4\x8dn\xc3\xad', 'c'),
    ('\xc5\xaf', 'u'),
    ('\xc3\xad', 'i'),
)


def reName(ofn,fn):
    """Rename ``ofn`` to ``fn``; do nothing when both names are equal.

    ofn -- current path.
    fn  -- desired path.

    The program is terminated with exit status 1 (after printing an
    "OSError:" line) when the rename fails or when ``fn`` already exists.
    Returns None.
    """
    if ofn == fn:
        return

    # os.rename() silently replaces an existing destination file on POSIX.
    # Two different names can map to the same ASCII form, so refuse to
    # clobber whatever is already there instead of losing data.
    if os.path.lexists(fn):
        print("%s %s" % ("OSError:", "target already exists: " + fn))
        sys.exit(1)

    try:
        os.rename(ofn, fn)
    except OSError as e:
        print("%s %s" % ("OSError:", e))
        # Non-zero status so callers and shells can see the failure.
        sys.exit(1)

def fixName(fn,parrent=""):
    """Return ``fn`` with every ``trtable`` pattern replaced, in table order.

    fn      -- a single file or directory name (not a full path).
    parrent -- prefix used only in the progress line that is printed.

    The replacements are applied sequentially, so a later table entry sees
    the result of all earlier ones.  Nothing on disk is touched here.
    """
    ofn = fn
    fn = str(fn)

    # One pass per entry is enough: str.replace() already substitutes every
    # occurrence, and no replacement text in the table re-creates its own
    # pattern.
    for pattern, replacement in trtable:
        fn = fn.replace(pattern, replacement)

    # Printed as a tuple on purpose: under Python 2 the original statement
    # printed the tuple's repr, which shows non-ASCII bytes as \x escapes.
    print(("IN", parrent + "/" + ofn))
    return fn

# Script entry point: process the directory given as the first argument.
# Guarded so that importing this module does not start renaming files.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Fail with a usage line instead of an IndexError traceback.
        sys.stderr.write("usage: %s DIRECTORY\n" % sys.argv[0])
        sys.exit(2)
    getFiles(sys.argv[1])