imnotEnvy
11/14/2017 - 2:59 AM

Remove diacritics 去掉文本中字符的附加符号

Remove diacritics 去掉文本中字符的附加符号

# Adapted from "Fluent Python" by Luciano Ramalho, chapter 4 ("Text versus Bytes")
import unicodedata
import string

def shave_marks(txt):
    """Return a copy of ``txt`` with all combining marks stripped.

    The text is first decomposed with NFD so that accented characters are
    split into a base character followed by separate combining characters.
    Every character whose canonical combining class is non-zero is then
    discarded, and what remains is recomposed with NFC.

    Args:
        txt: The string to process.

    Returns:
        The NFC-normalized string without combining characters.
    """
    decomposed = unicodedata.normalize('NFD', txt)
    base_chars = []
    for ch in decomposed:
        # A combining class of 0 means the character is not a combining mark.
        if unicodedata.combining(ch) == 0:
            base_chars.append(ch)
    return unicodedata.normalize('NFC', ''.join(base_chars))


"""
In [5]: shave_marks('café')
Out[5]: 'cafe'
"""