# Remove diacritics: strip the combining marks attached to characters in a text
# from Fluent Python chapter 4
import unicodedata
import string
def shave_marks(txt: str) -> str:
    """Return *txt* with all diacritic (combining) marks removed.

    The text is decomposed with NFD so that each accented character
    becomes a base character followed by its combining marks, every
    character with a non-zero canonical combining class is dropped, and
    the remainder is recomposed with NFC.

    The filter is not limited to Latin letters: combining marks on
    characters of any script are removed.

    Args:
        txt: The text to strip.

    Returns:
        The NFC-normalized text without combining marks.

    >>> shave_marks('café')
    'cafe'
    """
    # NFD splits precomposed characters such as 'é' into 'e' + U+0301.
    decomposed = unicodedata.normalize('NFD', txt)
    # unicodedata.combining() returns 0 for characters that are not
    # combining marks, so only base characters are kept.
    shaved = ''.join(
        char for char in decomposed if not unicodedata.combining(char)
    )
    # Recompose whatever remains into canonical composed form.
    return unicodedata.normalize('NFC', shaved)
"""
In [5]: shave_marks('café')
Out[5]: 'cafe'
"""