dahelg
1/31/2018 - 2:11 PM

Small example of how to convert German special characters from Unicode to UTF-8 and back to Unicode

Small example of how to convert German special characters from Unicode to UTF-8 and back to Unicode

# Small example how to convert german special characters from unicode to utf-8 and back to unicode
# http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl?start=128&number=128&names=-&utf8=string-literal
#

# Maps the UTF-8 byte sequence of each German special character to its ASCII
# transliteration. Keys and values are explicit byte strings so that they can
# be used with bytes.replace() on the UTF-8 encoded input; with plain '...'
# literals they would be text strings on Python 3 and never match.
umlaute_dict = {
    b'\xc3\xa4': b'ae',  # U+00E4  a-umlaut
    b'\xc3\xb6': b'oe',  # U+00F6  o-umlaut
    b'\xc3\xbc': b'ue',  # U+00FC  u-umlaut
    b'\xc3\x84': b'Ae',  # U+00C4  A-umlaut
    b'\xc3\x96': b'Oe',  # U+00D6  O-umlaut
    b'\xc3\x9c': b'Ue',  # U+00DC  U-umlaut
    b'\xc3\x9f': b'ss',  # U+00DF  sharp s (eszett)
}


def replace_german_umlaute(unicode_string):
    """Return *unicode_string* with German umlauts and sharp s transliterated.

    The text is encoded to UTF-8, every byte sequence listed in
    ``umlaute_dict`` is replaced by its ASCII transliteration, and the result
    is decoded back to text.

    Args:
        unicode_string: Text string (``str`` on Python 3, ``unicode`` on
            Python 2) to transliterate.

    Returns:
        A text string in which each character covered by ``umlaute_dict`` is
        replaced (e.g. U+00FC becomes ``ue``); all other characters,
        including other non-ASCII ones, are left unchanged.
    """
    utf8_string = unicode_string.encode('utf-8')

    # Byte-level replacement is safe: in UTF-8 a two-byte sequence such as
    # b'\xc3\xa4' can only occur as the encoding of that one code point.
    for utf8_bytes, replacement in umlaute_dict.items():
        utf8_string = utf8_string.replace(utf8_bytes, replacement)

    # Decode explicitly as UTF-8: the default codec is ASCII on Python 2 and
    # would fail on any remaining non-ASCII character not in the table.
    return utf8_string.decode('utf-8')