EdisonChendi
10/20/2016 - 4:27 AM

trunc.py

# coding=utf-8

def trunc(s, limit, coding="UTF-8", postfix="..."):
    '''
    Truncate ``s`` so that its encoded form fits within ``limit`` bytes.

    Works on both Python 2 and Python 3.

    The text is cut on a character boundary, so a multi-byte character is
    never split in half.

    :param s: text to truncate; a byte string is first decoded with ``coding``.
    :param limit: maximum size, in bytes, of the kept text once encoded
        with ``coding``.
    :param coding: codec used to decode byte input and to measure the
        encoded size of each character.
    :param postfix: marker appended only when something was cut off. Its
        own size is not counted against ``limit``.
    :return: the decoded text, unchanged when it already fits, otherwise
        the longest fitting prefix followed by ``postfix``.
    '''
    text = s.decode(coding) if isinstance(s, bytes) else s
    used = 0
    for index, char in enumerate(text):
        used += len(char.encode(coding))
        if used > limit:
            # ``char`` is the first character that does not fit: keep
            # everything before it and flag the truncation.
            return text[:index] + postfix
    # Everything fit (or the input was empty): return the text whole rather
    # than slicing at the last loop index, which would drop the final
    # character.
    return text
 
# py2
# Demo: truncate the same kind of text given as a unicode literal, as a
# plain (non-u-prefixed) string literal, and as GB2312-encoded bytes.
a = 100 * u"你好世界,"
trunc_a = trunc(a, limit=50)
print(trunc_a)

b = 100 * "你好世界,"
trunc_b = trunc(b, limit=50)
print(trunc_b)

c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
trunc_c = trunc(c, limit=20, coding="gb2312")
print(trunc_c)

# py3
# a = "你好世界," * 100
# trunc_a = trunc(a, 50)
# print(trunc_a)
# b = bytes("你好世界," * 100, "UTF-8")
# trunc_b = trunc(b, 50)
# print(trunc_b)
# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
# trunc_c = trunc(c, 20, coding="gb2312")
# print(trunc_c)