# coding=utf-8
def trunc(s, limit, coding="UTF-8", postfix="..."):
    """Truncate text so that its encoded size does not exceed ``limit`` bytes.

    Works on both Python 2 and Python 3. The cut is always made on a
    character boundary, so a multi-byte character is never split.

    Args:
        s: Text to truncate. A byte string is first decoded with ``coding``;
            anything else is used as-is (expected to be unicode text).
        limit: Maximum number of bytes, measured in ``coding``, that the kept
            characters may occupy. ``postfix`` is NOT counted against it.
        coding: Codec used both to decode byte input and to measure the size
            of each character.
        postfix: Marker appended only when something was actually cut off.

    Returns:
        Unicode text: the input unchanged when it fits within ``limit``,
        otherwise the longest fitting prefix followed by ``postfix``.

    Raises:
        UnicodeDecodeError: if ``s`` is bytes that are not valid in ``coding``.
        UnicodeEncodeError: if a character cannot be encoded in ``coding``.
    """
    text = s.decode(coding) if isinstance(s, bytes) else s
    used = 0
    for index, char in enumerate(text):
        used += len(char.encode(coding))
        if used > limit:
            # ``char`` is the first character that does not fit: keep
            # everything before it and flag the truncation.
            return text[:index] + postfix
    # Every character fit, so return the text whole. (Slicing with the last
    # loop index here would wrongly drop the final character.)
    return text
# Python 2 flavoured demo: exercise trunc() with unicode text, a plain string
# literal and GB2312-encoded bytes. Each input is longer than its limit.
# Case a: unicode literal, 50-byte budget measured in UTF-8 (the default codec).
a = u"你好世界," * 100
trunc_a = trunc(a, 50)
print(trunc_a)
# Case b: plain string literal; on Python 2 this is a byte string, which
# trunc() decodes with the default UTF-8 codec before measuring.
b = "你好世界," * 100
trunc_b = trunc(b, 50)
print(trunc_b)
# Case c: bytes encoded as GB2312; the same codec is passed to trunc() so that
# decoding and the 20-byte budget are both done in GB2312.
c = u'你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
trunc_c = trunc(c, 20, coding="gb2312")
print(trunc_c)
# Python 3 variant of the demo above (kept commented out):
# a = "你好世界," * 100
# trunc_a = trunc(a, 50)
# print(trunc_a)
# b = bytes("你好世界," * 100, "UTF-8")
# trunc_b = trunc(b, 50)
# print(trunc_b)
# c = '你好上你你你你你好上海,好上海,好上海,好上海,好上海,海'.encode("gb2312")
# trunc_c = trunc(c, 20, coding="gb2312")
# print(trunc_c)