def prepare_data(seqs, labels):
    """Pad a batch of index sequences into a time-major matrix and its mask.

    Each sequence is written into one column of ``x`` and zero-padded at the
    end up to the length of the longest sequence in the batch. The result is
    therefore laid out as ``(maxlen, n_samples)``: axis 0 runs over positions
    within a sequence and axis 1 over the samples.

    Args:
        seqs: List of sequences, each a list of integer ids.
        labels: Labels belonging to ``seqs``; returned unchanged.

    Returns:
        A tuple ``(x, x_mask, labels)``:
        ``x`` is an int64 array of shape ``(maxlen, n_samples)``;
        ``x_mask`` has the same shape, dtype ``theano.config.floatX``, and
        holds 1 where ``x`` is non-zero and 0 elsewhere.

    Note:
        The mask is derived from ``x != 0``, so id 0 is treated as padding
        wherever it occurs, including inside a sequence. An empty ``seqs``
        yields arrays of shape ``(0, 0)``.
    """
    lengths = [len(s) for s in seqs]
    n_samples = len(seqs)
    # Guard the empty batch: taking the max of an empty list would raise.
    maxlen = max(lengths) if lengths else 0

    x = numpy.zeros((maxlen, n_samples), dtype='int64')
    for idx, s in enumerate(seqs):
        # Fill the leading part of the column; the tail stays 0 (padding).
        x[:lengths[idx], idx] = s

    # Every position still holding 0 is padding, so the mask is simply the
    # non-zero pattern of x cast to Theano's configured float type.
    x_mask = (x != 0).astype(theano.config.floatX)
    return x, x_mask, labels