from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words demo: each distinct token found in the corpus becomes one
# column of a document-term count matrix (tokens appear lower-cased in the
# vocabulary, as the expected output below shows).
vectorizer = CountVectorizer()
corpus = [
    'Steve Jobs biological father, Abdulfattah John Jandali.',
    'was born into an Arab Muslim household.',
    'Jobs moved back to the San Francisco Bay Area.',
]

# Learn the vocabulary and build the per-document count matrix in one step.
X = vectorizer.fit_transform(corpus)

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# `get_feature_names_out()` is its replacement. Fall back to the old method
# only on installations that predate the new one.
if hasattr(vectorizer, 'get_feature_names_out'):
    # The new API returns a NumPy array; convert to a plain list of `str`
    # so the printed form matches the expected output shown below.
    feature_names = vectorizer.get_feature_names_out().tolist()
else:
    feature_names = vectorizer.get_feature_names()

print(feature_names)
# ['abdulfattah', 'an', 'arab', 'area', 'back', 'bay', 'biological', 'born',
#  'father', 'francisco', 'household', 'into', 'jandali', 'jobs', 'john',
#  'moved', 'muslim', 'san', 'steve', 'the', 'to', 'was']
# (printed on a single line; wrapped here for readability)

# One row per document, one column per vocabulary entry (same order as above).
print(X.toarray())
# [[1 0 0 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0]
#  [0 1 1 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1]
#  [0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 1 0 1 0 1 1 0]]