Natural Language Toolkit (NLTK)
Lemmatize
# A sample string to work with.
string = 'some string of characters'
from nltk.stem import WordNetLemmatizer
# Instantiate the lemmatizer.
lemmatizer = WordNetLemmatizer()
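# WordNetLemmatizer relies on the WordNet corpus; if lemmatize() raises a
# LookupError, download the corpus once with nltk.download('wordnet').
# A couple of quick examples: lemmatize() assumes each word is a noun
# unless you pass a part-of-speech tag such as pos='v' for verbs.
print(lemmatizer.lemmatize('geese'))             # -> 'goose'
print(lemmatizer.lemmatize('running', pos='v'))  # -> 'run'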
# Before we can lemmatize our string, we need to tokenize it.
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r'\w+')
# What is our tokenizer doing? Is anyone familiar with regex?
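# The pattern r'\w+' matches runs of word characters (letters, digits,
# underscore), so anything else, like punctuation, acts as a separator:
print(tokenizer.tokenize("Don't panic!"))  # -> ['Don', 't', 'panic']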
# Tokenize the lowercased string into a list of word tokens.
string_tokens = tokenizer.tokenize(string.lower())
# Lemmatize each token; without a pos argument, each word is treated as a noun.
tokens_lem = [lemmatizer.lemmatize(i) for i in string_tokens]
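# With the sample string above, the only token that changes is the plural:
print(string_tokens)  # -> ['some', 'string', 'of', 'characters']
print(tokens_lem)     # -> ['some', 'string', 'of', 'character']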