# Remove punctuation and stop words from a sentence.
import string
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
import re
def preprocess(sentence):
    """Return *sentence* lower-cased with punctuation and stop words removed.

    The sentence is lower-cased and split into runs of word characters
    (regex ``\\w+``), so every other character acts as a separator and is
    dropped.  As a consequence, apostrophes and hyphens split a word into
    separate tokens (e.g. "o'clock" yields "o" and "clock").  Tokens found
    in NLTK's English stop-word list are discarded, and the remaining
    tokens are joined with single spaces.

    Note: the NLTK "stopwords" corpus has to be available locally
    (typically installed with ``nltk.download('stopwords')``).
    """
    tokens = RegexpTokenizer(r'\w+').tokenize(sentence.lower())
    # Load the stop-word list once per call and keep it in a set: calling
    # stopwords.words() inside the filter would rebuild the list and scan
    # it linearly for every single token.
    english_stopwords = set(stopwords.words('english'))
    filtered_words = [w for w in tokens if w not in english_stopwords]
    return " ".join(filtered_words)
# Demo: run the preprocessor on a sample sentence and print the result.
sentence = "At eight o'clock on Thursday morning Arthur didn't feel very good. French-Fries"
# A parenthesized single argument prints identically on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(preprocess(sentence))