JGuizard
4/25/2016 - 10:07 AM

Remove punctuation and stop words from a sentence.

Lowercase a sentence, strip its punctuation by tokenizing on word characters, and remove English stop words using NLTK.

import string
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
import re

def preprocess(sentence, language='english'):
	"""Lowercase a sentence and strip its punctuation and stop words.

	The sentence is lowercased and split into runs of regex word
	characters, which discards punctuation (so "didn't" becomes the two
	tokens "didn" and "t", and "French-Fries" becomes "french" and
	"fries"). Tokens found in the NLTK stop-word list for ``language``
	are then dropped.

	Args:
		sentence: The text to clean. Must provide ``lower()``.
		language: Name of the NLTK stop-word list to filter against.
			Defaults to 'english'.

	Returns:
		The remaining tokens joined by single spaces, in their original
		order. An input with no surviving tokens yields an empty string.

	Note:
		Requires the NLTK 'stopwords' corpus to be available locally.
	"""
	# Fetch the stop-word list once and keep it in a set: calling
	# stopwords.words() inside the filter would rebuild the list for every
	# token and scan it linearly on each membership test.
	stop_words = set(stopwords.words(language))

	tokenizer = RegexpTokenizer(r'\w+')
	tokens = tokenizer.tokenize(sentence.lower())

	filtered_words = [w for w in tokens if w not in stop_words]
	return " ".join(filtered_words)

# Demo: run the preprocessing on a sample sentence that contains
# apostrophes, a full stop, and a hyphenated word.
sentence = "At eight o'clock on Thursday morning Arthur didn't feel very good. French-Fries"
# Call print with parentheses so the line is valid on Python 3 as well as
# Python 2 (where a single parenthesized argument prints identically).
print(preprocess(sentence))