library(tm)
# Normalise a tm corpus before term counting.
#
# Applies, in order: punctuation removal, whitespace collapsing,
# lower-casing and English stop-word removal.
#
# Args:
#   corpus: a tm corpus, e.g. the result of Corpus(VectorSource(...)).
#
# Returns:
#   The transformed corpus.
cleanCorpus <- function(corpus) {
  corpus.tmp <- tm_map(corpus, removePunctuation)
  corpus.tmp <- tm_map(corpus.tmp, stripWhitespace)
  # tolower is a plain character function, not a tm transformation. Wrapping
  # it in content_transformer() lets tm_map() update the text while keeping
  # each document's class and metadata, so the documents no longer have to be
  # re-wrapped with PlainTextDocument afterwards (a step that discards
  # metadata and can mangle the corpus on current tm versions).
  corpus.tmp <- tm_map(corpus.tmp, content_transformer(tolower))
  # NOTE(review): punctuation is removed before stop words, so contractions
  # such as "don't" have presumably become "dont" by this point and will not
  # match the stop-word list -- confirm whether that is intended.
  corpus.tmp <- tm_map(corpus.tmp, removeWords, stopwords("english"))
  corpus.tmp
}
# Build a term-document matrix (TDM) from raw text.
#
# Args:
#   x: object passed to VectorSource(); typically a character vector of
#      documents.
#
# Returns:
#   The TermDocumentMatrix computed from the cleaned corpus.
generateTDM <- function(x) {
  # Wrap the input in a corpus and normalise it in one step.
  cleaned <- cleanCorpus(Corpus(VectorSource(x)))
  TermDocumentMatrix(cleaned)
}