# yajiez
# 10/14/2017 - 2:27 PM
#
# word_cloud.R

library(tm)

#' Normalise a tm corpus before term counting.
#'
#' Applies, in order: lower-casing, English stop-word removal, punctuation
#' removal and whitespace collapsing.
#'
#' @param corpus A tm corpus, e.g. the result of `Corpus(VectorSource(x))`.
#' @return The transformed corpus.
cleanCorpus <- function(corpus){
    # tolower() is a plain base function, so it is wrapped in
    # content_transformer() to let tm_map() keep the documents as documents.
    # That removes the need for the old PlainTextDocument re-wrapping step,
    # which can corrupt the corpus on current tm versions.
    corpus.tmp <- tm_map(corpus, content_transformer(tolower))
    # Stop words are removed while apostrophes are still present; entries
    # such as "don't" would no longer match once punctuation is stripped.
    corpus.tmp <- tm_map(corpus.tmp, removeWords, stopwords("english"))
    corpus.tmp <- tm_map(corpus.tmp, removePunctuation)
    # Done last because the removals above leave runs of blanks behind.
    corpus.tmp <- tm_map(corpus.tmp, stripWhitespace)
    return(corpus.tmp)
}

# Build TDM
#' Build a term-document matrix from raw text.
#'
#' Wraps `x` in a corpus, runs it through `cleanCorpus()` and tabulates the
#' result with `TermDocumentMatrix()`.
#'
#' @param x Input handed to `VectorSource()`; presumably a character vector
#'   with one document per element (confirm against callers).
#' @return The value of `TermDocumentMatrix()` for the cleaned corpus.
generateTDM <- function(x){
    raw_corpus <- Corpus(VectorSource(x))
    TermDocumentMatrix(cleanCorpus(raw_corpus))
}