04pallav
9/13/2017 - 9:21 PM

Wordcloud from text

Build a word cloud and a word-frequency table from a text column, using the tm, SnowballC and wordcloud packages.


library("tm")
library("SnowballC")
library("wordcloud")
# Build a word cloud and a word-frequency table from the free text held in
# technical$Subject. `technical` must already exist in the session.

# One corpus document per element of technical$Subject.
jeopCorpus <- Corpus(VectorSource(technical$Subject))

# Lower-case via content_transformer() so the corpus structure is preserved.
# Passing PlainTextDocument straight to tm_map() does not clean the text and
# can leave the corpus unusable for the later steps. Lower-casing first also
# lets the stop-word filter below catch capitalised words such as "The".
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))

# Remove stop words while apostrophes are still present: the English
# stop-word list contains contractions ("don't", "i'm") that would no longer
# match once punctuation has been stripped.
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords("english"))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)

# Collapse inflected forms onto a common stem so they are counted together.
jeopCorpus <- tm_map(jeopCorpus, stemDocument)

# Plot at most 100 terms; random.order = FALSE places them by decreasing
# frequency instead of at random.
wordcloud(jeopCorpus, max.words = 100, random.order = FALSE)

# Term frequencies over the whole corpus, most frequent first.
dtm <- TermDocumentMatrix(jeopCorpus)
# NOTE(review): as.matrix() turns the sparse term-document matrix into a dense
# one; presumably fine for this data set, but confirm for large corpora.
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)