Simple example of classifying text in R with machine learning (text-mining library, caret, and bayesian generalized linear model) #R #machin

3/3/2018 - 3:40 AM

Simple example of classifying text in R with machine learning (text-mining library, caret, and bayesian generalized linear model) #R #machin

Simple example of classifying text in R with machine learning (text-mining library, caret, and bayesian generalized linear model) #R #machinelearning #classification

classifytext.R

library(caret)
library(tm)

# Training data: one short document per class.
data <- c('Cats like to chase mice.', 'Dogs like to eat big bones.')
corpus <- VCorpus(VectorSource(data))

# Preprocessing options shared by the training and test matrices, so that both
# sets of documents are cleaned and stemmed in exactly the same way.
dtm_control <- list(
  removePunctuation = TRUE,
  stopwords = TRUE,
  stemming = TRUE,
  removeNumbers = TRUE
)

# Create a document term matrix (one row per document, one column per term).
tdm <- DocumentTermMatrix(corpus, control = dtm_control)

# Convert to a data.frame for training and assign a classification (factor)
# to each document. The class column `y` is appended after the term columns.
train <- as.data.frame(as.matrix(tdm))
train$y <- factor(c(0, 1))

# Train. The data frame above is also named `train`, so the caret function is
# called through its namespace to make it explicit which one is meant.
# The seed is fixed because caret resamples the training data while fitting.
set.seed(42)
fit <- caret::train(y ~ ., data = train, method = 'bayesglm')

# Show the predicted classes for the training documents.
predict(fit, newdata = train)

# Test data. Separate objects are used so the training corpus and matrix are
# not overwritten. The dictionary restricts the test matrix to the terms seen
# during training, giving it the same columns the model was fitted on.
data2 <- c('Bats eat bugs.')
test_corpus <- VCorpus(VectorSource(data2))
test_tdm <- DocumentTermMatrix(
  test_corpus,
  control = c(list(dictionary = Terms(tdm)), dtm_control)
)

# Use a data.frame for the test predictors, matching the training data.
test <- as.data.frame(as.matrix(test_tdm))

# Show the predicted class for the test document.
predict(fit, newdata = test)

classifytext.r

library(caret)
library(tm)

# Training data: one short document per class.
data <- c('Cats like to chase mice.', 'Dogs like to eat big bones.')
corpus <- VCorpus(VectorSource(data))

# Preprocessing options shared by the training and test matrices, so that both
# sets of documents are cleaned and stemmed in exactly the same way.
dtm_control <- list(
  removePunctuation = TRUE,
  stopwords = TRUE,
  stemming = TRUE,
  removeNumbers = TRUE
)

# Create a document term matrix (one row per document, one column per term).
tdm <- DocumentTermMatrix(corpus, control = dtm_control)

# Convert to a data.frame for training and assign a classification (factor)
# to each document. The class column `y` is appended after the term columns.
train <- as.data.frame(as.matrix(tdm))
train$y <- factor(c(0, 1))

# Train. The data frame above is also named `train`, so the caret function is
# called through its namespace to make it explicit which one is meant.
# The seed is fixed because caret resamples the training data while fitting.
set.seed(42)
fit <- caret::train(y ~ ., data = train, method = 'bayesglm')

# Show the predicted classes for the training documents.
predict(fit, newdata = train)

# Test data. Separate objects are used so the training corpus and matrix are
# not overwritten. The dictionary restricts the test matrix to the terms seen
# during training, giving it the same columns the model was fitted on.
data2 <- c('Bats eat bugs.')
test_corpus <- VCorpus(VectorSource(data2))
test_tdm <- DocumentTermMatrix(
  test_corpus,
  control = c(list(dictionary = Terms(tdm)), dtm_control)
)

# Use a data.frame for the test predictors, matching the training data.
test <- as.data.frame(as.matrix(test_tdm))

# Show the predicted class for the test document.
predict(fit, newdata = test)

results.txt

> data
[1] "Cats like to chase mice."    "Dogs like to eat big bones."
> train
  big bone cat chase dog eat like mice y
1   0    0   1     1   0   0    1    1 0
2   1    1   0     0   1   1    1    0 1
> predict(fit, newdata = train)
[1] 0 1
> data2
[1] "Bats eat bugs."
> test
  big bone cat chase dog eat like mice
1   0    0   0     0   0   1    0    0
> predict(fit, newdata = test)
[1] 1
>

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

Simple example of classifying text in R with machine learning (text-mining library, caret, and bayesian generalized linear model) #R #machin