tamdrashtri
12/19/2018 - 7:23 AM

useful code for logistic regression in R

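Useful code for logistic regression in R: feature engineering with recipes, model fitting with glm, variable importance, and ROC/AUC evaluation.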

# Feature Engineering -----------------------------------------------------

# Use recipes to quickly turn features into formats that are favourable for
# logistic regression.
# NOTE(review): all_nominal() also matches the outcome Churn, so it is
# dummy-encoded as well (the glm call further down uses Churn_Yes) - confirm
# this is intended before narrowing the selector.

recObj <- recipe(Churn ~ ., data = train) %>% # Churn is the response; all other columns are explanatory variables
  step_dummy(all_nominal()) %>% # encode nominal columns as 0/1 dummy variables
  step_BoxCox(all_numeric()) %>% # Box-Cox transform the numeric columns
  prep(training = train) # estimate the steps on the training data (the argument is `training`, not `data`)

bakedTrain <- bake(recObj, new_data = train) # apply the trained recipe to the training set
bakedTest <- bake(recObj, new_data = test) # apply the same trained steps to the test set
# Model Performance -------------------------------------------------------

library(ROCR)

# test.predicted.m1 is created in the "Test model" section of this file, so
# that section has to be run before this one.
# Build the ROCR prediction object once from the predicted values and the
# observed Churn labels; the ROC curve and the AUC are both derived from it.
rocr_pred <- prediction(test.predicted.m1, test$Churn)

# ROC curve: true positive rate ("tpr") against false positive rate ("fpr")
roc_curve <- performance(rocr_pred, measure = "tpr", x.measure = "fpr")
plot(roc_curve)

# AUC value, read from the y.values slot of the performance object
auc_perf <- performance(rocr_pred, measure = "auc")
auc_perf@y.values # previously noted results: 0.85, 0.8414

# Fit a logistic regression: a binomial-family glm of Churn_Yes on every
# other column of bakedTrain.
# NOTE(review): Churn_Yes is presumably the dummy column that step_dummy()
# derives from Churn - confirm against the baked data.

model1 <- glm(Churn_Yes ~ ., family = "binomial", data = bakedTrain)

# Inspect the fitted model by printing its summary
summary(model1)

# Organize variable importance: one row per model term, with the term name
# kept as a factor column and rows sorted from most to least important.
col_index <-
  data.frame(varImp(model1)) %>%
  mutate(names = factor(row.names(.))) %>% # carry the row names over as a column
  arrange(desc(Overall))

# Draw the variable-importance plot with ggplot.
# reorder() needs the ordering variable as its second argument; ordering the
# names by Overall sorts the terms by importance along the axis.
ggplot(col_index, aes(x = reorder(names, Overall), y = Overall)) +
  geom_segment(aes(xend = names, yend = 0)) + # stem from zero up to each importance value
  geom_point() +
  coord_flip() + # put the term names on the vertical axis
  scale_color_viridis() + # NOTE(review): no colour aesthetic is mapped, so this scale appears to have no effect
  theme_linedraw()
# Test model --------------------------------------------------------------
# Score the baked test set with the fitted model; type = "response" asks
# predict() for values on the response scale, which are thresholded below.
test.predicted.m1 <- predict(model1, newdata = bakedTest, type = "response")

# Misclassification rate at a 0.5 cut-off: the share of test rows whose
# thresholded prediction ("Yes"/"No") differs from the observed Churn.
test %>%
  summarise(
    m1.error = mean(Churn != ifelse(test.predicted.m1 > 0.5, "Yes", "No"))
  )