amy17519
8/22/2016 - 3:33 AM

Starbucks | R Shiny | K-Means Clustering

  # Interactive scatter plot of the k-means result: one marker per row of
  # `mug`, positioned by Owner (x) and Seeker (y) and coloured by the cluster
  # assignment stored in `mugCluster$cluster`.
  # NOTE(review): the bare column names (x = Owner, ...) rely on plotly
  # evaluating arguments inside `data`; plotly >= 4.0 expects formulas
  # (x = ~Owner) instead -- confirm the installed version before upgrading.
  output$kmeans <- renderPlotly({
    plot_ly(
      data = mug, x = Owner, y = Seeker, mode = "markers",
      # Hover text: mug name, then one "<br>"-separated line per attribute.
      # Every label ends in ": " so the value does not run into its label.
      text = paste(Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
                   "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
                   Seeker, "<br>Trader: ", Trader,
                   "<br>Difficulty: ", Difficulty),
      color = mugCluster$cluster,
      # Five colours are supplied for the cluster colour scale.
      colors = c('olivedrab', 'navyblue', 'indianred2', 'darkgoldenrod1',
                 'magenta4')
    ) %>%
      layout(title = 'K-means Clustering: # of Seekers vs. # of Owners')
  })
# Elbow plot: total within-cluster sum of squares (WSS) for k = 1..10 on
# columns 7:9 of `mug`, used to choose the number of clusters.
# NOTE(review): columns 7:9 are presumably the Owner/Seeker/Trader counts --
# confirm against the data set.
features <- mug[, 7:9]
max_k <- 10L

# kmeans() picks its starting centers at random; seed the RNG so the curve
# (and the k read off it) is reproducible from run to run.
set.seed(123)

wss <- numeric(max_k)
# k = 1: WSS is the total sum of squares about the column means, which equals
# (n - 1) * the sum of the column variances.
wss[1] <- sum(scale(features, center = TRUE, scale = FALSE)^2)
# k >= 2: use several random starts per k so that one poor initialisation
# does not distort the curve. `tot.withinss` is the sum of `withinss`.
wss[-1] <- vapply(
  2:max_k,
  function(k) kmeans(features, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)

plot(seq_len(max_k), wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")
# The author read k = 5 off the elbow of this curve.

# Fit the final 5-cluster k-means model on columns 7:9 of `mug`, using a fixed
# seed and 100 random starts.
set.seed(123)
mugCluster <- kmeans(mug[, 7:9], centers = 5, nstart = 100)

# Inspect the cluster centers to see what characterises each cluster.
mugCluster$centers

# Turn the numeric cluster ids into a labelled factor and attach it to `mug`
# as the new variable `Difficulty`.
# NOTE(review): the id order below (2, 1, 5, 4, 3) is hard-coded from one
# particular fit; cluster numbering can change with the seed or R version, so
# re-check it against `mugCluster$centers` after any re-run.
cluster_id_order <- c(2, 1, 5, 4, 3)
difficulty_labels <- c("Inconclusive", "Easy to Find Mugs",
                       "Medium Difficulty", "Hard to Get Mugs",
                       "Very Hard to Get Mugs")
mugCluster$cluster <- factor(mugCluster$cluster,
                             levels = cluster_id_order,
                             labels = difficulty_labels)
mug$Difficulty <- mugCluster$cluster