martinctc
5/15/2017 - 4:41 PM

Factor Analysis Notes

Factor Analysis Notes

library(dplyr)
library(magrittr)

# Announce the start of the run on the console.
cat("\nBegin Factor Analysis\n\n")

# Switch to the project folder so the relative file names used by this script
# resolve there.
setwd("C:/Users/Martin.Chan/Desktop/R workdesk/factor analysis")

# Read the input data, then keep only the columns whose name contains "Q22".
df <- read.csv(file = "working data formatted.csv")
dd <- df %>% select(contains("Q22"))

# Fit a 5-factor maximum-likelihood factor analysis on the Q22 columns.
# Rows containing any NA are dropped first; the factors are varimax-rotated
# and regression-method factor scores are stored in mymodel$scores.
mymodel <-
  dd %>%
  na.omit() %>%
  factanal(factors = 5, rotation = "varimax", scores = "regression")

# Print explicitly: a bare `mymodel` is only auto-printed at the top level of
# an interactive session or an Rscript run, and is silently skipped when the
# file is run with source().
print(mymodel)
#Uniqueness = the proportion of each original variable's variance that isn't explained by the factors.
# A large uniqueness value indicates that none of the latent factors captures a variable well, so smaller values are better.

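#Loadings with a large absolute value (positive or negative) mean that the variable is strongly correlated with that factor
#Kaiser criterion: a factor is worth keeping if its SS (sum of squares) loadings > 1
#The number of factors is limited by (v-f)^2 > v+f, where v = number of variables and f = number of factors
#(this keeps the model's degrees of freedom positive)
#The p-value tests the hypothesis that the specified number of factors is sufficient to fit the source data;
#a small p-value suggests that more factors are needed, so larger values are better.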

# Export the rotated loadings together with each variable's strongest factor.
# unclass() strips the "loadings" class, leaving a plain numeric matrix with
# one row per variable and one column per factor.
loadings_export <- unclass(mymodel$loadings)

#Create a matrix and data frame of loadings
# For every variable (row), take the name of the factor column holding its
# largest loading.
# NOTE(review): which.max picks the largest *signed* loading, so a variable
# whose strongest loading is negative is assigned to a different factor --
# confirm this is intended.
loadings.max <- colnames(loadings_export)[apply(loadings_export, 1, which.max)] #Find max and return column header

# Build a data frame instead of cbind()-ing the character vector onto the
# matrix: cbind() would coerce every loading to a string, so the CSV would
# contain quoted text instead of numbers.
loadings_export <- data.frame(
  loadings_export,
  loadings.max = loadings.max,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
write.csv(loadings_export, "Model Loadings.csv")

#Combine original data with factor scores
# scores is a matrix with one row per complete case and one column per factor.
scores <- mymodel$scores
# Sanity check: total number of score cells (rows x factors). Printed
# explicitly so it also shows when the script is run with source().
print(length(scores))
# The model was fitted on na.omit(dd), so the same NA-dropped rows line up
# one-to-one with the rows of the score matrix.
dd.new <- cbind(na.omit(dd), scores)

# Label each row with the factor on which it scores highest. The label is
# taken from the score matrix itself rather than from fixed column positions
# (31:35), so it stays correct whatever the number of Q22 columns or factors.
dd.new$FactorX <- colnames(scores)[apply(scores, 1, which.max)] #Find max and return column header

#Return mean of each variable, for each factor
# meanz has one row per original variable and one column per factor; each cell
# is the variable's mean over the rows whose FactorX label is that factor.
# Variable and factor names are derived from the data instead of the fixed
# 1:30 / Factor1..Factor5 layout, and each group is subset once rather than
# once per variable.
meanz <- local({
  item_cols <- colnames(dd)                 # the original variable columns
  factor_names <- colnames(mymodel$scores)  # one name per fitted factor

  means_by_factor <- lapply(factor_names, function(factor_name) {
    # which() ignores NA comparisons, matching dplyr::filter() semantics.
    in_group <- which(dd.new$FactorX == factor_name)
    # A factor with no rows assigned yields NaN for every variable.
    vapply(dd.new[in_group, item_cols, drop = FALSE], mean, numeric(1))
  })

  out <- as.data.frame(do.call(cbind, means_by_factor))
  colnames(out) <- factor_names
  row.names(out) <- item_cols
  out
})


# Sanity check on the first factor's scores. Printed explicitly because a bare
# expression is not auto-printed when the file is run with source().
print(mean(dd.new$Factor1))

# Save the complete-case data together with the factor scores and the FactorX
# label.
write.csv(dd.new, "Factor Analysis Output.csv")

# maxfinder <- function(a,b,c,d){
#   maxes <- max.col(a,b,c,d)
# }

cat("\nEnd demo \n\n")