11/21/2018 - 5:52 PM

## spread with multiple columns dplyr

Spread long to wide with multiple value columns: a walk-through from Kieran Healy, https://kieranhealy.org/blog/archives/2018/11/06/spreading-multiple-values/

``````
#' Draw a random vector of category values.
#'
#' @param x Vector of category values to draw from.
#' @param N Number of draws to make (default 1000).
#' @return A vector of length `N` drawn from `x` with replacement.
gen_cats <- function(x, N = 1000) {
  # Index with sample.int() rather than calling sample(x, ...) directly:
  # sample() treats a single number x >= 1 as the range 1:x, which would
  # produce values that were never supplied as categories.
  x[sample.int(length(x), N, replace = TRUE)]
}

# Simulated data for the walkthrough. map_dfc() and as_tibble() are called
# unqualified, so purrr and tibble must already be attached
# (e.g. via library(tidyverse)).
set.seed(101)
N <- 1000

# One simulated income value per row.
income <- rnorm(N, 100, 50)

# Values each categorical variable is sampled from.
vars <- list(
  stratum = 1:8,
  sex = c("M", "F"),
  race = c("B", "W"),
  educ = c("HS", "BA")
)

# One column of N random draws per categorical variable; N is passed through
# explicitly so the row count always matches length(income).
df <- as_tibble(map_dfc(vars, gen_cats, N = N))

# Store income as a column of df. The later steps refer to it as a column
# (dcast(value.var = "income"), mean(income) per group); if it only exists as
# a global vector, mean(income) returns the overall mean for every group.
df$income <- income

## Columns used from df: stratum, sex, race, educ, income
# data.table way: a single dcast() call with several aggregation functions.
# A converted copy (as.data.table) is used instead of setDT(df), because
# setDT() changes df itself by reference and the dplyr steps below would then
# operate on a data.table. `fun.aggregate` is written out in full rather than
# relying on partial matching of `fun`.
dt_wide <- data.table::dcast(
  data.table::as.data.table(df),
  sex + race + stratum ~ educ,
  fun.aggregate = list(mean, length),
  value.var = "income"
)

# dplyr
## Simple tidy summary: mean income and row count for every
## sex x race x stratum x educ combination.
tv_wide1 <- summarize(
  group_by(df, sex, race, stratum, educ),
  mean_inc = mean(income),
  N = n()
)

## 1. gather()
# Stack the two summary columns (mean_inc, N) into a variable/value pair,
# leaving the columns sex through educ untouched.
tv_wide2 <- df %>%
  group_by(sex, race, stratum, educ) %>%
  summarize(mean_inc = mean(income), N = n()) %>%
  gather(key = variable, value = value, -(sex:educ))

tv_wide2

## 2. unite()
# Same pipeline as step 1, then paste educ and variable together into a
# single column named temp.
tv_wide2 <- df %>%
  group_by(sex, race, stratum, educ) %>%
  summarize(mean_inc = mean(income), N = n()) %>%
  gather(key = variable, value = value, -(sex:educ)) %>%
  unite(col = temp, educ, variable)

tv_wide2

## 3. spread()
# Final step: spread the united temp key back out to wide form, one column
# per distinct temp value, filled from value. The pipeline must end with this
# call; a trailing %>% would pipe the result into the `tv_wide2` symbol on
# the next line as if it were a function.
tv_wide2 <- df %>%
  group_by(sex, race, stratum, educ) %>%
  summarize(mean_inc = mean(income), N = n()) %>%
  gather(variable, value, -(sex:educ)) %>%
  unite(temp, educ, variable) %>%
  spread(temp, value)

tv_wide2

multi_spread <- function(df, key, value) {
# quote key
keyq <- rlang::enquo(key)
# break value vector into quotes
valueq <- rlang::enquo(value)
s <- rlang::quos(!!valueq)
df %>% gather(variable, value, !!!s) %>%
unite(temp, !!keyq, variable) %>%