# Reshaping with tidyr
library("dplyr")
library("tidyr")
library("data.table")
# Sample long-format data: six rows, one per (ID, nendo) combination, with
# two measured columns (BMI, sbp).
# Built with tibble() because data_frame() is deprecated.
smp <- tibble(
  ID = rep(1:3, 2),
  BMI = rep(c(21, 26), 3),
  sbp = rep(c(150, 120), 3),
  nendo = rep(2008:2009, 3)
)
# ID BMI sbp nendo
# 1 1 21 150 2008
# 2 2 26 120 2009
# 3 3 21 150 2008
# 4 1 26 120 2009
# 5 2 21 150 2008
# 6 3 26 120 2009
# Base R approach: convert the tibble to a plain data.frame, then widen it
# with stats::reshape(), using ID as the row identifier and nendo as the
# column that distinguishes repeated measurements.
smp_reshaped <- stats::reshape(
  data = as.data.frame(smp),
  direction = "wide",
  idvar = "ID",
  timevar = "nendo"
)
# ID BMI.2008 sbp.2008 BMI.2009 sbp.2009
# 1 1 21 150 26 120
# 2 2 21 150 26 120
# 3 3 21 150 26 120
# data.table approach: cast to wide with one row per ID and one column per
# (value variable, nendo) combination. Cells with no matching row are NA.
smp_reshaped <- dcast.data.table(
  data = as.data.table(smp),
  formula = ID ~ nendo,
  fun.aggregate = NULL,
  fill = NA,
  value.var = c("BMI", "sbp")
)
# ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# 1: 1 21 26 150 120
# 2: 2 21 26 150 120
# 3: 3 21 26 150 120
#' Reshape a long data frame to wide format with tidyr
#'
#' Keeps only the `id`, `by` and `valiables` columns, stacks the value
#' columns into key/value pairs, joins each value-column name with the
#' corresponding `by` value (`<variable><sep><by>`), and spreads those
#' combined names back out into columns.
#'
#' @param df A data frame or tibble in long format.
#' @param id Character vector naming the identifier column(s).
#' @param by Character vector naming the column(s) whose values are appended
#'   to the value-column names.
#' @param valiables Character vector naming the value columns to spread
#'   (parameter spelling kept so existing callers keep working).
#' @param sep Separator placed between the value-column name and the `by`
#'   value in the new column names.
#' @return A data frame with one row per `id` combination and one column per
#'   variable/`by` combination; combinations absent from `df` are `NA`.
tidyr_reshape <- function(df, id, by, valiables, sep = "_") {
  key_cols <- c(id, by)

  # Every (id, by) combination must identify exactly one row. Count distinct
  # rows of the key columns themselves; passing the column names as strings to
  # n_distinct() would only compare whole rows of `df`.
  n_key <- df %>%
    select(all_of(key_cols)) %>%
    distinct() %>%
    nrow()
  if (n_key != nrow(df)) {
    stop("Duplicate identifiers for rows")
  }

  # all_of() selects columns from character vectors, replacing the deprecated
  # lazyeval verbs (select_(), gather_(), unite_(), spread_()).
  df %>%
    select(all_of(c(id, by, valiables))) %>%
    gather(key = "valiables", value = "value", all_of(valiables)) %>%
    unite(col = "by_valiables", all_of(c("valiables", by)), sep = sep) %>%
    spread(key = "by_valiables", value = "value", fill = NA)
}
# tidyr approach: widen BMI and sbp by nendo, keyed on ID, joining the new
# column names with "_".
smp_reshaped <- tidyr_reshape(
  df = smp,
  id = "ID",
  by = "nendo",
  valiables = c("BMI", "sbp"),
  sep = "_"
)
# A tibble: 3 x 5
# ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# * <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 21 26 150 120
# 2 2 21 26 150 120
# 3 3 21 26 150 120