# Keiku
# 1/6/2017 - 4:35 AM
#
# Reshaping with tidyr
#
# Reshaping with tidyr

library("dplyr")
library("tidyr")
library("data.table")

# Long-format sample data: 3 IDs, each measured once in 2008 and once in 2009.
# `tibble::tibble()` replaces the deprecated `data_frame()` constructor.
smp <- tibble::tibble(
  ID = rep(1:3, times = 2),
  BMI = rep(c(21, 26), times = 3),
  sbp = rep(c(150, 120), times = 3),
  nendo = rep(2008:2009, times = 3)
)
# ID BMI sbp nendo
# 1  1  21 150  2008
# 2  2  26 120  2009
# 3  3  21 150  2008
# 4  1  26 120  2009
# 5  2  21 150  2008
# 6  3  26 120  2009

# Base R: long -> wide, one row per ID and one set of value columns per
# distinct `nendo` value (columns are named like `BMI.2008`).
smp_reshaped <- stats::reshape(
  as.data.frame(smp),
  direction = "wide",
  idvar = "ID",
  timevar = "nendo"
)
# ID BMI.2008 sbp.2008 BMI.2009 sbp.2009
# 1  1       21      150       26      120
# 2  2       21      150       26      120
# 3  3       21      150       26      120

# data.table: cast both BMI and sbp to wide format, keyed by ID, with one
# column per value-variable / `nendo` combination (e.g. `BMI_2008`).
smp_reshaped <- dcast.data.table(
  data = as.data.table(smp),
  formula = ID ~ nendo,
  fun.aggregate = NULL,
  fill = NA,
  value.var = c("BMI", "sbp")
)
#    ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# 1:  1       21       26      150      120
# 2:  2       21       26      150      120
# 3:  3       21       26      150      120

#' Reshape a long data frame to wide format with tidyr
#'
#' Keeps only the `id`, `by` and `valiables` columns, stacks the `valiables`
#' columns into key/value pairs, joins each variable name with the matching
#' `by` value, and spreads the result so that every variable/`by` combination
#' becomes its own column (e.g. `BMI_2008`).
#'
#' @param df A data frame or tibble in long format.
#' @param id Character vector naming the column(s) that identify an output row.
#' @param by Name of the column whose values are appended to the variable
#'   names to form the new column names.
#' @param valiables Character vector naming the value columns to spread.
#' @param sep Separator placed between the variable name and the `by` value.
#' @return The reshaped data, with the `id` column(s) followed by one column
#'   per variable/`by` combination; missing combinations are filled with `NA`.
#' @details Stops with "Duplicate identifiers for rows" when two or more rows
#'   share the same combination of `id` and `by` values.
tidyr_reshape <- function(df, id, by, valiables, sep) {
  # Uniqueness has to be judged on the key columns alone. Comparing whole rows
  # would let rows that share a key but differ in a value column slip through
  # and fail later inside spread_().
  keys <- as.data.frame(df)[c(id, by)]
  if (anyDuplicated(keys) > 0L) {
    stop("Duplicate identifiers for rows")
  }

  # The string-based (underscore) verbs are kept so the function still runs on
  # the dplyr/tidyr versions this script was written for.
  df %>%
    select_(.dots = c(id, by, valiables)) %>%
    gather_(
      key_col = "valiables",
      value_col = "value",
      gather_cols = valiables
    ) %>%
    unite_(col = "by_valiables", from = c("valiables", by), sep = sep) %>%
    spread_(key_col = "by_valiables", value_col = "value", fill = NA)
}

# tidyr: same wide layout as above, produced by the helper defined in this file.
smp_reshaped <- tidyr_reshape(
  df = smp,
  id = "ID",
  by = "nendo",
  valiables = c("BMI", "sbp"),
  sep = "_"
)
# A tibble: 3 x 5
# ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# * <int>    <dbl>    <dbl>    <dbl>    <dbl>
# 1     1       21       26      150      120
# 2     2       21       26      150      120
# 3     3       21       26      150      120