# [dplyr Cheatsheet] Some commands for dplyr #r #dplyr #cheatsheet
#-- convert df to tibble
# as_tibble() replaces tbl_df(), which is deprecated in dplyr >= 1.0.0.
tbl <- as_tibble(df)
#-- aggregate by multiple groups
# Template: replace every <placeholder> with real code before running.
# filter() keeps the rows that satisfy the condition, group_by() defines the
# groups, and summarise() computes each output variable per group.
# NOTE(review): with several grouping variables the result may still be grouped
# after summarise(); add ungroup() if later steps expect ungrouped data.
d <- df %>% filter(<filter_conditional>) %>%
group_by(<grouping_variable1>, ...) %>%
summarise(<out_var> = <out_var_function_call>,...)
#-- get crosstabs table and replace low counts
# Counts rows per gmc / ldp / disease, spreads the diseases into columns (one
# row per gmc / ldp pair), then masks small counts with the label "<5".
# NOTE(review): the mask covers counts 1 to 5 inclusive while the label reads
# "<5"; confirm whether the range should be 1:4 or the label "<=5".
tab_ldp <- d %>%
  rename(dis = normalised_disease_name) %>%
  group_by(gmc, ldp, dis) %>%
  summarise(n = n()) %>%
  spread(dis, n) %>%
  # The data is still grouped here, so the function runs once per group.
  # Converting to character up front gives every count column a single type
  # regardless of whether a given group (or the whole column) contains a
  # masked value; otherwise the column type would depend on the data and
  # could differ between groups. Missing combinations stay NA.
  mutate_if(
    is.numeric,
    function(x) replace(as.character(x), x %in% 1:5, "<5")
  )
#-- keep only the columns whose names begin with "m"
d <- select(df, starts_with("m"))
#-- mutate weight to be in kgs and create new var
# mtcars$wt is recorded in units of 1000 lbs, so scale to pounds and then
# convert to kilograms (1 lb = 0.45359237 kg). good_mpg labels each car
# "good" when mpg is above 25 and "bad" otherwise.
mtcars <- mtcars %>%
  mutate(
    wt = wt * 1000 * 0.45359237,
    good_mpg = ifelse(mpg > 25, "good", "bad")
  )
#-- equivalent of do.call("rbind", list(dfs))
# The data frames can be passed as separate arguments ...
bind_rows(df1, df2)
# ... or as a single list of data frames.
bind_rows(list(df1, df2))
# The .id argument names a new column that preserves the original list names.
bind_rows(list_of_dfs, .id = "id")
#-- get the first or last or nth row of the group
# Rows are sorted by order_var, largest first, so slice(1L) keeps the top row
# of each group. Use n() inside slice() to refer to the number of rows in the
# group (e.g. to pick the last row), or any other index for the nth row.
d <- df %>%
  group_by(grp_var) %>%
  arrange(desc(order_var)) %>%
  slice(1L)
#-- to just get the most recent/max row
# Within each grp_var group, keep the single row where order_var is largest.
df <- slice(group_by(df, grp_var), which.max(order_var))
#-- long list to crosstab/pivot
# Count rows per week / lab pair, then spread the labs into columns so each
# week becomes one row and each cell holds that lab's count.
out <- d %>%
  group_by(week, lab) %>%
  summarise(count = n()) %>%
  spread(lab, count)
#-- change class of a number of specific columns
#   (specified in a vector of column names)
# The conversion function is passed directly: funs() is deprecated since
# dplyr 0.8.0, and mutate_at() accepts a bare function.
d <- d %>% mutate_at(vars(one_of(convert_to_numeric)), as.numeric)