arbelt
2/16/2017 - 5:39 PM

Merging duplicate records with different fields filled in.R

library(tidyverse, purrr)

df <- tribble(~a, ~b, ~c, ~d,
              1, 2, NA, 1,
              1, 2, 3, NA,
              2, 2, 1, 1,
              2, 2, 1, 2)

keys_to_merge <- df %>% group_by(a,b) %>% 
  summarise_each(funs(sum(unique(.) %>% is.na %>% `!`))) %>% 
  group_by(a, b) %>% nest %>% 
  mutate_at("data", funs(map(., ~ .x < 2))) %>%
  mutate_at("data", funs(map(., as.logical))) %>%
  unnest %>%
  group_by(a,b) %>% summarise(to_get = all(data)) %>% ungroup %>%
  filter(to_get) %>% distinct(a,b)

merged_vals <- keys_to_merge %>% 
  left_join(df) %>% 
  group_by(a,b) %>%
  summarise_all(funs(detect(., negate(is.na)))) 

unmerged_vals <- df %>% anti_join(keys_to_merge)

merged_df <- bind_rows(merged_vals, unmerged_vals)