Using the DPLYR tools
- combine columns from multiple data sources
- merge identifying information
LEFT-JOIN = returns all rows in x, and all columns from both x and y (rows in x with no match in y get NA in the y columns)
joined <- left_join(x, y, by = 'identifier')
// if the key columns have different names: by = c("column_name_first_set" = "column_name_second_set")
- PIPE (%>%): used to pass the result of one command as the first argument of the next command
lowest.ratio <- filter(teams, steal.ratio == min(steal.ratio)) %>% select(Team)
SELECT: filtered.data <- select(data.frame, column1, column2)
// note: column names are passed bare (unquoted), not as strings
FILTER: filtered.data <- filter(data.frame, test1, test2)
storms <- filter(storms, pressure > 1015)
// to match any of several specific values within filter (both forms are equivalent):
- Team == team1 | Team == team2
- Team %in% c(team1, team2)
MUTATE: filtered.data <- mutate(data.frame, new.column = formula)
storms <- mutate(storms, ratio = pressure/wind, inverse = 1/ratio)
ARRANGE: filtered.data <- arrange(data.frame, column.name)
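storms <- arrange(storms, -wind)
// the minus sign sorts in descending order (numeric columns only); desc(wind) works for any column type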
SUMMARISE: filtered.data <- summarise(data.frame, summary.name = summary.function(column))
summary <- summarise(pollution, median = median(amount))
GROUP BY: filtered.data <- group_by(data.frame, column.name) %>%
summarize(amount = mean(amount))