undercoverindian
12/27/2017 - 12:23 AM

DPLYR

Using the DPLYR tools

- combine columns from multiple data sources
- merge identifying information

LEFT-JOIN = returns all rows in x, all columns for x and y

  joined <- left_join(x, y, by = 'identifier')
  
      // when the key columns are named differently in x and y:
      //   by = c("column_name_first_set" = "column_name_second_set")
- PIPE (%>%): used to pass the result of one command as the first argument of the next command

lowest.ratio <- filter(teams, steal.ratio == min(steal.ratio)) %>% select(Team)
SELECT: filtered.data <- select(data.frame, column1, column2)

         // interesting because column names are not passed as strings


FILTER: filtered.data <- filter(data.frame, test1, test2)
        storms <- filter(storms, pressure > 1015)

        // to find specific item within filter: 
          - Team == team1 | Team == team2
          - Team %in% c(team1, team2)


MUTATE: filtered.data <- mutate(data.frame, new.column = formula)
        storms <- mutate(storms, ratio = pressure/wind, inverse = 1/ratio)


ARRANGE: filtered.data <- arrange(data.frame, column.name)
         storms <- arrange(storms, -wind)


SUMMARISE: filtered.data <- summarise(data.frame, new.column = summary.function(column))
           summary <- summarise(pollution, median = median(amount))

GROUP BY: filtered.data <- group_by(data.frame, column.name) %>% 
                           summarize(amount = mean(amount))