dplyr
There are seven fundamental functions of data transformation:
select() selecting variables
filter() provides basic filtering capabilities
group_by() groups data by categorical levels
summarise() summarise data by functions of choice
arrange() ordering data
inner_join(), left_join(), ... (the *_join() family) joining separate data frames
mutate() create new variables
####################################################################################
# Worked example: core dplyr verbs on the region5air `airdata` dataset.
# Tutorial source: http://rpubs.com/NateByers/DataManip1
library(devtools)
install_github("NateByers/region5air")
library(region5air)
library(dplyr) # provides %>%, select(), arrange(), filter() and desc()
data(airdata)
head(airdata)

# select(): keep only four columns. Note that this overwrites `airdata`, so
# every later step in this example sees just these four columns.
airdata <- airdata %>%
  select(site, datetime, parameter, value)
# Alternative without the pipe, using a column range
# (assumes datetime, parameter and value are adjacent columns -- TODO confirm):
#   select(airdata, site, datetime:value)

# arrange(): sort rows by these columns, in ascending order.
airdata %>%
  arrange(datetime, parameter, site, value)
# desc() reverses the sort order for a single column (here: site).
airdata %>%
  arrange(desc(site), datetime)

# filter(): comma-separated conditions are combined with AND.
# Replacement of:
#   airdata[airdata$parameter == 44201 & airdata$site == 840170311601, ]
airdata %>%
  filter(parameter == 44201, site == 840170311601)
# Use | for OR inside a single condition. `poc` was dropped by the select()
# above, so this form only works on a data frame that still has that column
# (presumably the full dataset as loaded by data(airdata) -- TODO confirm):
#   filter(airdata, parameter == 44201, poc == 1 | poc == 2)
# For the dates listed in distinctMissingDates, report per date the number of
# rows present and how many of those rows have Open == 1 and Promo == 1.
trainingData %>%
  filter(Date %in% distinctMissingDates) %>%
  group_by(Date) %>%
  summarise(
    total.count = n(),
    count.open = sum(Open == 1),
    count.promo = sum(Promo == 1)
  )
# inner_join(): keep only the rows of imputedData that have a matching Date in
# missingDateFlags, and add the columns of missingDateFlags to them.
imputedData %>%
  inner_join(missingDateFlags, by = "Date")
# When the key columns are named differently in the two tables, pass a named
# vector to `by` (left-table name = right-table name), e.g.:
#   x %>% inner_join(y, by = c("state_code" = "State"))
data frame df:
col1
row1
row2
row3
row4
row5
data frame df1:
col1
row3
row5
The desired output df2 (the rows of df whose col1 value does not appear in df1) is:
col1
row1
row2
row4
Solution: anti_join() returns the rows of df that have no match in df1:
# Keep the rows of df whose col1 value has no match in df1.
df %>%
  anti_join(df1, by = "col1")