R Cheatsheet.
# Grouped summaries: by(data, INDICES, FUN) applies FUN to `data` separately
# for each group defined by INDICES.
# Here: summary() of friend_count for each value of pf$gender.
by(
  data = pf$friend_count,
  INDICES = pf$gender,
  FUN = summary
)
# Binary indicator column: 1 when the user has at least one mobile like,
# 0 otherwise (NA wherever mobile_likes is NA). Assigning to pf$<name> creates
# the column, so no NA pre-initialisation is needed.
pf$mobile_check_in <- ifelse(pf$mobile_likes > 0, 1, 0)

# Share of users flagged 1, as a percentage of all rows.
# NOTE: sum() has no na.rm here, so any NA in the indicator makes this NA.
percent_mobile <- sum(pf$mobile_check_in) / length(pf$mobile_check_in) * 100
# Sample a few groups and inspect each one individually.
# Fixing the seed makes the same 16 ids come out on every run.
set.seed(4231)
sample.ids <- sample(x = levels(yo$id), size = 16)

# One panel per sampled household id: price over time drawn as a line, with
# open-circle points (pch = 1) whose size is mapped to all.purchases.
ggplot(
  data = yo[yo$id %in% sample.ids, ],
  mapping = aes(x = time, y = price)
) +
  geom_line() +
  geom_point(aes(size = all.purchases), pch = 1) +
  facet_wrap(~id)
# Scatterplot matrix with GGally::ggpairs().
# Install GGally only when it is missing, so re-running these lines does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("GGally", quietly = TRUE)) {
  install.packages("GGally")
}
library(GGally)

# Seed the RNG so the first 1000-row sample below is reproducible.
set.seed(1836)
pf_subset <- pf[, 2:15] # keep columns 2 through 15 only
names(pf_subset)

# Each call draws its own 1000-row sample; the seed is set only once, so the
# second plot is built from a different sample than the first.
ggpairs(pf_subset[sample.int(nrow(pf_subset), 1000), ])
ggpairs(
  pf_subset[sample.int(nrow(pf_subset), 1000), ],
  axisLabels = "internal"
)
ggplot2
Visualization library
magrittr
Library for using pipe command %>% (Cmd+Shift+M)
tidyr & dplyr
Data wrangling with R
pander
Render R objects into Pandoc's markdown
ggthemes
Themes for ggplot2 library
gridExtra
For arranging different plots with grid.arrange(p1, p2, ..., ncol = 1)
scales
Implement scales in a way that is graphics system agnostic
To install a new package and use it:
# Template: replace name_of_the_package with the real package name.
# dependencies = TRUE also installs the packages the new package relies on.
install.packages("name_of_the_package", dependencies = TRUE)
library(name_of_the_package)
getwd()
Get Working Directory
setwd('~/Downloads')
Set Working Directory
ls()
List variables in the Environment
dir()
List files and directories in the Working Directory (alias of list.files())
list.files()
List files in the Working Directory
rm('variable1')
Remove variable1 from the Environment
rm(list = ls())
Remove all variables from the Environment
identical(data1, data2)
colnames(data)
Get column names (also names(df) on data frames)
rownames(data)
Get row names
data(name_dataset)
Load data set into the Environment
Rscript my_script.R
read.csv('file.csv')
Read from CSV to data.frame
read.csv('file.tsv', sep = '\t')
Read from TSV to data.frame
alumni <- read.csv(path_alumni, na.strings = c('-'), colClasses = c('character', 'character', 'numeric', 'numeric'))
subset(df, <condition>)
Example: subset(statesInfo, state.region == 1)
df[ROWS, COLUMNS]
statesInfo[statesInfo$state.region == 1, ]
statesInfo[statesInfo$state.region == 1 & statesInfo$population > 3000, ]
nrow(df)
ncol(df)
by(data, factorlist, function)
Ex: by(pf$friend_count, pf$gender, summary)
str(data)
Structure of the data
summary(data)
Summary of the data
head(data)
tail(data)
table(variable)
levels(variable)
# Ordered factors: list the levels from lowest to highest so the factor
# carries that order instead of the default alphabetical one. Values not
# listed in `levels` become NA.
# Two equivalent spellings are shown: ordered(...) and factor(..., ordered = TRUE).
reddit$age.range <- ordered(
  reddit$age.range,
  levels = c(
    "Under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65 or Above"
  )
)
reddit$income.range <- factor(
  reddit$income.range,
  levels = c(
    "Under $20,000", "$20,000 - $29,999", "$30,000 - $39,999",
    "$40,000 - $49,999", "$50,000 - $69,999", "$70,000 - $99,999",
    "$100,000 - $149,999", "$150,000 or more"
  ),
  ordered = TRUE
)
# Update all installed packages without prompting; checkBuilt = TRUE also
# treats packages built under an earlier R version as outdated.
update.packages(checkBuilt = TRUE, ask = FALSE)

# Source one file from a GitHub gist (source_gist() comes from devtools).
# NOTE(review): this runs remote code; the file name suggests a ggplot
# smoothing helper -- confirm the gist contents before sourcing.
library(devtools)
source_gist(
  "524eade46135f6348140",
  filename = "ggplot_smooth_func.R"
)