# R Cheatsheet
# Template: by(data, INDICES, FUN) applies FUN to `data` split by INDICES.
# Example: summary of friend_count computed separately for each gender.
by(data = pf$friend_count, INDICES = pf$gender, FUN = summary)
# Build a 0/1 indicator column: 1 when mobile_likes > 0, otherwise 0
# (NA wherever mobile_likes is NA). No prior NA initialisation is needed
# because the assignment below creates the column in one step.
pf$mobile_check_in <- ifelse(pf$mobile_likes > 0, 1, 0)
# Percentage of rows flagged 1. The mean of a 0/1 vector is the share of 1s;
# like the sum/length form it yields NA if any indicator value is NA.
percent_mobile <- mean(pf$mobile_check_in) * 100
# Take a sample and analyze it
# Fix the seed so the same 16 ids are drawn on every run.
set.seed(4231)
# Draw 16 of the factor levels of yo$id at random
# (presumably one id per household - confirm against the data set).
sample.ids <- sample(levels(yo$id), size = 16)
# One panel per sampled id: price over time as a line, plus hollow points
# whose size reflects all.purchases.
ggplot(data = subset(yo, id %in% sample.ids),
       mapping = aes(x = time, y = price)) +
  facet_wrap(~id) +
  geom_line() +
  geom_point(aes(size = all.purchases), shape = 1)
# Scatterplot Matrix
# Install GGally only when it is missing, so re-running these lines does not
# reinstall the package every time.
if (!requireNamespace('GGally', quietly = TRUE)) {
  install.packages('GGally')
}
library(GGally)
set.seed(1836) # Makes the random 1000-row sample below reproducible
pf_subset <- pf[, 2:15] # Keep columns 2 through 15 of pf
names(pf_subset) # Print the retained column names
# Scatterplot matrix of 1000 randomly chosen rows of pf_subset
ggpairs(pf_subset[sample.int(nrow(pf_subset), 1000), ])
ggpairs(pf_subset[sample.int(nrow(pf_subset), 1000), ], axisLabels = 'internal')

# Libraries
# ggplot2: Visualization library
# magrittr: Library providing the pipe operator %>% (Cmd+Shift+M)
# tidyr & dplyr: Data wrangling with R
# pander: Render R objects into Pandoc's markdown
# ggthemes: Themes for the ggplot2 library
# gridExtra: Arrange several plots together with grid.arrange(p1, p2, ..., ncol = 1)
# scales: Implement scales in a way that is graphics-system agnostic

# To install a new package and use it:
# Template: replace name_of_the_package with the package to install and load.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
install.packages('name_of_the_package', dependencies = TRUE)
library(name_of_the_package)
getwd()  # Get the working directory
setwd('~/Downloads')  # Set the working directory
ls()  # List the variables in the environment
dir()  # List the files and directories in the working directory
list.files()  # List the files in the working directory
rm('variable1')  # Remove variable1 from the environment
rm(list = ls())  # Remove all variables from the environment
identical(data1, data2)  # Test whether two objects are exactly equal
colnames(data)  # Get column names (also names(df) on data frames)
rownames(data)  # Get row names
data(name_dataset)  # Load a data set into the environment
# Rscript my_script.R  (shell command: run an R script from the command line)
read.csv('file.csv')  # Read from CSV into a data.frame
read.csv('file.tsv', sep = '\t')  # Read from TSV into a data.frame
alumni <- read.csv(path_alumni, na.strings = c('-'), colClasses = c('character', 'character', 'numeric', 'numeric'))
# subset(df, <condition>)  Example: subset(statesInfo, state.region == 1)
# df[ROWS, COLUMNS]
statesInfo[statesInfo$state.region == 1, ]
statesInfo[statesInfo$state.region == 1 & statesInfo$population > 3000, ]
nrow(df)
ncol(df)
# by(data, factorlist, function)  Ex: by(pf$friend_count, pf$gender, summary)
str(data)  # Structure of the data
summary(data)  # Summary of the data
head(data)
tail(data)
table(variable)
levels(variable)
reddit$age.range <- ordered(reddit$age.range, levels = c('Under 18', '18-24', '25-34', '35-44', '45-54', '55-64', '65 or Above'))
reddit$income.range <- factor(reddit$income.range, levels = c("Under $20,000", "$20,000 - $29,999", "$30,000 - $39,999", "$40,000 - $49,999", "$50,000 - $69,999", "$70,000 - $99,999", "$100,000 - $149,999", "$150,000 or more"), ordered = T)
update.packages(ask=FALSE, checkBuilt = TRUE)
# Attach devtools, which provides source_gist().
library(devtools)
# Download the file ggplot_smooth_func.R from the GitHub gist with this id
# and source it into the current session.
# NOTE(review): this executes remote code; consider pinning it with the
# sha1 argument of source_gist() - confirm against the devtools docs.
source_gist(id = "524eade46135f6348140",
            filename = "ggplot_smooth_func.R")