Halpern interpolation
#================================================================================
#STEP 1: unzip files
#================================================================================
## set dir
setwd("~/Downloads/HALPERN")
## list all files for 2008
files_rescaled_2013 <- list.files(path = "2013_scaled/tif")
print(files_rescaled_2013)
#================================================================================
#STEP 2
#================================================================================
library(proj4)
library(tidyverse)
library(raster)
my_db<- src_postgres(host = "localhost", port = 5432, dbname = "osd_analysis", options = "-c search_path=osd_analysis")
osd2014_amp_mg_intersect <- tbl(my_db, "osd2014_amp_mg_intersect") %>%
collect(n = Inf)
osd2014_metadata <- read_tsv("~/ownCloud/OSD_paper/OSD-GC/data/osd2014_metadata_18-01-2017.tsv",
col_names = TRUE, trim_ws = TRUE,
col_types = list(mrgid = col_character(), biome_id = col_character(),
feature_id = col_character(),material_id = col_character()))
osd2014_metadata_mg <- osd2014_metadata %>% filter(label %in% osd2014_amp_mg_intersect$label)
sc <- cbind(long=osd2014_metadata_mg$start_lon, lat=osd2014_metadata_mg$start_lat) #(long,lat)
r1 <- raster("~/Downloads/HALPERN/2013_scaled/tif_wgs84/global_cumul_impact_2013_all_layers_wgs84.tif")
r <- raster("~/Downloads/HALPERN/2013_scaled/tif/global_cumul_impact_2013_all_layers.tif")
#change WGS84 to Mollweide
oldproj <- "+proj=longlat +datum=WGS84 +no_defs" #WGS84
newproj <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs" #Mollweide
wgs.84 <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# ESRI:54009 world mollweide projection, units = meters
# see http://www.spatialreference.org/ref/esri/54009/
mollweide <- "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
sp.points <- SpatialPoints(sc, proj4string=CRS(oldproj))
sc <- spTransform(sp.points,CRS(newproj))
#================================================================================
#STEP 3 : values for 2008
#================================================================================
require(rgdal)
library(raster)
#sink("GlobalMinMaxForVar.txt")
GlobalMinMaxForVar13 = data.frame(id=1:length(files_rescaled_2013), VarNames=gsub(".tif", "", files_rescaled_2013), minValue=rep(0,length(files_rescaled_2013)), maxValue=rep(0,length(files_rescaled_2013)))
head(GlobalMinMaxForVar13)
#OUTPUT FILE
out_rescaled_2013 = data.frame(osd_id=osd2014_metadata_mg$label, long=osd2014_metadata_mg$start_lon, lat=osd2014_metadata_mg$start_lat)
m=ncol(out_rescaled_2013)
#LOOP TO READ TIFF ONE BY ONE
for(i in 1:length(files_rescaled_2013)) {
varName = gsub(".tif", "", files_rescaled_2013[i])
print(varName); flush.console()
## Reading in a zip data file without unzipping it
#r <- raster(list.files$Name[2])
f <- paste("~/Downloads/HALPERN/2013_scaled/tif/", files_rescaled_2013[i], sep = "")
r <- raster(f)
print(r); flush.console()
#Get min and max cell values from raster
GlobalMinMaxForVar13$minValue[i] = cellStats(r, min)
GlobalMinMaxForVar13$maxValue[i] = round(cellStats(r, max),3)
#LOOP TO READ TIFF ONE BY ONE
for(k in 1: nrow(sc@coords)){
# if(k==16){
# out_rescaled_2013[k, m+1] = "NA"; colnames(out_rescaled_2013)[m+1] = (paste(varName, "50kms_max", sep="_"))
# out_rescaled_2013[k, m+2] = "NA"; colnames(out_rescaled_2013)[m+2] = (paste(varName, "50kms_min", sep="_"))
# out_rescaled_2013[k, m+3] = "NA"; colnames(out_rescaled_2013)[m+3] = (paste(varName, "10kms_max", sep="_"))
# out_rescaled_2013[k, m+4] = "NA"; colnames(out_rescaled_2013)[m+4] = (paste(varName, "10kms_min", sep="_"))
#} else{
## MEHOD:1
## Extract values from Raster objects
#equivalent to 0.5 degree or 50 kms approx radius
e0 = extract(r, sc[k,], buffer = 100000, weights = TRUE, small = TRUE) %>% unlist
e1 = extract(r, sc[k,], buffer = 50000, weights = TRUE, small = TRUE) %>% unlist
#equivalent to 0.1 degree or 10 kms approx
e2 = extract(r, sc[k,], buffer = 10000, weights = TRUE, small = TRUE) %>% unlist
#equivalent to 0.1 degree or 10 kms approx
e3 = extract(r, sc[k,], buffer = 5000, weights = TRUE, small = TRUE) %>% unlist
e4 = extract(r, sc[k,], buffer = 1000, weights = TRUE, small = TRUE) %>% unlist
out_rescaled_2013[k, m+1] = round(max(e1, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+1] = (paste(varName, "2013_50kms_max", sep="_"))
out_rescaled_2013[k, m+2] = round(mean(e1, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+2] = (paste(varName, "2013_50kms_mean", sep="_"))
out_rescaled_2013[k, m+3] = round(max(e2, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+3] = (paste(varName, "2013_10kms_max", sep="_"))
out_rescaled_2013[k, m+4] = round(mean(e2, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+4] = (paste(varName, "2013_10kms_mean", sep="_"))
out_rescaled_2013[k, m+5] = round(max(e3, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+5] = (paste(varName, "2013_5kms_max", sep="_"))
out_rescaled_2013[k, m+6] = round(mean(e3, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+6] = (paste(varName, "2013_5kms_mean", sep="_"))
out_rescaled_2013[k, m+7] = round(max(e4, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+7] = (paste(varName, "2013_1kms_max", sep="_"))
out_rescaled_2013[k, m+8] = round(mean(e4, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+8] = (paste(varName, "2013_1kms_mean", sep="_"))
out_rescaled_2013[k, m+9] = round(max(e0, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+9] = (paste(varName, "2013_100kms_max", sep="_"))
out_rescaled_2013[k, m+10] = round(mean(e0, na.rm=TRUE),3) ; colnames(out_rescaled_2013)[m+10] = (paste(varName, "2013_100kms_mean", sep="_"))
if(k %in% c(25,50,75,100,125,150,175,200)){cat(k, " | ")}
flush.console()
#}
}
cat("Next!!! \n")
m=ncol(out_rescaled_2013)
}
out_rescaled_2013_mean_long <- out_rescaled_2013 %>%
dplyr::select(osd_id, contains("mean")) %>%
dplyr::rename(label = osd_id) %>%
gather(ohi_variable, mean, -label) %>% tbl_df
library(naturalsort)
out_rescaled_2013_mean_long$ohi_variable <- factor(out_rescaled_2013_mean_long$ohi_variable, levels = unique(out_rescaled_2013_mean_long$ohi_variable) %>% naturalsort())
for (i in 1:13){
fname <- paste("~/Downloads/halpern_mean_", i, ".pdf", sep = "")
pdf(fname, width = 14, height = 3)
p <- ggplot(out_rescaled_2013_mean_long, aes(mean)) +
geom_density(fill = "#333333", alpha = 0.8) +
ggforce::facet_wrap_paginate(~ohi_variable, scales = "free", ncol = 5, nrow = 1, page = i) +
theme_bw() +
theme(strip.text.x = element_text(size = 6),
axis.text = element_text(size = 8))
print(p)
dev.off()
}
out_rescaled_2013_mean_long %>%
filter(is.na(mean)) %>%
group_by(ohi_variable) %>%
count() %>%
separate("ohi_variable", c("variable", "buffer"), sep = "_2013_", remove = TRUE) %>%
mutate(buffer = gsub("s_mean", "", buffer)) %>%
complete(variable, buffer, fill = list(n = 0)) %>%
ggplot(aes(buffer, n)) +
geom_bar(stat = "identity") +
geom_text(aes(label = n), vjust = -0.2, size = 3) +
facet_wrap(~variable, scales = "free_x") +
scale_x_discrete(limits = c("1km", "5km", "10km", "50km", "100km")) +
theme_bw() +
xlab("Extraction buffer") +
ylab("# of NAs")
library(maptools)
data(wrld_simpl)
wrldMoll <- spTransform(wrld_simpl, CRS(newproj))
for(i in 1:length(files_rescaled_2013)) {
varName = gsub(".tif", "", files_rescaled_2013[i])
print(varName); flush.console()
## Reading in a zip data file without unzipping it
#r <- raster(list.files$Name[2])
f <- paste("~/Downloads/HALPERN/2013_scaled/tif/", files_rescaled_2013[i], sep = "")
r <- raster(f)
fname <- paste("~/Downloads/halpern_", varName,".pdf", sep = "")
pdf(fname, width = 7, height = 5)
colors <- rev(colorRampPalette(c(brewer.pal(11, "Spectral"), "#FFFFFF"))(100))
plot(r, col = colors, axes = F, box = F)
plot(wrldMoll, col = "#E5E6E6", border = "#E5E6E6", add = TRUE, box = FALSE)
points(sc, cex = 0.5)
dev.off()
}
out_rescaled_2013_mean_long_5km <- out_rescaled_2013_mean_long %>%
mutate(ohi_variable = gsub("_2013_all_layers", "", ohi_variable)) %>%
mutate(ohi_variable = gsub("_2013_minus_2008", "diff", ohi_variable))
out_rescaled_2013_mean_long_5km <- out_rescaled_2013_mean_long_5km %>%
separate("ohi_variable", c("variable", "buffer"), sep = "_2013_", remove = TRUE) %>%
mutate(buffer = gsub("s_mean", "", buffer))
out_rescaled_2013_mean_long_5km$label <- factor(out_rescaled_2013_mean_long_5km$label, levels = st_100_order_terrestrial)
out_rescaled_2013_mean_long_5km$buffer <- factor(out_rescaled_2013_mean_long_5km$buffer, levels = c("1km", "5km", "10km", "50km", "100km"))
for (i in 1:13){
fname <- paste("~/Downloads/halpern_mean_", i, "_by_sample.pdf", sep = "")
pdf(fname, width = 14, height = 3)
p <- ggplot(out_rescaled_2013_mean_long_5km, aes(label, mean)) +
geom_bar(stat = "identity") +
ggforce::facet_grid_paginate(variable~buffer, scales = "free", ncol = 5, nrow = 1, page = i) +
scale_x_discrete(limits = st_100_order_terrestrial) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
xlab("") +
ylab("Average")
print(p)
dev.off()
}
out_rescaled_2013_mean_long_lat_long <- out_rescaled_2013_mean_long %>%
filter(is.na(mean)) %>%
separate("ohi_variable", c("variable", "buffer"), sep = "_2013_", remove = TRUE) %>%
mutate(buffer = gsub("s_mean", "", buffer)) %>%
filter(buffer == "10km") %>%
left_join(osd2014_metadata)
wmap<-map_data("world")
pdf("~/Downloads/nas_map_10km.pdf", width = 21, height = 10)
p <- ggplot(wmap, aes(x = long, y = lat)) +
geom_polygon(aes(group = group), fill = "#E5E6E6") +
geom_path(aes(group = group), colour = "#E5E6E6") +
geom_point(data = out_rescaled_2013_mean_long_lat_long, aes(x = start_lon, y = start_lat),
alpha = 0.8) +
xlab("") +
ylab("") +
coord_equal(ratio = 1) +
facet_wrap(~variable) +
theme(panel.background = element_rect(fill = "white", colour = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
legend.position = "top")
print(p)
dev.off()
# Very low impact: <1.4
# Low impact: 1.4-4.95
# Medium impact: 4.95-8.47
# Medium high impact: 8.47-12
# High impact: 12-15.52
# Very high impact: >15.52
# Classify samples in impacted and non-impacted
halpern_impact <- out_rescaled_2013_mean_long %>%
mutate(ohi_variable = gsub("_2013_all_layers", "", ohi_variable)) %>%
mutate(ohi_variable = gsub("_2013_minus_2008", "diff", ohi_variable)) %>%
filter(!is.na(mean)) %>%
separate("ohi_variable", c("variable", "buffer"), sep = "_2013_", remove = TRUE) %>%
mutate(buffer = gsub("s_mean", "", buffer)) %>%
filter(buffer == "5km", variable == "global_cumul_impact") %>%
mutate(class = ifelse(mean <= 1.4, "non-impacted", "impacted"))
halpern_impact_class_min <- halpern_impact %>%
group_by(class) %>%
count %>%
dplyr::slice(which.min(n)) %>%
.$n
sites_non_impact <- halpern_impact %>%
filter(class == "non-impacted") %>%
.$label %>%
droplevels()
sites_rand <- halpern_impact %>%
filter(class == "impacted") %>%
.$label %>%
sample(.,halpern_impact_class_min) %>%
droplevels()
filt <- osd2014_16s_prevalence %>%
filter(total_counts >= 10, prev > 2) %>% .$otu_name %>% droplevels()
sites_rm <- c("OSD63_2014-06-20_0m_NPL022",
"OSD167_2014-06-21_0.1m_NPL022",
"OSD128_2014-06-21_1m_NPL022",
"OSD80_2014-06-21_2m_NPL022")
sites_rm <- sample_names(osd2014_16s_otuXsample_physeq_filt_prev_beta_mg)[!(sample_names(osd2014_16s_otuXsample_physeq_filt_prev_beta_mg) %in% sites_rm)]
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt <- prune_taxa(filt %>% as.vector(), osd2014_16s_otuXsample_physeq_filt_prev_beta_mg)
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt <- prune_samples(sites_rm,osd2014_16s_otuXsample_physeq_filt_prev_beta_filt)
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css <- cssTrans(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt, norm = T, log = F)
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m <- t(as(otu_table(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css), "matrix"))
plot(hclust(dist(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m), "ward.D"))
pca <- prcomp((osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m), center=F)
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m_hel <- decostand(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m, "total")
osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m_hel <- decostand(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m_hel, "hellinger")
mdsplot <- metaMDS(osd2014_16s_otuXsample_physeq_filt_prev_beta_filt_css_m_hel, distance = "bray")
halpern_impact_all <- out_rescaled_2013_mean_long %>%
mutate(ohi_variable = gsub("_2013_all_layers", "", ohi_variable)) %>%
mutate(ohi_variable = gsub("_2013_minus_2008", "diff", ohi_variable)) %>%
filter(!is.na(mean)) %>%
separate("ohi_variable", c("variable", "buffer"), sep = "_2013_", remove = TRUE) %>%
mutate(buffer = gsub("s_mean", "", buffer)) %>%
filter(buffer == "10km")
test <- melt(scores(pca, display = "sites")) %>%
dplyr::rename(label = Var1) %>%
filter(Var2 == "NMDS1") %>%
left_join(halpern_impact_all) %>%
dplyr::select(label, value, variable, mean) %>%
group_by(variable) %>%
do(fit = Hmisc::rcorr(.$value, .$mean))
test %>% broom::tidy(fit) %>%
filter(p.value < 0.01)
otutable <- t(as(otu_table(osd2014_16s_otuXsample_physeq_filt_prev_beta), "matrix"))
otutable <- base::as.data.frame(otutable)
otutable <- otutable[,filt]
names_otus <- names(otutable)
names_otus_short <- paste("otu", seq(1:length(names_otus)), sep = "_")
names(otutable) <- names_otus_short
otutable$label <- base::row.names(otutable)
otutable <- otutable %>% filter(label %in% sites_non_impact | label %in% sites_rand)
otutable <- otutable %>% left_join(halpern_impact %>% dplyr::select(label, class))
class_impact <- otutable %>% dplyr::select(label, class)
base::row.names(otutable) <- otutable$label
otutable$label <- NULL
otutable$class <- NULL
library(parallelRandomForest)
set.seed(2)
halpern_classify <- randomForest::randomForest(y = as.factor(class_impact$class),
x = otutable, ntree=500, importance=TRUE, do.trace=100)
print(halpern_classify)
imp <- importance(halpern_classify)
imp <- data.frame(predictors = rownames(imp), imp)
imp.sort <- arrange(imp, desc(MeanDecreaseGini))
imp.sort$predictors <- factor(imp.sort$predictors, levels = imp.sort$predictors)
# Select the top 10 predictors
imp.20 <- imp.sort[1:20, ]
# ggplot
ggplot(imp.20, aes(x = predictors, y = MeanDecreaseGini)) +
geom_bar(stat = "identity", fill = "indianred") +
coord_flip() +
ggtitle("Most important OTUs for classifying Erie samples\n into nearshore or offshore")