Loess data normalization based on time-line, includes plotting normalized vs non-normalized data
##
## mclapply.hack.R
##
## Nathan VanHoudnos
## nathanvan AT northwestern FULL STOP edu
## July 14, 2014
##
## A script to implement a hackish version of
## parallel:mclapply() on Windows machines.
## On Linux or Mac, the script has no effect
## beyond loading the parallel library.
require(parallel)
## Define the hack
mclapply.hack <- function(...) {
## Create a cluster
size.of.list <- length(list(...)[[1]])
cl <- makeCluster( min(size.of.list, detectCores()) )
## Find out the names of the loaded packages
loaded.package.names <- c(
## Base packages
sessionInfo()$basePkgs,
## Additional packages
names( sessionInfo()$otherPkgs ))
tryCatch( {
## Copy over all of the objects within scope to
## all clusters.
this.env <- environment()
while( identical( this.env, globalenv() ) == FALSE ) {
clusterExport(cl,
ls(all.names=TRUE, env=this.env),
envir=this.env)
this.env <- parent.env(environment())
}
clusterExport(cl,
ls(all.names=TRUE, env=globalenv()),
envir=globalenv())
## Load the libraries on all the clusters
## N.B. length(cl) returns the number of clusters
parLapply( cl, 1:length(cl), function(xx){
lapply(loaded.package.names, function(yy) {
require(yy , character.only=TRUE)})
})
## Run the lapply in parallel
return( parLapply( cl, ...) )
}, finally = {
## Stop the cluster
stopCluster(cl)
})
}
## Warn the user if they are using Windows
if( Sys.info()[['sysname']] == 'Windows' ){
message(paste(
"\n",
" *** Microsoft Windows detected ***\n",
" \n",
" For technical reasons, the MS Windows version of mclapply()\n",
" is implemented as a serial function instead of a parallel\n",
" function.",
" \n\n",
" As a quick hack, we replace this serial version of mclapply()\n",
" with a wrapper to parLapply() for this R session. Please see\n\n",
" http://www.stat.cmu.edu/~nmv/2014/07/14/implementing-mclapply-on-windows \n\n",
" for details.\n\n"))
}
## If the OS is Windows, set mclapply to the
## the hackish version. Otherwise, leave the
## definition alone.
mclapply <- switch( Sys.info()[['sysname']],
Windows = {mclapply.hack},
Linux = {mclapply},
Darwin = {mclapply})
## end mclapply.hack.R
setwd("M:/DataAnalysis/_Reports/EX00417 (Bluebird Metabolomics Pilot)/A003 - Untargeted/Documents/Dereplication/2016-03-04/NEG")
# Load libraries and functions
library(ggplot2)
library(reshape2)
library(bisoreg)
library(plyr)
source("M:/DataAnalysis/Automation scripts & soft/R/mclapply.hack.R", local=T)
# Original for span vals: 0.25, 1, by = 0.05
# If number of pooled samples < 5, fold = number of pooled
#
# If pooled don't flank all samples (have to extrapolate):
# theta.fit <- function(x, y, span) loess(y ~ x, span = span, control = loess.control(surface = "direct"))
#
# "folds" should be <= number of pooled samples
#
lwrap <- function (x, y, span.vals = seq(1, 10, by = 0.5), folds = 5){
mae <- numeric(length(span.vals))
theta.fit <- function(x, y, span) loess(y ~ x, span = span)
theta.predict <- function(fit, x0) predict(fit, newdata = x0)
ii = 0
for (span in span.vals) {
ii <- ii + 1
y.cv <- crossval(x, y, theta.fit, theta.predict, span = span, ngroup = folds)$cv.fit
fltr <- !is.na(y.cv)
mae[ii] <- mean(abs(y[fltr] - y.cv[fltr]))
}
span <- span.vals[which.min(mae)]
out <- loess(y ~ x, span = span, control = loess.control(surface = "direct"))
return(out)
}
adjustArea <- function(x){
tgts <- subset(x, select = c(TIME, AREA), TYPE=="POOLED")
lwr <- lwrap(tgts$TIME, tgts$AREA)
x$fitted <- predict(lwr, x$TIME)
x$adj = round(x$AREA / x$fitted * median(tgts$AREA))
return(x)
}
dat <- read.delim("EX00417_NEG_DATA_NO_DUPLICATES.TXT", check.names = FALSE)
tdat <- as.data.frame(t(dat))
colnames(tdat) <- as.character(dat[,1])
dat <- tdat[-1,]
dat$Sample <- row.names(dat)
rm(tdat)
indx <- sapply(dat, is.factor)
indx[1]<-FALSE
dat[indx] <- lapply(dat[indx], function(x) as.numeric(as.character(x)))
dat.m <- melt(dat, id.vars = c("Sample", "TYPE", "TIME"), variable.name = "TARGET", value.name = "AREA", na.rm = T)
dat.s <- split(dat.m, dat.m$TARGET)
dadj <- adjustArea(dat.s[[1]])
dplot <- ggplot(dadj, aes(TIME, y = value, color = variable, shape = factor(TYPE))) +
geom_point(aes(y = adj, col = "adj"), size=3) +
geom_line(aes(y = fitted, col = "fitted")) +
geom_point(aes(y = AREA, col = "AREA"))
NAME 20150507-EX00417-A003-IN0016-PO-CS0000009.1-N 20150507-EX00417-A003-IN0016-PO-CS0000009.2-N 20150507-EX00417-A003-IN0016-PO-CS0000009.3-N 20150507-EX00417-A003-IN0016-PO-CS0000009.4-N 20150507-EX00417-A003-IN0016-PO-CS0000009.5-N 20150507-EX00417-A003-IN0016-S00017715-N 20150507-EX00417-A003-IN0016-S00017716-N 20150507-EX00417-A003-IN0016-S00017717-N 20150507-EX00417-A003-IN0016-S00017718-N 20150507-EX00417-A003-IN0016-S00017719-N 20150507-EX00417-A003-IN0016-S00017720-N 20150507-EX00417-A003-IN0016-S00017721-N 20150507-EX00417-A003-IN0016-S00017722-N 20150507-EX00417-A003-IN0016-S00017723-N 20150507-EX00417-A003-IN0016-S00017724-N 20150507-EX00417-A003-IN0016-S00017725-N 20150507-EX00417-A003-IN0016-S00017726-N 20150507-EX00417-A003-IN0016-S00017727-N 20150507-EX00417-A003-IN0016-S00017728-N 20150507-EX00417-A003-IN0016-S00017729-N 20150507-EX00417-A003-IN0016-S00017730-N 20150507-EX00417-A003-IN0016-S00017731-N 20150507-EX00417-A003-IN0016-S00017732-N 20150507-EX00417-A003-IN0016-S00017733-N 20150507-EX00417-A003-IN0016-S00017734-N 20150507-EX00417-A003-IN0016-S00017735-N 20150507-EX00417-A003-IN0016-S00017736-N 20150507-EX00417-A003-IN0016-S00017737-N 20150507-EX00417-A003-IN0016-S00017738-N 20150507-EX00417-A003-IN0016-S00017739-N 20150507-EX00417-A003-IN0016-S00017740-N 20150507-EX00417-A003-IN0016-S00017741-N 20150507-EX00417-A003-IN0016-S00017742-N 20150507-EX00417-A003-IN0016-S00017743-N 20150507-EX00417-A003-IN0016-S00017744-N 20150507-EX00417-A003-IN0016-S00017745-N 20150507-EX00417-A003-IN0016-S00017746-N
TYPE POOLED POOLED POOLED POOLED POOLED SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE SAMPLE
TIME 0 5.65 11.3 16.95 22.6 0.63 1.26 1.88 2.51 3.14 3.77 4.39 5.02 6.28 6.91 7.53 8.16 8.79 9.42 10.04 10.67 11.93 12.55 13.18 13.81 14.44 15.07 15.69 16.32 17.58 18.2 18.83 19.46 20.09 20.71 21.34 21.97
L-HISTIDINE (M-H)- 85511 84182 84460 81713 73492 61040 88556 27650 75078 95983 99869 66847 75195 80357 34602 74307 70824 81666 94960 65535 54473 44840 82196 93111 67506 72186 66416 62415 70721 81199 55746 77000 35269 77939 73921 75588 58132
CITRIC ACID (M-H)- 8495116 6615815 7294822 7329470 6494593 5285724 7429132 2422089 7370853 8033781 7998543 6080085 7445175 7302322 3060311 7190515 8211321 7524816 7341764 6216463 6151818 4784126 7078060 6813009 7212430 6065239 8153841 6578876 4683127 7016849 7137953 9381467 3167750 8881683 7852891 7553011 6312367
SUCCINIC ACID (M-H)- 105507 106669 102665 102646 99346 70086 102363 23709 74260 104657 100744 84135 84588 68097 19068 96911 84150 73252 78657 116426 83675 42668 94628 62724 79655 87166 97551 84735 96981 82080 106453 35261 29145 111736 92888 103276 69304
TRANS-ACONITIC ACID (M-H)- 387520 440274 424221 126330 75133 303391 467775 158946 154158 230590 136774 105146 304805 279782 209932 558651 660411 118712 355268 283335 393161 771528 788553 334986 316278 286394 116297 319577 377277 283404 555786 712513 221401 422831 302480 377889 318259
L-TRYPTOPHAN-15N2 [ISTD] (M-H)- 18906 18081 15152 17959 17378 13177 20116 11774 16048 15316 14043 16505 13870 8060 16526 14937 13506 13561 16819 8462 12875 14173 1999 14535 11631 13498
MANDELIC ACID #2 (M-H)- 101770 104046 99808 100814 105358 796039 82235 29116 882144 790941 4053 14036 45904 2103 138342 14658
ZEATIN [ISTD] (M+Cl)- 339212 333869 346860 356563 333685 238428 318941 74414 327442 325604 349110 331978 340313 320461 89713 339945 357351 348773 368487 351765 349453 184334 356807 355272 353466 316420 359382 329872 361255 350826 323857 326036 83091 359986 363606 336547 325351
L-[15N]ANTHRANILIC ACID [ISTD] (M-H)- 17565 18356 20912 17861 18632 13141 18955 16744 14052 16640 19051 18730 16649 5410 18670 18994 17036 19985 18064 18600 9919 16695 15475 18751 19334 17400 17231 14839 20450 16034 20551 4324 20712 23862 18955 16717
XANTHINE (M-H)- 57048 58689 51868 55983 54251 53100 39411 17790 36613 35473 51345 49189 47860 40156 11381 118681 95377 40972 44895 95990 75865 38564 76604 50720 43091 43055 35460 38299 55747 26176 31392 38874 11277 52438 56737 40726 41125
THEOPHYLLINE (M-H)- 513117 516259 519962 543743 498036 58438 95124 67807 215293 562421 439414 6832 6457 54161 13992 426364 109640 38096 36421 25458 124558 267763 111210 94747 160404 310702 4299420 6018 3625453 10142 47885 194782 160746 5421 35457
16:0 LYSO PC (M+Cl)- 46350 39134 36170 37790 41527 12740 54038 7229 35525 42177 42309 47847 39361 49558 21002 50780 51119 37636 42092 43920 38268 47550 60673 40755 41594 35699 41793 59793 58280 47288 21415 46023 23607 27930 28524 48580 51681
BENZOIC ACID (M-H)- 13126 14592 12036 12881 12882 9015 9756 5471 18177 9125 9968 8760 7822 13893 11334 11220 12353 12563 7060 8654 15901 6109 9845 6422 8311 8035 11925 11859 8443 15170 13377 8208 8652
STEARIC ACID (M-H)- 164026 187122 198809 235696 291097 130631 301223 53764 216708 202130 188941 197644 111428 186968 51649 235284 225385 202622 223448 263602 180345 161018 248369 162251 233651 276912 282746 333284 354394 272801 267884 390999 92183 490128 965168 310622 321834
GLUTAMINE (M-H)- 175692 187355 159946 173314 126829 135850 153111 131361 178711 173852 181790 195776 182939 122440 139759 157307 156659 182952 157326 121089 90751 128255 195301 166108 161864 148349 283778 264157 146268 130429 183219 134019 149559 155530 152809 131704
ETHYLMALONIC ACID (M-H)- 17656 17571 18647 4811 17633 5140 10186 9772 3436 8428 8972 1884 3803 1830 18971 18391 52487 58855 5747 3713 24646 13613 2001
DEOXYURIDINE (M+Cl)- 24366 24054 24495 25754 21036 13903 12464 1180 32701 24193 35609 21661 19944 18231 12819 8835 27500 27357 8536 5180 18388 15545 19682 25155 23090 21803 17440 20880 43095 15645 20951 15344
AZELAIC ACID (M-H)- 47102 44828 41977 46604 43483 30350 48072 9759 38964 34822 38841 36667 33319 34686 9423 36141 37012 41063 44079 84615 28154 14753 31202 30747 38383 31529 26395 30993 31814 28452 40489 51558 10354 45173 42790 41247 37309