Basic structure for a new project
main.R initialization.R parallelity.R
# This script initializes a parallel workflow.
# The idea is to use cv.learn.func() for crossvalidation-methods.
# If "enableParallelization" is true, the parallel version is used.
# args are the command line parameters, extracted in "initialization.R"
# cip for cippool, clust for cluster
# Pass Rscript an argument containing "cip" or "clust", immediately followed
# by the path to a CSV file whose first column lists the PCs where R is prepared.
# parallelity initialization
# Registers doFuture as the foreach backend and selects a future plan.
# Reads `enableParallelization` (set in main.R) and `args` (set in
# initialization.R); leaves `is.cluster.declared`, and in cluster mode
# `where.is.cluster.declared` and `prepared.pcs`, in the workspace.
if (enableParallelization == TRUE) {
  library("doFuture")
  registerDoFuture()
  # args got from initialization.R
  # "cip" stands for cippool, "clust" for cluster. The matching argument must
  # be followed directly by the path to a CSV file whose first column lists
  # the PCs where R is prepared.
  is.cluster.declared <- grepl("cip|clust", args)
  if (any(is.cluster.declared)) {
    # Only the first match is the flag: the path argument itself may contain
    # "cip"/"clust" as well, which would otherwise yield several indices and
    # an NA entry in the paths handed to read.csv().
    where.is.cluster.declared <- which(is.cluster.declared)[1L]
    if (where.is.cluster.declared >= length(args)) {
      stop("The argument \"", args[where.is.cluster.declared],
           "\" must be followed by the path to the file listing the prepared pcs.")
    }
    prepared.pcs <- read.csv(args[where.is.cluster.declared + 1L],
                             stringsAsFactors = FALSE)[, 1]
    # Nested plan: outer level distributes over the listed PCs, inner level
    # uses 4 workers on each of them.
    # NOTE(review): `multiprocess` is deprecated in recent releases of the
    # future package (multisession/multicore are the replacements) - confirm
    # against the installed version before upgrading.
    plan(list(
      tweak(cluster, workers = prepared.pcs),
      tweak(multiprocess, workers = 4L)
    ))
  } else {
    plan(multiprocess, workers = 4L) # 4 seems too much for my PC at home.. (but just for the geneRatios)
  }
  # 1000 * 1024^2 bytes = 1000 MiB limit for globals exported to workers.
  options(future.globals.maxSize = 1000 * 1024^2)
}
# Load the default cross-validation routine from the functions directory.
# NOTE(review): source.function() is defined outside this file; presumably it
# sources "<name>.R" from the given directory - confirm.
source.function("cv.learn.func", A_FUNCD)

# With both switches on, cv.learn.func() is replaced by its parallel variant
# and the temporary binding of the parallel function is dropped again.
if (enableParallelization && cv.learn.func_inParallel) {
  source.function("cv.learn.func_parallel", A_FUNCD)
  cv.learn.func <- get("cv.learn.func_parallel", mode = "function")
  rm(list = "cv.learn.func_parallel")
}
# This script calls all other necessary scripts
# Order matters: initialization.R reads `cleanrestart`, and parallelity.R
# reads the two switches set right before it is sourced.
cleanrestart <- FALSE
source("Scripts/initialization.R")

enableParallelization <- FALSE
cv.learn.func_inParallel <- FALSE # if TRUE, the function cv.learn.func() is overwritten
# with a parallel version.
source("Scripts/parallelity.R")
catt(" ----- Initialization done ----- ")

# Each stage below points A_PDF.DIR at its own output directory, creates it
# and runs the stage's subscript via source.addtime(), which returns the
# updated runtime object. showWarnings = FALSE keeps reruns quiet when the
# directory already exists (same convention as in initialization.R).
A_PDF.DIR <- file.path(A_PDF.DIRmain, "01_dataloading")
dir.create(A_PDF.DIR, showWarnings = FALSE, recursive = TRUE)
A_RUNTIME <- source.addtime("Scripts/subscripts/01_dataloading/01_loadPhenodata.R", runtimecobject = A_RUNTIME)

A_PDF.DIR <- file.path(A_PDF.DIRmain, "02_RawDataInspection")
dir.create(A_PDF.DIR, showWarnings = FALSE, recursive = TRUE)
A_RUNTIME <- source.addtime("Scripts/subscripts/02_RawDataInspection/01_", runtimecobject = A_RUNTIME)

A_PDF.DIR <- file.path(A_PDF.DIRmain, "03_qualityControl")
dir.create(A_PDF.DIR, showWarnings = FALSE, recursive = TRUE)
A_RUNTIME <- source.addtime("Scripts/subscripts/03_qualityControl/01_", runtimecobject = A_RUNTIME)

A_PDF.DIR <- file.path(A_PDF.DIRmain, "04_learning", "01_method_1")
dir.create(A_PDF.DIR, showWarnings = FALSE, recursive = TRUE)
A_RUNTIME <- source.addtime("Scripts/subscripts/04_learning/01_method_1", runtimecobject = A_RUNTIME)

####### This comes at the end of the main-script
# Summarize the collected runtimes, write them to disk and snapshot the
# complete workspace.
A_RUNTIME <- summarizeRT(A_RUNTIME)
write(A_RUNTIME, "runtime.txt")
save.image(file = file.path(A_RDA.WORKSPACE, "finished.rda"))
# NOTE(review): reloading the image that was just saved is a no-op in a
# straight run; presumably kept for resuming interactively - confirm.
load(file.path(A_RDA.WORKSPACE, "finished.rda"))
print(sessionInfo())
# The following variables can be modified but should be correct if the data was downloaded
# correctly and one has proper rights in the directory
# A_RDA.DIR - directory where all .rda-files are saved (except .rda's which are distributed)
# A_PDF.DIRmain - main PDF-directory (all pdfs are stored here)
# A_DATA.DIR - all downloaded files are saved here
# A_SUBD - directory which contains all subscripts (should not be modified usually)
# A_FUNCD - directory holding functions used by the subscripts or main.R (should not be modified usually)
# If you want a clean restart, remove all RDA* and PDF* directories
# If you want a completely clean restart,
# 1) remove all RDA* and PDF* directories
# 2) delete all (used) libraries, they will be reinstalled during the script
# global variables are declared as "A_VARIABLENAME"
# "A_" to sort them on top by Rstudio's environment, and all in upper case.
# Clean restart, step 1: empty the workspace. rm() also removes the flag
# itself, so it is set again right afterwards for the check further down.
if (cleanrestart) { # not tested with commandline arguments(22.5.18)
  rm(list = ls())
  cleanrestart <- TRUE
}

# Trailing command-line arguments. Without any, "FALSE" is stored as a
# placeholder so that args[1] can always be compared below.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  args <- "FALSE"
}

##### necessary libraries install commands (tested under ubuntu)
# sudo apt-get install libcurl4-openssl-dev libssl-dev
# sudo apt-get install udunits-bin # probably (not tested) the necessary for udunits2-R-package
# cleanrestart <- FALSE

# Clean restart, step 2: triggered by the flag or by "cleanrestart" as first
# command-line argument. Removes the result directories, the operational
# data and the log files of earlier runs.
if (cleanrestart || args[1] == "cleanrestart") {
  unlink(
    c("Results/PDF", "Results/RDA", "Data/OperationalData/*"),
    recursive = TRUE
  )
  unlink(c("runtime.txt", "outMAIN.txt"))
}
# load basic scripts - package
# runallscripts <- TRUE
# Install-on-demand for the two bootstrap packages. requireNamespace() only
# probes whether a package is installed; the library() calls attach it and
# stop with an error if the installation did not succeed.
if (!requireNamespace("devtools", quietly = TRUE)) {
  # sudo apt-get install libcurl4-openssl-dev libssl-dev
  # Fetch over https so the download is not sent in plain text.
  install.packages("devtools", repos = "https://cran.rstudio.com/")
}
library(devtools)
if (!requireNamespace("basicscriptsGG", quietly = TRUE)) {
  install_github("gugl58/basicscriptsGG")
}
library(basicscriptsGG)
###### setting global variables ######
# Output directories; created here if missing (existing ones are left alone).
A_RDA.DIR <- "Results/RDA" # all .rda are saved
A_RDA.WORKSPACE <- file.path(A_RDA.DIR, "workspaces") # workspace images
A_PDF.DIRmain <- "Results/PDF" # all pdf are saved
dir.create(A_RDA.DIR, showWarnings = FALSE, recursive = TRUE)
dir.create(A_RDA.WORKSPACE, showWarnings = FALSE, recursive = TRUE)
dir.create(A_PDF.DIRmain, showWarnings = FALSE, recursive = TRUE)

# Input directories; these must already exist, otherwise the script stops.
# dir.exists() is used so that a regular file of the same name is rejected.
A_SUBD <- "Scripts/subscripts" # directory where subscripts are located
if (!dir.exists(A_SUBD)) {
  stop(paste0("Subscripts-Directory \"", A_SUBD, "\" does not exist."))
}
A_FUNCD <- file.path(A_SUBD, "#functions") # directory where functions are located
if (!dir.exists(A_FUNCD)) {
  stop(paste0("Functions-Directory \"", A_FUNCD, "\" does not exist."))
}
###### initialising runtimeC-class variable ######
# define.catt() and RuntimeC() are not defined in this file (presumably they
# come from basicscriptsGG - confirm).
catt <- define.catt()
A_RUNTIME <- RuntimeC() # initialize runtime-array

# Fallback script name, for testing purposes: subscripts usually print their
# name, but when a subscript is sourced directly the variable holding that
# name is never set, so this value is used instead.
A_scriptname <- '"Skript is not called from source.addtime(), therefore naming is not correct"'

###### options
options(warn = 1) # warnings printed as they occur
##### Install and load all packages later used. (This is redundant, but now if an error occurs in installing it occurs in the beginning)
#inst.load.packages("GEOquery", inst.bioclite = TRUE)
#inst.load.packages("tibble")
#inst.load.packages("plyr")
#inst.load.packages("affy", inst.bioclite = TRUE)
# install_github("rehbergT/zeroSum/zeroSum")
#library("zeroSum")
#inst.load.packages("hgu133plus2.db", inst.bioclite = TRUE) # Genenames to symbols
#inst.load.packages("reshape2")
#inst.load.packages("ggplot2")
#inst.load.packages("tidyr")
#inst.load.packages("ggfittext")