// nextflow drops cache
// Benchmark grid: every item emitted by `design` is crossed with each core
// count, then each memory size, then each method id. The values are appended
// to the tuple in exactly that order.
def coreCounts    = [8, 12, 16]          // Cores
def memorySizesGb = [12, 32, 64]         // Memory (GB)
def methodIds     = [1, 2, 3, 7, 8, 9]   // Method

align = design
    .combine(coreCounts)
    .combine(memorySizesGb)
    .combine(methodIds)
process alignment {
    /*
     * Benchmark process: aligns one pair of FASTQ files with `bwa mem`, drops
     * short reads with awk, and produces a coordinate-sorted, indexed BAM.
     *
     * Each task receives a (cores, memory, method) combination from the
     * `align` channel. `method` selects one of the script variants below:
     *   1      - the 'old' method (SAMtools 0.1.19 + Picard SortSam/BuildBamIndex)
     *   2,3,7  - bwa | awk | samtools view to an intermediate BAM, then sambamba sort
     *   4,5,6  - fully piped bwa | awk | samtools view | sambamba sort
     *   8,9    - intermediate SAM file and intermediate BAM file, then sambamba sort
     *
     * Inputs : row, meta, fq1, fq2, cores, memory (GB), method
     * Outputs: row, meta (extended with cores/memory/method), <slug>.bam, <slug>.bam.bai
     *
     * `gen_tag` and `p` are helpers defined outside this block; `p(row, meta)`
     * supplies the output file stem.
     */
    cache 'deep'
    tag { gen_tag("alignment", row, meta) }
    module 'BWA/0.7.15-intel-2016b:picard/1.107-Java-1.8.0_92:SAMtools/1.3.1-foss-2016b:Sambamba/0.6.6'
    memory "${memory} GB"
    cpus "${cores}"
    publishDir "output/bam/benchmark"
    errorStrategy 'finish'

    when:
    // Only run combinations with more than 0.187 cores per GB of memory.
    // With the grid used here, 12 cores / 64 GB (0.1875) still runs while
    // 8 cores / 64 GB (0.125) is skipped.
    cores / memory > 0.187

    input:
    set val(row), val(meta), file(fq1), file(fq2), val(cores), val(memory), val(method) from align

    output:
    set val(row), val(meta), file("${slug}.bam"), file("${slug}.bam.bai") into align_out

    script:
    // Construct read group. Each tag appears exactly once: the SAM
    // specification does not allow a tag to be repeated within a header line.
    // "\\t" keeps a literal backslash-t in the command line, passed to bwa -R.
    RG = ["@RG",
          "ID:${row.sample_name}.${row.seq_sample_id}_${row.run_dir}_${row.lane}",
          "SM:${row.sample_name}",
          "LB:${row.seq_sample_id}",
          "PU:${row.flowcell}.${row.lane}",
          "PL:illumina"].join("\\t")

    // Copy the incoming meta map before annotating it, so the map object that
    // arrived on the channel is not mutated. The copy is what `output:` emits.
    meta = new HashMap(meta)
    meta['cores'] = cores
    meta['memory'] = memory
    meta['method'] = method

    PICARD_PARAMS = "MAX_RECORDS_IN_RAM=2000000 VALIDATION_STRINGENCY=STRICT"

    // Not declared with `def`: `slug` is referenced by the output declaration.
    slug = p(row, meta)

    // Thread shares for the "balanced" variants. Integer division keeps the
    // values whole (Groovy `/` on integers yields a BigDecimal) and the lower
    // bound of 1 avoids requesting zero threads.
    def half_cpus = Math.max(1, task.cpus.intdiv(2))
    def quarter_cpus = Math.max(1, task.cpus.intdiv(4))

    if (method == 1)
    // Uses a fixed minimum read length (35) and a fixed 8g Java heap rather
    // than params.min_read_length / task.memory.
    """
    # 'Old' method
    # Uses the old samtools
    module load SAMtools/0.1.19-foss-2016b
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > 35 || \$1 ~ /^@/' - | \\
    samtools view -@ ${task.cpus} -bhS - > out.unsorted.bam
    java -d64 -Xmx8g -jar \${ROOTPICARD}/SortSam.jar INPUT=out.unsorted.bam OUTPUT=${slug}.bam SORT_ORDER=coordinate ${PICARD_PARAMS}
    java -d64 -Xmx8g -jar \${ROOTPICARD}/BuildBamIndex.jar INPUT=${slug}.bam OUTPUT=${slug}.bam.bai ${PICARD_PARAMS}
    """
    else if (method == 2)
    """
    # Samtools+sambamba with intermediate file
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh --threads ${task.cpus} - > out.tmp.bam
    # Sort alignment
    sambamba sort -m "${task.memory.toGiga()}GB" \\
    --nthreads=${task.cpus*params.threads_per_core} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    out.tmp.bam
    sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
    rm out.tmp.bam
    """
    else if (method == 3)
    """
    # Samtools+sambamba with intermediate file and balanced threads
    bwa mem -t ${half_cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh --threads ${half_cpus} - > out.tmp.bam
    # Sort alignment
    sambamba sort -m "${task.memory.toGiga()}GB" \\
    --nthreads=${task.cpus} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    out.tmp.bam
    sambamba index --nthreads=${task.cpus} ${slug}.bam
    rm out.tmp.bam
    """
    else if (method == 4)
    """
    # Samtools+sambamba with pipe unbalanced
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh --threads ${task.cpus} - | \\
    sambamba sort -m "${task.memory.toGiga()-2}GB" \\
    --nthreads=${task.cpus} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    /dev/stdin
    sambamba index --nthreads=${task.cpus} ${slug}.bam
    """
    else if (method == 5)
    """
    # Samtools+sambamba with pipe and balanced threads
    bwa mem -t ${half_cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh --threads ${quarter_cpus} - | \\
    sambamba sort -m "${task.memory.toGiga()-2}GB" \\
    --nthreads=${quarter_cpus} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    /dev/stdin
    sambamba index --nthreads=${task.cpus} ${slug}.bam
    """
    else if (method == 6)
    """
    # Samtools+sambamba with pipe, balanced threads and balanced memory
    bwa mem -t ${half_cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh --threads ${quarter_cpus} - | \\
    sambamba sort -m "${task.memory.toGiga().intdiv(2) - 2}GB" \\
    --nthreads=${quarter_cpus} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    /dev/stdin
    sambamba index --nthreads=${task.cpus} ${slug}.bam
    """
    else if (method == 7)
    """
    # Samtools+sambamba with intermediate file; No threads on samtools out
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
    samtools view -bh - > out.tmp.bam
    # Sort alignment
    sambamba sort -m "${task.memory.toGiga()}GB" \\
    --nthreads=${task.cpus*params.threads_per_core} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    out.tmp.bam
    sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
    rm out.tmp.bam
    """
    else if (method == 8)
    """
    # BWA intermediate SAM file; threaded samtools to intermediate BAM; sambamba sort from file
    # out.SAM is removed as soon as it has been converted
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
    samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam
    rm out.SAM
    # Sort alignment
    sambamba sort -m "${task.memory.toGiga()}GB" \\
    --nthreads=${task.cpus*params.threads_per_core} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    out.tmp.bam
    sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
    rm out.tmp.bam
    """
    else if (method == 9)
    // NOTE(review): this variant was originally labelled "Samtools pipe to
    // sambamba", but it has always used intermediate files; it differs from
    // method 8 only in when out.SAM is deleted. Confirm whether a piped
    // variant was intended.
    """
    # BWA intermediate SAM file; threaded samtools to intermediate BAM; sambamba sort from file
    # out.SAM is kept until the end of the task
    bwa mem -t ${task.cpus} \\
    -R "${RG}" \\
    ${params.genome} ${fq1} ${fq2} | \\
    awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
    samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam
    # Sort alignment
    sambamba sort -m "${task.memory.toGiga()}GB" \\
    --nthreads=${task.cpus*params.threads_per_core} \\
    --tmpdir=. \\
    --out=${slug}.bam \\
    out.tmp.bam
    sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
    rm out.tmp.bam out.SAM
    """
}