danielecook
3/5/2019 - 5:02 PM

nextflow drops cache

nextflow drops cache


// Benchmark grid: every item emitted by `design` is crossed with each
// core count, each memory size (GB) and each alignment method id.
def coreOptions   = [8, 12, 16]          // Cores
def memoryOptions = [12, 32, 64]         // Memory (GB)
def methodOptions = [1, 2, 3, 7, 8, 9]   // Method

align = design
    .combine(coreOptions)
    .combine(memoryOptions)
    .combine(methodOptions)


process alignment {
    /*
        Benchmark alignment process.

        Each task receives a sample (row, meta, paired FASTQs) together with a
        core count, a memory size in GB and a numeric method id. The method id
        selects one of several bwa mem -> filter -> BAM -> sort -> index
        pipelines; method 1 reproduces the old Picard-based workflow, the
        others use samtools + sambamba with different thread/memory splits and
        with or without intermediate files.

        Emits the sorted BAM and its index, named after `slug`, and publishes
        them to output/bam/benchmark.
    */

    cache 'deep'

    tag { gen_tag("alignment", row, meta) }

    module 'BWA/0.7.15-intel-2016b:picard/1.107-Java-1.8.0_92:SAMtools/1.3.1-foss-2016b:Sambamba/0.6.6'

    memory "${memory} GB"

    cpus "${cores}"

    publishDir "output/bam/benchmark"

    errorStrategy 'finish'

    when:
        // Only run combinations with more than 0.187 cores per GB of memory.
        // Note the ratio is cores/memory (cores per GB), not GB per core.
        cores / memory > 0.187

    input:
        set val(row), val(meta), file(fq1), file(fq2), val(cores), val(memory), val(method) from align

    output:
        set val(row), val(meta), file("${slug}.bam"), file("${slug}.bam.bai") into align_out

    script:
        // Construct the @RG read-group header line. Each tag is emitted once:
        // the SAM specification does not allow a tag to repeat within a
        // header line.
        def RG = ["@RG",
                  "ID:${row.sample_name}.${row.seq_sample_id}_${row.run_dir}_${row.lane}",
                  "SM:${row.sample_name}",
                  "LB:${row.seq_sample_id}",
                  "PU:${row.flowcell}.${row.lane}",
                  "PL:illumina"].join("\\t")

        // Work on a copy so the benchmark parameters are not written into the
        // map object received from the input channel. `meta` is deliberately
        // assigned without `def`: the output block emits this updated copy.
        meta = new HashMap(meta)
        meta['cores'] = cores
        meta['memory'] = memory
        meta['method'] = method

        def PICARD_PARAMS = "MAX_RECORDS_IN_RAM=2000000 VALIDATION_STRINGENCY=STRICT"

        // `slug` must stay undeclared (no `def`) so that the output block can
        // resolve "${slug}.bam". `p` is a helper defined outside this process.
        slug = p(row, meta)

        // Integer thread/memory splits. Groovy's `/` on integers yields a
        // BigDecimal, which would render as e.g. "1.5" for odd values; intdiv
        // keeps the arguments integral, and threads are clamped to >= 1.
        def halfCpus    = Math.max(1, task.cpus.intdiv(2) as int)
        def quarterCpus = Math.max(1, task.cpus.intdiv(4) as int)
        def halfMemGb   = task.memory.toGiga().intdiv(2)

    if (method == 1)
        """
        # 'Old' method
        # Uses the old samtools
        module load SAMtools/0.1.19-foss-2016b
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > 35 || \$1 ~ /^@/' - | \\
        samtools view -@ ${task.cpus} -bhS - > out.unsorted.bam

        java -d64 -Xmx8g -jar \${ROOTPICARD}/SortSam.jar INPUT=out.unsorted.bam OUTPUT=${slug}.bam SORT_ORDER=coordinate ${PICARD_PARAMS}
        java -d64 -Xmx8g -jar \${ROOTPICARD}/BuildBamIndex.jar INPUT=${slug}.bam OUTPUT=${slug}.bam.bai ${PICARD_PARAMS}
        """
    else if (method == 2)
        """
        # Samtools+sambamba with intermediate file
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh --threads ${task.cpus} - > out.tmp.bam

        # Sort alignment
        sambamba sort -m "${task.memory.toGiga()}GB" \\
                      --nthreads=${task.cpus*params.threads_per_core} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      out.tmp.bam

        sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
        rm out.tmp.bam
        """
    else if (method == 3)
        """
        # Samtools+sambamba with intermediate file and balanced threads
        bwa mem -t ${halfCpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh --threads ${halfCpus} - > out.tmp.bam

        # Sort alignment
        sambamba sort -m "${task.memory.toGiga()}GB" \\
                      --nthreads=${task.cpus} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      out.tmp.bam

        sambamba index --nthreads=${task.cpus} ${slug}.bam
        rm out.tmp.bam
        """
    else if (method == 4)
        """
        # Samtools+sambamba with pipe unbalanced
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh --threads ${task.cpus} - | \\
        sambamba sort -m "${task.memory.toGiga()-2}GB" \\
                      --nthreads=${task.cpus} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      /dev/stdin

        sambamba index --nthreads=${task.cpus} ${slug}.bam
        """
    else if (method == 5)
        """
        # Samtools+sambamba with pipe and balanced threads
        bwa mem -t ${halfCpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh --threads ${quarterCpus} - | \\
        sambamba sort -m "${task.memory.toGiga()-2}GB" \\
                      --nthreads=${quarterCpus} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      /dev/stdin

        sambamba index --nthreads=${task.cpus} ${slug}.bam
        """
    else if (method == 6)
        """
        # Samtools+sambamba with pipe, balanced threads and balanced memory
        bwa mem -t ${halfCpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh --threads ${quarterCpus} - | \\
        sambamba sort -m "${halfMemGb-2}GB" \\
                      --nthreads=${quarterCpus} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      /dev/stdin

        sambamba index --nthreads=${task.cpus} ${slug}.bam
        """
    else if (method == 7)
        """
        # Samtools+sambamba with intermediate file; No threads on samtools out
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
        samtools view -bh - > out.tmp.bam

        # Sort alignment
        sambamba sort -m "${task.memory.toGiga()}GB" \\
                      --nthreads=${task.cpus*params.threads_per_core} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      out.tmp.bam

        sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
        rm out.tmp.bam
        """
    else if (method == 8)
        """
        # BWA Intermediate; Samtools intermediate;Samtools+sambamba with intermediate file; No threads on samtools out
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
        samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam
        rm out.SAM

        # Sort alignment
        sambamba sort -m "${task.memory.toGiga()}GB" \\
                      --nthreads=${task.cpus*params.threads_per_core} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      out.tmp.bam

        sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
        rm out.tmp.bam
        """
    else if (method == 9)
        // NOTE(review): the label below says samtools is piped into sambamba,
        // but the commands write out.tmp.bam exactly like method 8; the only
        // difference is that out.SAM is removed at the end instead of right
        // after conversion. Confirm whether a real pipe was intended.
        """
        # BWA Intermediate; Samtools pipe to sambamba
        bwa mem -t ${task.cpus} \\
            -R "${RG}" \\
            ${params.genome} ${fq1} ${fq2} | \\
        awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
        samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam

        # Sort alignment
        sambamba sort -m "${task.memory.toGiga()}GB" \\
                      --nthreads=${task.cpus*params.threads_per_core} \\
                      --tmpdir=. \\
                      --out=${slug}.bam \\
                      out.tmp.bam

        sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
        rm out.tmp.bam out.SAM
        """
    else
        // Fail loudly instead of letting the script block evaluate to null
        // when an unsupported method id is supplied.
        throw new IllegalArgumentException("Unknown alignment method: ${method} (expected 1-9)")
}