s4553711
10/2/2017 - 9:05 AM

about-fastq-process.scala


// Load the input as an RDD with one element per text line, asking Spark for
// at least two partitions.
val fastqs = sc.textFile(input, 2)

// Pass-through mapping: every line is emitted unchanged.
val result = fastqs.map(line => line)

// Report the partition count of the mapped RDD and the number of raw lines.
// The trailing numbers are the values the original author noted for their
// sample input.
val linePartitionCount = result.partitions.length
println(s"size> $linePartitionCount") // 1
val totalLineCount = fastqs.count()
println(s"total fastqs lines:$totalLineCount") // 8

// Fold every four consecutive lines of a partition into one 4-tuple record.
// The element names below follow the usual FASTQ layout
// (header / sequence / separator / quality) -- presumably what the input
// contains; verify against the actual files.
//
// Records are produced lazily and in the order the lines appear, so a
// partition is never materialised in memory and downstream output keeps
// file order (prepending to a List would reverse it).
//
// NOTE(review): this assumes each partition holds a whole number of 4-line
// records. Spark decides the partition boundaries of a text file, so a record
// may straddle two partitions on larger inputs -- TODO confirm / handle.
val fastqRecords = fastqs.mapPartitions { lines =>
    lines.grouped(4).map {
        case Seq(header, sequence, separator, quality) =>
            (header, sequence, separator, quality)
        case partial =>
            // Same exception type a bare next() on the exhausted iterator
            // raises, but with a message that says what is actually wrong.
            throw new NoSuchElementException(
                s"incomplete FASTQ record: expected 4 lines but got ${partial.size}")
    }
}
// Print the first line of every record.
// NOTE: RDD.foreach runs on the executors, so on a cluster this output goes
// to the executors' stdout rather than the driver console.
fastqRecords.foreach { case (firstLine, _, _, _) =>
    println(s"fastq: $firstLine")
}

// Report the partition count and the number of 4-line records. The second
// label used to read "total fastqs lines", but the value is a record count,
// not a line count. The trailing numbers are the values the original author
// noted for their sample input.
println(s"size> ${fastqRecords.partitions.length}") // 1
println(s"total fastq records: ${fastqRecords.count()}") // 2