crazy4groovy
1/23/2014 - 4:14 PM

Given a text file, count/display the duplicate and unique lines. Trim optional.

Given a text file, count/display the duplicate and unique lines. Trim optional.

// Input file: first command-line argument, or a default path when none is given.
String fileName = !!args ? args[0] : /C:\somedefaulttext.txt/
// Lines fully matching this regex (i.e. containing '{' or '}') are skipped.
String ignoreLinesWith = /.*[{}].*/
// When true, surrounding whitespace is stripped before lines are compared.
boolean shouldTrim = true

File input = new File(fileName)

// Distinct lines seen so far.
Set lines = [] as Set
// Line text -> occurrence count, only for lines seen more than once.
// The default of 1 stands for the first occurrence, which was only recorded in `lines`.
Map dups = [:].withDefault{1}
// Number of lines that passed the filters (repeated lines included).
int cntLines = 0
// Length limit handed to maxSize() when a duplicated line is printed.
int MAX_LABEL_SIZE = 50

use (MaxCategory) { // provide maxSize() on Strings

    input.eachLine { l->
        if (shouldTrim) l = l.trim()

        // Skip empty lines, single-character lines, and lines matching the ignore pattern.
        if (!l || l.size() == 1 || l ==~ ignoreLinesWith) return

        // Set.add returns false when the line is already present, i.e. it is a repeat.
        if (!lines.add(l)) dups[l]++
        cntLines++
    }

    // Highest occurrence count first.
    dups = dups.sort { a, b -> b.value <=> a.value }

    dups.each { k,v->
        println "$v : `${k.maxSize(MAX_LABEL_SIZE)}`"
    }

}

// sum() of an empty collection is null in Groovy; fall back to 0 so a file
// without any duplicates still yields a report instead of failing on `int - null`.
int totalDups = dups.values().sum() ?: 0

println "\nFile: $fileName"
println " >checked lines: $cntLines"
println " >unique lines: ${cntLines - totalDups}"
println " >duplicate lines found: ${dups.keySet().size()}"
println " >total duplicate lines found: $totalDups"




/**
 * Category class that adds a length-limiting helper to String.
 * Meant to be activated with <code>use (MaxCategory) { ... }</code>.
 */
class MaxCategory {
    /**
     * Shortens a string for display.
     *
     * @param s    the text to shorten
     * @param size maximum number of characters of {@code s} to keep
     * @return {@code s} unchanged when it has at most {@code size} characters;
     *         otherwise its first {@code size} characters followed by '...'
     */
    static String maxSize(String s, int size) {
        // take(size) keeps exactly `size` characters; the inclusive range
        // 0..size used previously kept size + 1.
        return (s.size() > size ? s.take(size) + '...' : s)
    }
}