Given a text file, count and display its duplicate and unique lines. Trimming each line before comparison is optional.
// ---- Configuration -------------------------------------------------------
// The first command-line argument names the file to analyse; without one a
// default path is used.
String fileName = args ? args[0] : /C:\somedefaulttext.txt/
// Lines that fully match this regex (i.e. contain '{' or '}') are skipped.
String ignoreLinesWith = /.*[{}].*/
// When true, leading/trailing whitespace is removed before lines are compared.
boolean shouldTrim = true
// Maximum number of characters of a line requested from maxSize() in the report.
int MAX_LABEL_SIZE = 50

// ---- State ---------------------------------------------------------------
File input = new File(fileName)
// Every distinct line seen so far.
Set lines = [] as Set
// line -> total number of occurrences, only for lines seen more than once.
// The default of 1 stands for the first occurrence, so the first repeat
// bumps the count straight to 2.
Map dups = [:].withDefault { 1 }
// Number of lines that passed the filters below (skipped lines are not counted).
int cntLines = 0

// ---- Scan the file -------------------------------------------------------
input.eachLine { line ->
    if (shouldTrim) line = line.trim()
    // Skip empty lines, single-character lines and lines matching the ignore pattern.
    if (!line || line.size() == 1 || line ==~ ignoreLinesWith) return
    // Set.add() returns false when the line was already present, i.e. it is a repeat.
    if (!lines.add(line)) dups[line]++
    cntLines++
}

// ---- Report --------------------------------------------------------------
// Most frequently repeated lines first.
dups = dups.sort { a, b -> b.value <=> a.value }

use (MaxCategory) { // provide maxSize() on Strings
    dups.each { k, v ->
        println "$v : `${k.maxSize(MAX_LABEL_SIZE)}`"
    }
}

// sum() yields null for an empty collection; fall back to 0 so a file
// without any duplicated line still produces a summary instead of failing.
int totalDupLines = dups.values().sum() ?: 0

println "\nFile: $fileName"
println " >checked lines: $cntLines"
// Checked lines minus every occurrence of a duplicated line = lines occurring exactly once.
println " >unique lines: ${cntLines - totalDupLines}"
println " >duplicate lines found: ${dups.size()}"
println " >total duplicate lines found: $totalDupLines"
/**
 * Category class that makes {@code maxSize} available on Strings inside a
 * {@code use (MaxCategory) { ... }} block.
 */
class MaxCategory {
    /**
     * Shortens a string for display.
     *
     * @param s    the text to shorten
     * @param size maximum number of characters of {@code s} to keep;
     *             negative values are treated as 0
     * @return {@code s} unchanged when it has at most {@code size} characters,
     *         otherwise its first {@code size} characters followed by "..."
     */
    static String maxSize(String s, int size) {
        int limit = Math.max(size, 0);
        // substring's end index is exclusive, so exactly `limit` characters are
        // kept; an inclusive range such as 0..size would keep one too many.
        return s.length() > limit ? s.substring(0, limit) + "..." : s;
    }
}