package com.dominolabs.codechallenge;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
/**
 * Builds a concordance of a text file: an alphabetically sorted map from each
 * lower-cased word to its occurrence data.
 *
 * <p>The occurrence data for a word is a map holding exactly one entry whose key
 * is the number of times the word has been seen and whose value is the list of
 * zero-based sentence indexes, one element per occurrence (so a word that
 * appears twice in the same sentence contributes that index twice).
 *
 * <p>Sentences and words are delimited with {@link BreakIterator} instances for
 * {@link Locale#US}. Instances keep mutable state without any synchronization
 * and are therefore not safe for concurrent use.
 */
public class Concordance {

    /** Splits the whole input text into sentences. */
    private final BreakIterator sentenceIterator;

    /** Splits a single sentence into word and non-word tokens. */
    private final BreakIterator wordIterator;

    /** word -> { occurrence count -> sentence index of every occurrence }. */
    private final Map<String, Map<Integer, List<Integer>>> concordance = new TreeMap<>();

    /** Number of sentences processed so far; also the index given to the next sentence. */
    private int cnt = 0;

    /** Creates an empty concordance using US-English sentence and word boundary rules. */
    public Concordance() {
        sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
        wordIterator = BreakIterator.getWordInstance(Locale.US);
    }

    /**
     * Records every word of one sentence in the concordance.
     *
     * <p>A token counts as a word when its first code point is a letter or a
     * digit; whitespace and punctuation tokens are skipped.
     *
     * @param sentenceIndex zero-based index of the sentence within the text
     * @param target the sentence text
     */
    private void extractWords(int sentenceIndex, String target) {
        wordIterator.setText(target);
        int start = wordIterator.first();
        int end = wordIterator.next();
        while (end != BreakIterator.DONE) {
            String token = target.substring(start, end);
            // codePointAt (rather than charAt) so that a word starting with a
            // supplementary-plane letter is not rejected as a lone surrogate.
            if (Character.isLetterOrDigit(token.codePointAt(0))) {
                // Locale.ROOT keeps the map keys independent of the JVM default locale.
                recordOccurrence(token.toLowerCase(Locale.ROOT), sentenceIndex);
            }
            start = end;
            end = wordIterator.next();
        }
    }

    /**
     * Adds one occurrence of {@code word} in sentence {@code sentenceIndex}.
     *
     * @param word the already lower-cased word
     * @param sentenceIndex zero-based index of the sentence containing the occurrence
     */
    private void recordOccurrence(String word, int sentenceIndex) {
        Map<Integer, List<Integer>> info = concordance.get(word);
        if (info == null) {
            // First sighting: frequency 1, with a single sentence index.
            List<Integer> indexes = new ArrayList<>();
            indexes.add(sentenceIndex);
            info = new TreeMap<>();
            info.put(1, indexes);
            concordance.put(word, info);
            return;
        }
        // The inner map holds a single entry keyed by the current frequency;
        // re-key that entry under frequency + 1 with the new index appended.
        Integer freq = info.keySet().iterator().next();
        List<Integer> indexes = info.remove(freq);
        indexes.add(sentenceIndex);
        info.put(freq + 1, indexes);
    }

    /**
     * Splits {@code source} into sentences and indexes the words of each one.
     *
     * @param bi the sentence iterator to use
     * @param source the full text
     * @return the total number of sentences processed by this instance so far
     */
    private int enumerateSentences(BreakIterator bi, String source) {
        bi.setText(source);
        int start = bi.first();
        int end = bi.next();
        while (end != BreakIterator.DONE) {
            extractWords(cnt, source.substring(start, end));
            cnt += 1;
            start = end;
            end = bi.next();
        }
        return cnt;
    }

    /**
     * Reads {@code fin}, joins its lines with single spaces and adds every word
     * of the resulting text to the concordance.
     *
     * <p>The file is decoded with the platform default charset (that is what
     * {@link FileReader} uses). Calling this method more than once on the same
     * instance accumulates into the same concordance, and sentence numbering
     * continues where the previous call stopped.
     *
     * <p>If the file cannot be opened or read, the stack trace is printed and
     * nothing from that file is added.
     *
     * @param fin the text file to index
     * @return the live, sorted concordance map held by this instance (not a copy)
     */
    public Map<String, Map<Integer, List<Integer>>> buildConcordance(File fin) {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(new FileReader(fin))) {
            String line;
            while ((line = br.readLine()) != null) {
                text.append(line).append(' ');
            }
            // Index only after the whole file was read successfully.
            enumerateSentences(sentenceIterator, text.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
        return concordance;
    }
}