holajoyce
3/23/2016 - 12:23 AM

Concordance.java

package com.dominolabs.codechallenge;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Builds a concordance of a text file: an alphabetically sorted map from each
 * lower-cased word to a single-entry map of
 * {@code occurrence count -> list of sentence indexes}.
 *
 * <p>Sentence and word boundaries are found with {@link BreakIterator}
 * instances created for {@link Locale#US}. Sentence indexes start at 0 and
 * one index is appended per occurrence, so a word that appears twice in the
 * same sentence contributes that sentence's index twice.
 *
 * <p>An instance keeps its state between calls: every call to
 * {@link #buildConcordance(File)} adds to the same map and keeps numbering
 * sentences where the previous call stopped. Instances hold mutable,
 * unsynchronized state and are therefore not safe for concurrent use.
 */
public class Concordance {

	private final BreakIterator sentenceIterator;
	private final BreakIterator wordIterator;

	/** word -> { occurrence count -> sentence index of every occurrence } */
	private final Map<String, Map<Integer, List<Integer>>> concordance = new TreeMap<>();

	/** Index that will be assigned to the next sentence encountered. */
	private int cnt = 0;

	/** Creates an empty concordance that uses US-locale sentence and word breaking. */
	public Concordance() {
		sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
		wordIterator = BreakIterator.getWordInstance(Locale.US);
	}

	/**
	 * Splits one sentence into tokens and records every token that starts with
	 * a letter or a digit; whitespace and punctuation tokens are skipped.
	 *
	 * @param sentenceIndex index of the sentence the tokens belong to
	 * @param target        text of the sentence
	 */
	private void extractWords(int sentenceIndex, String target) {
		wordIterator.setText(target);
		int start = wordIterator.first();
		for (int end = wordIterator.next(); end != BreakIterator.DONE; end = wordIterator.next()) {
			String token = target.substring(start, end);
			// Inspect the first code point rather than the first char so that a
			// token starting with a supplementary-plane letter is not dropped.
			if (Character.isLetterOrDigit(token.codePointAt(0))) {
				// Locale.ROOT keeps case folding independent of the JVM default
				// locale (e.g. Turkish dotless i).
				recordOccurrence(token.toLowerCase(Locale.ROOT), sentenceIndex);
			}
			start = end;
		}
	}

	/**
	 * Registers one occurrence of {@code word} in sentence {@code sentenceIndex}.
	 *
	 * @param word          already lower-cased word
	 * @param sentenceIndex index of the sentence containing the occurrence
	 */
	private void recordOccurrence(String word, int sentenceIndex) {
		Map<Integer, List<Integer>> info = concordance.get(word);
		if (info == null) {
			// First sighting: frequency 1 and a fresh index list.
			List<Integer> indexes = new ArrayList<>();
			indexes.add(sentenceIndex);
			info = new TreeMap<>();
			info.put(1, indexes);
			concordance.put(word, info);
			return;
		}
		// The inner map always holds exactly one entry keyed by the current
		// frequency, so bumping the count means re-keying that entry.
		Integer freq = info.keySet().iterator().next();
		List<Integer> indexes = info.remove(freq);
		indexes.add(sentenceIndex);
		info.put(freq + 1, indexes);
	}

	/**
	 * Walks {@code source} sentence by sentence, printing each sentence to
	 * standard output and feeding it to {@link #extractWords(int, String)}.
	 *
	 * @param bi     sentence break iterator to use
	 * @param source full text to split
	 * @return the running sentence counter after processing {@code source}
	 */
	private int enumerateSentences(BreakIterator bi, String source) {
		bi.setText(source);
		int from = bi.first();
		for (int to = bi.next(); to != BreakIterator.DONE; to = bi.next()) {
			String sentence = source.substring(from, to);
			// Diagnostic trace kept from the original implementation.
			System.out.println("sentence = " + sentence);
			extractWords(cnt, sentence);
			cnt += 1;
			from = to;
		}
		return cnt;
	}

	/**
	 * Reads {@code fin}, joins its lines with single spaces and adds every word
	 * found to this instance's concordance.
	 *
	 * <p>If the file cannot be read, the stack trace is printed and whatever
	 * this instance had accumulated before the call is returned unchanged.
	 *
	 * @param fin text file to index (read with the platform default charset)
	 * @return the live concordance map of this instance, not a copy
	 */
	public Map<String, Map<Integer, List<Integer>>> buildConcordance(File fin) {
		StringBuilder text = new StringBuilder();
		// try-with-resources closes the reader even when reading or sentence
		// processing fails part-way through.
		try (BufferedReader reader = new BufferedReader(new FileReader(fin))) {
			String line;
			while ((line = reader.readLine()) != null) {
				// readLine() strips the terminator; a space keeps words on
				// adjacent lines from running together.
				text.append(line).append(' ');
			}
			enumerateSentences(sentenceIterator, text.toString());
		} catch (IOException e) {
			e.printStackTrace();
		}
		return concordance;
	}
}