software-mariodiana
7/6/2016 - 8:26 PM

How to use annotations of one type to filter a list of annotations of another type (UIMA and DKPro).

How to use annotations of one type to filter a list of annotations of another type (UIMA and DKPro).

// Precondition: Token, Sentence, POS, and NGram (2) annotations have already been applied.
Collection<NGram> ngrams = JCasUtil.select(jcas, NGram.class);

// Keep only the NGrams that pass both checks: isBigram (exactly two covered
// Tokens) and hasNoPunctuation (no covered PUNC annotation). Single-word
// NGrams and NGrams containing punctuation are therefore dropped. The &&
// short-circuits, so hasNoPunctuation runs only for NGrams that are bigrams.
List<NGram> bigrams = ngrams
        .stream()
        .filter(candidate -> isBigram(candidate, jcas) && hasNoPunctuation(candidate, jcas))
        .collect(Collectors.toList());
        
/* Static methods for filtering. */

/**
 * Tests whether the given n-gram spans exactly two tokens.
 *
 * @param ngram the n-gram annotation whose begin/end offsets delimit the search
 * @param jcas  the JCas that is searched for {@code Token} annotations
 * @return {@code true} if exactly two {@code Token} annotations are selected
 *         between the n-gram's begin and end offsets; {@code false} otherwise
 */
private static boolean isBigram(NGram ngram, JCas jcas) {
    final int tokenCount = JCasUtil
            .selectCovered(jcas, Token.class, ngram.getBegin(), ngram.getEnd())
            .size();
    return tokenCount == 2;
}

/**
 * Tests whether the given n-gram is free of punctuation.
 *
 * @param bigram the n-gram annotation whose begin/end offsets delimit the search
 * @param jcas   the JCas that is searched for {@code PUNC} annotations
 * @return {@code true} if no {@code PUNC} annotation is selected between the
 *         n-gram's begin and end offsets; {@code false} otherwise
 */
private static boolean hasNoPunctuation(NGram bigram, JCas jcas) {
    return JCasUtil
            .selectCovered(jcas, PUNC.class, bigram.getBegin(), bigram.getEnd())
            .isEmpty();
}