# Prepare tagged text to find a specific pattern of words (a chunk defined by a
# tag sequence), then collect the matches that contain a specific keyword in a list.
import nltk
from nltk.chunk import *
from nltk.chunk.util import *
from nltk.chunk.regexp import *
from nltk import Tree
# Collect every chunk from the tagged Brown corpus whose text contains the
# keyword 'sciatica'.
#
# A chunk is any run of four consecutive tokens tagged NN, VB, IN, NN
# (presumably noun / base-form verb / preposition / noun in the Brown
# tagset -- confirm against the tagset documentation).
cp = nltk.RegexpParser('CHUNK: {<NN> <VB> <IN> <NN>}')

# Matching chunks, each stored as its words joined by single spaces.
bucket = []

brown = nltk.corpus.brown
for sent in brown.tagged_sents():
    tree = cp.parse(sent)
    for subtree in tree.subtrees():
        # subtrees() also yields the sentence root; only CHUNK nodes matter.
        if subtree.label() != 'CHUNK':
            continue
        # Each leaf is indexed as leaf[0] for the word; build the phrase once
        # and reuse it for both the keyword test and the stored value.
        phrase = ' '.join(leaf[0] for leaf in subtree)
        # Case-sensitive substring test, so the keyword may also match inside
        # a longer word.
        if 'sciatica' in phrase:
            bucket.append(phrase)