# SentenceObjects.py
from strmodfunctions import *
# Linked list that links together the words of a sentence.
class Word(object):
    """One node of the singly linked list that stores a sentence.

    ``first`` holds the payload of this node (a word) and ``rest`` refers to
    the node that follows it, or ``None`` when this node is the tail.
    """

    def __init__(self, first=None, rest=None):
        self.first, self.rest = first, rest

    def get_data(self):
        """Return the payload stored in this node."""
        return self.first

    def get_next(self):
        """Return the node after this one (``None`` at the tail)."""
        return self.rest

    def __getitem__(self, i):
        """Return the payload found ``i`` nodes after this one.

        Raises IndexError when the chain ends before ``i`` steps were taken.
        When ``i`` is not greater than zero no step is taken, so this node's
        own payload is returned.
        """
        node = self
        remaining = i
        while remaining > 0:
            node = node.rest
            remaining -= 1
            if node is None:
                raise IndexError('The Sentence Index is Out of Range.')
        return node.first
class Fuse_Link(object):
    """Join two links by making the second the successor of the first.

    The first link is kept on ``chain``; its ``rest`` attribute is overwritten
    with the second link.
    """

    def __init__(self, link1, link2):
        head = link1
        self.chain = head
        head.rest = link2
class Sentence(object):
    """Hold the words of an individual sentence as a linked list of Word nodes.

    Attributes set by the constructor:
        question  -- True when the source text ended with '?'.
        words     -- head node of the list, or None for an empty sentence.
        subject, predicate, object -- empty lists for later tagging results.
        wordset   -- set of the distinct words in the sentence.
    """

    def __init__(self, list):
        """Build the sentence from raw text (or from a list of its tokens).

        The parameter keeps its historical name even though it shadows the
        builtin, so keyword callers are not broken.
        """
        tokens = self.remove_punc(list)
        # Build back to front so each new node can point at the one after it.
        self.words = None
        for token in reversed(tokens):
            self.words = Word(token, self.words)
        self.subject, self.predicate = [], []
        self.object = []
        self.word_set()

    def _nodes(self):
        """Yield every node of the list from head to tail."""
        node = self.words
        while node is not None:
            yield node
            node = node.rest

    def _node_at(self, i):
        """Return the node at position ``i``.

        Raises IndexError when the list ends before position ``i``.  An ``i``
        that is not greater than zero resolves to the head node, which is the
        long-standing behaviour of the index methods.
        """
        node = self.words
        if node is None:
            raise IndexError('The Sentence Index is Out of Range.')
        while i > 0:
            node = node.rest
            i -= 1
            if node is None:
                raise IndexError('The Sentence Index is Out of Range.')
        return node

    def __str__(self):
        """Return the words joined by single spaces."""
        return ' '.join(node.first for node in self._nodes())

    # repr() and str() have always produced the same text for a Sentence.
    __repr__ = __str__

    def __getitem__(self, i):
        """Return the word at position ``i`` (IndexError past the end)."""
        return self._node_at(i).first

    def __setitem__(self, i, tag):
        """Attach the part-of-speech ``tag`` to the word at position ``i``.

        The tag is stored on the indexed node itself so that checktagnum(i)
        reads back the value written here.
        """
        self._node_at(i).POS_tag = tag

    def __len__(self):
        """Return the number of words in the sentence."""
        return sum(1 for _ in self._nodes())

    def __contains__(self, other):
        """Return True when ``other`` equals one of the words."""
        return any(node.first == other for node in self._nodes())

    def remove_punc(self, text):
        """Normalise raw text into a list of lower-case words.

        Sets ``self.question`` to True when the text ends with '?', otherwise
        to False.  The final character is dropped only when it is not a letter
        or digit, so text without closing punctuation keeps its last letter.
        A list or tuple of tokens is accepted and joined with spaces first.
        """
        if isinstance(text, (list, tuple)):
            text = ' '.join(text)
        self.question = text.endswith('?')
        if text and not text[-1].isalnum():
            text = text[:-1]
        return text.lower().split()

    def word_set(self):
        """Store the set of distinct words on ``self.wordset``."""
        self.wordset = {node.first for node in self._nodes()}

    def checktagnum(self, i):
        """Return the part-of-speech tag stored on the word at position ``i``.

        Raises IndexError past the end of the sentence and AttributeError when
        the word has not been tagged yet.
        """
        return self._node_at(i).POS_tag
class Sentence_Bank(object):
    """Container that takes a text object and breaks it into Sentence objects.

    The text is split on whitespace and a sentence is closed after every
    token that ends with '.', '?' or '!'.  Tokens left over at the end of the
    text form a final sentence.  Empty text yields an empty bank.

    Attributes:
        bank     -- list of the Sentence objects, in text order.
        template -- tokens of the last sentence that was stored.
        text     -- empty list once construction has consumed every token.
    """

    # A token ending in one of these characters closes the current sentence.
    _TERMINATORS = ('.', '?', '!')

    def __init__(self, text):
        self.bank, self.template = [], []
        for token in text.split():
            # Close the pending sentence before starting the next one.
            if self.template and self.template[-1].endswith(self._TERMINATORS):
                self._store(self.template)
                self.template = []
            self.template.append(token)
        if self.template:
            self._store(self.template)
        # Every token has been moved into the bank.
        self.text = []

    def _store(self, tokens):
        """Append a Sentence built from ``tokens`` to the bank."""
        # Sentence works on raw text, so hand it the tokens re-joined by spaces.
        self.bank.append(Sentence(' '.join(tokens)))

    def __getitem__(self, i):
        """Return the ``i``-th sentence (list indexing rules apply)."""
        return self.bank[i]

    def __len__(self):
        """Return the number of sentences in the bank."""
        return len(self.bank)
class Word_Ref(object):
    """Word list used for part-of-speech tagging and word look-up.

    ``selection`` names the library to load: 'Verbs', 'Nouns', 'Adjectives'
    or 'Adverbs'.  The words are read from ``<selection>.txt`` in the current
    working directory and kept, whitespace-split, on ``self.reference``.

    Raises ReferenceError for any other selection.
    """

    # Each library is stored in a text file named after it.
    _LIBRARIES = ('Verbs', 'Nouns', 'Adjectives', 'Adverbs')

    def __init__(self, selection):
        if selection not in self._LIBRARIES:
            raise ReferenceError('Must choose a valid reference library.')
        # The with-block closes the file even when reading fails.
        with open(selection + '.txt', 'r') as wordfile:
            self.reference = wordfile.read().split()

    def __contains__(self, other):
        """Return True when ``other`` is in the library.

        A single trailing comma is ignored so that words taken from running
        text still match.  An empty string is simply reported as absent.
        """
        if other.endswith(','):
            other = other[:-1]
        return other in self.reference
def wordref_tester(text):
    """Return the first word of ``text`` that no word library recognises.

    The text is lower-cased and split on whitespace after a closing
    punctuation character, if present, has been removed.  Every word is then
    looked up in the noun, verb, adverb and adjective libraries.

    Returns the first word found in none of them, or 'No Leaks' when every
    word is known.
    """
    libraries = [Word_Ref(name)
                 for name in ('Nouns', 'Verbs', 'Adverbs', 'Adjectives')]
    # Drop the final character only when it really is punctuation.
    if text and not text[-1].isalnum():
        text = text[:-1]
    for word in text.lower().split():
        # Each word is tested on its own, so the index can never run past
        # the end of the sentence and the last word is checked as well.
        if not any(word in library for library in libraries):
            return word
    return 'No Leaks'
class SP_Tagger (object):
# Tags the subject and predicate of a sentence.
# tag() appends words of the target Sentence to target.subject and
# target.predicate.
# NOTE(review): indentation is not present in this copy of the file, so the
# nesting of the branches in the methods below cannot be confirmed from the
# text alone.
def __init__(self, target):
# target: the Sentence to tag. An object whose class is not exactly Sentence
# fails the assertion below (the check is skipped when asserts are disabled).
assert target.__class__ == Sentence #Must take a Sentence Object.
self.target = target
# Cue-word lists consulted by tag(). prepredicate_words is only referenced
# from commented-out code in this class.
self.subject_words = ['the', 'in', 'and', 'or', 'with', 'as', 'at', 'he', 'his', 'she', 'her', 'i', 'a']
self.prepredicate_words = ['if', 'in', 'either', 'when', 'whether', 'however']
self.predicate_words = ['do', 'did', 'was', 'is', 'are', 'were', 'can', 'cannot', 'wont', 'come', 'like', 'came', 'will']
def __repr__(self):
# Returns a pair of join_with_spaces() results (subject, predicate) when the
# condition below holds, otherwise None.
# The condition parses as `subject and (predicate != [])`.
# NOTE(review): Python requires __repr__ to return a string, so repr() on an
# instance raises TypeError for both return values -- confirm what was intended.
if self.target.subject and self.target.predicate != []:
return join_with_spaces(self.target.subject), join_with_spaces(self.target.predicate)
else:
return None
def makelink(self, oldlink, newlink):
# Attaches newlink at the first link whose `rest` is None, walking the chain
# by recursion.
# NOTE(review): the result of the recursive call is not returned, so only the
# call that performs the attachment hands back a link -- confirm intent.
if oldlink.rest == None:
oldlink.rest = newlink
return oldlink
else:
oldlink = oldlink.rest
self.makelink(oldlink, newlink)
def append_subject(self, string):
# Adds one word to the subject list of the target sentence.
self.target.subject.append(string)
def append_predicate(self, string):
# Adds one word to the predicate list of the target sentence.
self.target.predicate.append(string)
def tag(self):
# Walks the target sentence by index, appending words to target.subject or
# target.predicate, and returns the pair
# (join_with_spaces(subject), join_with_spaces(predicate)).
# Raises ReferenceError from the two `else` branches that print a word first.
# join_with_spaces is not defined in this file; presumably it comes from the
# star import of strmodfunctions -- TODO confirm.
# writer_routes is assigned here but not read anywhere in this method.
writer_routes = ['subject', 'predicate', 'pre-predicate']
# indicator names the part of the sentence currently being filled. No live
# line assigns 'pre-predicate'; only commented-out code does.
indicator = None
# One Word_Ref per word library, used for the membership tests below.
nouns = Word_Ref('Nouns')
verbs = Word_Ref('Verbs')
adjectives = Word_Ref('Adjectives')
adverbs = Word_Ref('Adverbs')
current_word = 0
# The loop condition stops one index short of the sentence length.
# NOTE(review): current_word can be advanced by more than one test per pass
# without re-checking this bound -- verify against the intended nesting.
while current_word < len(self.target)-1:
# Nothing tagged yet: the opening word(s) go to the subject.
if len(self.target.subject) == 0 and len(self.target.predicate) == 0 :
#if self.target[current_word] in self.prepredicate_words:
#self.append_predicate(self.target[current_word])
#indicator = 'pre-predicate'
#current_word += 1
if self.target[current_word] in self.subject_words:
self.append_subject(self.target[current_word])
indicator = 'subject'
current_word += 1
if self.target[current_word] in nouns:
self.append_subject(self.target[current_word])
indicator = 'subject'
current_word += 1
if self.target[current_word] in adjectives:
self.append_subject(self.target[current_word])
indicator = 'subject'
current_word += 1
#if self.target[current_word] in adverbs:
#self.append_predicate(self.target[current_word])
#indicator = 'pre-predicate'
#current_word += 1
#if self.target[current_word] in verbs:
#self.append_predicate(self.target[current_word])
#indicator = 'pre-predicate'
#current_word += 1
else:
self.append_subject(self.target[current_word])
indicator = 'subject'
current_word += 1
# Pre-predicate words go to the predicate; a word ending in ',' switches the
# indicator back to 'subject'.
if indicator == 'pre-predicate':
if self.target[current_word][-1] == ',':
self.append_predicate(self.target[current_word])
indicator = 'subject'
current_word += 1
else:
self.append_predicate(self.target[current_word])
current_word += 1
# While in the subject: nouns, adjectives and subject cue words extend the
# subject; predicate cue words, verbs and adverbs start the predicate.
if indicator == 'subject':
if self.target[current_word] in nouns:
self.append_subject(self.target[current_word])
current_word += 1
if self.target[current_word] in adjectives:
self.append_subject(self.target[current_word])
current_word += 1
if self.target[current_word] in self.subject_words:
self.append_subject(self.target[current_word])
current_word += 1
if self.target[current_word] in self.predicate_words:
self.append_predicate(self.target[current_word])
indicator = 'predicate'
current_word += 1
if self.target[current_word] in verbs:
self.append_predicate(self.target[current_word])
indicator = 'predicate'
current_word += 1
if self.target[current_word] in adverbs:
self.append_predicate(self.target[current_word])
indicator = 'predicate'
current_word += 1
else:
# Python 2 print statement: shows the offending word before raising.
print self.target[current_word]
raise ReferenceError('Word not identifiable')
# Once the predicate has started, each word handled here is appended to it.
if indicator == 'predicate':
self.append_predicate(self.target[current_word])
current_word += 1
else:
# Python 2 print statements: show the word and indicator before raising.
print self.target[current_word]
print indicator
raise ReferenceError('Incorrect set of conditionals being looped')
return join_with_spaces(self.target.subject), join_with_spaces(self.target.predicate)
#Tester Function for Subject_Predicate tagging
def SP_tagtester(text):
    """Run subject/predicate tagging on a single sentence of text.

    ``text`` must end with a period; this is checked with an assertion.
    Returns whatever ``SP_Tagger.tag`` returns for the sentence.
    """
    assert text[-1] == '.' #valid sentence text
    return SP_Tagger(Sentence(text)).tag()
class Compound_tagger(object):
    """Tag whether a sentence is compounded.

    A sentence counts as compound when at least one of its words ends with
    ',', ':' or ';'.
    """

    def __init__(self):
        self.compound_chars = {',', ':', ';'}

    def __call__(self, words):
        """Set ``words.compound`` to True or False; always returns None."""
        assert words.__class__ == Sentence #Must be a Valid Sentence Object.
        node = words.words
        found = node.first[-1] in self.compound_chars
        # Stop at the first word that carries a compound character.
        while not found and node.rest is not None:
            node = node.rest
            found = node.first[-1] in self.compound_chars
        words.compound = found
class Noun_tagger (object):
# Collects words from a statement as nouns, based on the word that precedes
# them, and stores the result on the statement.
# NOTE(review): indentation is not present in this copy of the file, so the
# nesting of the if/else branches in __call__ cannot be confirmed from the
# text alone.
def __init__(self):
# Scratch list that __call__ fills and then replaces with a new empty list.
self.nouns = []
# Words whose presence drives the look-ahead in __call__.
self.articles = {'the', 'a', 'an'}
self.conditionals = {'if', 'either'}
def __call__(self, statement):
# statement: an indexable object with a length (presumably a Sentence --
# TODO confirm). Sets statement.nouns to the collected list and returns it.
current_word = 0
# The loop condition stops one index short of the statement length.
while current_word < len(statement)-1:
# The word after a conditional word is recorded as a noun.
if statement[current_word] in self.conditionals:
self.nouns.append(statement[current_word+1])
current_word += 1
# NOTE(review): the look-aheads at +1 and +2 are not bounds-checked here.
if statement[current_word] in self.articles:
if statement[current_word+2] not in self.articles:
current_word += 1
else:
self.nouns.append(statement[current_word+1])
current_word += 1
# NOTE(review): when current_word is 0 this looks up index -1, and the `pass`
# branch leaves current_word unchanged -- verify the loop always advances.
if statement[current_word-1] in self.articles:
pass
else:
current_word += 1
# Hand the collected list to the statement, then start a fresh list so the
# next call does not share it.
statement.nouns = self.nouns
self.nouns = []
return statement.nouns
class Inspector_tagger(object):
    """Record, for a fixed list of key words, whether each is in a statement."""

    def __init__(self):
        self.inspectors = {}
        self.keys = [
            'is', 'was', 'are', 'were', 'if', 'can', 'do', 'does', 'did',
            'have', 'has', 'not', 'could', 'would', 'should', 'whether',
            'either', 'whenever', 'why', 'will', 'since', 'because', 'that',
            'these', 'those', 'i', 'me', 'you', 'we', 'and', 'or', 'when',
        ]

    def __call__(self, statement):
        """Store a key word -> bool mapping on ``statement.inspectors``."""
        presence = {}
        for key in self.keys:
            presence[key] = key in statement
        statement.inspectors = presence
class Time_tagger(object):
    """Tagging object that classifies the tense of a statement.

    The classification looks for marker words in ``statement.wordset``.
    """

    def __init__(self):
        self.past = {'was', 'did', 'used', 'had', 'were', 'came'}
        self.future = {'will', 'should', 'could', 'whenever', 'would'}
        self.current = {'is', 'do', 'have', 'has', 'does', 'are', 'now'}

    def __call__(self, statement):
        """Set ``statement.time`` to 'past', 'current', 'future' or None.

        When markers of several tenses are present the later check wins, in
        the order past, current, future.  None is stored only when no marker
        of any tense is found, so a 'past' or 'current' result is no longer
        wiped out by the absence of a future marker.
        """
        wordset = statement.wordset
        tense = None
        checks = (
            ('past', self.past),
            ('current', self.current),
            ('future', self.future),
        )
        for label, markers in checks:
            if wordset.intersection(markers):
                tense = label
        statement.time = tense