jweinst1
9/4/2015 - 8:33 AM

SentenceObjects.py

SentenceObjects.py

from strmodfunctions import *
#Linked list that links together the words of a sentence.

class Word (object):
	"""A single node in the linked list of words that makes up a sentence.

	Attributes:
		first: the word stored in this node.
		rest: the next Word node in the chain, or None at the end.
	"""

	def __init__(self, first=None, rest=None):
		self.first = first
		self.rest = rest

	def get_data(self):
		"""Return the word stored in this node."""
		return self.first

	def get_next(self):
		"""Return the following node, or None if this is the last one."""
		return self.rest

	def __getitem__(self, i):
		"""Return the word stored i nodes after this one.

		Indices of zero or below return this node's own word.

		Raises:
			IndexError: if the chain ends before i hops have been made.
		"""
		node = self
		while i > 0:
			node = node.rest
			i -= 1
			# Check after every hop; walking more than one step past the
			# end would otherwise fail with AttributeError on None.
			if node is None:
				raise IndexError('The Sentence Index is Out of Range.')
		return node.first
		
class Fuse_Link (object):
	"""Join two links by making the second one follow the first.

	Attributes:
		chain: the first link, whose rest now points at the second link.
	"""

	def __init__(self, link1, link2):
		# Attach the tail first, then expose the joined head as the chain.
		link1.rest = link2
		self.chain = link1

class Sentence (object):
	"""Hold the data for an individual sentence as a linked list of words.

	Attributes:
		words: head Word node of the chain, or None for an empty sentence.
		question: True when the source text ended with '?'.
		wordset: set of the distinct words in the sentence.
		subject, predicate, object: lists that start out empty.
	"""

	def __init__(self, list):
		"""Build the word chain from the sentence text.

		Args:
			list: the sentence text. The name shadows the builtin but is
				kept so that existing keyword callers are unaffected.
		"""
		tokens = self.remove_punc(list)
		# Build back to front so each node can point at its successor.
		head = None
		for token in reversed(tokens):
			head = Word(token, head)
		self.words = head
		self.subject, self.predicate = [], []
		self.object = []
		self.word_set()

	def _nodes(self):
		"""Yield every node of the chain from head to tail."""
		node = self.words
		while node is not None:
			yield node
			node = node.rest

	def _node_at(self, i):
		"""Return the node at index i; indices of zero or below give the head.

		Raises:
			IndexError: if the chain ends before index i is reached.
		"""
		node = self.words
		while node is not None and i > 0:
			node = node.rest
			i -= 1
		if node is None:
			raise IndexError('The Sentence Index is Out of Range.')
		return node

	def __repr__(self):
		"""Return the words joined by single spaces."""
		return ' '.join(node.first for node in self._nodes())

	def __str__(self):
		"""Return the same text as __repr__."""
		return self.__repr__()

	def __getitem__(self, i):
		"""Return the word at index i (IndexError when out of range)."""
		return self._node_at(i).first

	def __setitem__(self, i, tag):
		"""Store tag as the POS_tag of the word at index i.

		Raises:
			IndexError: if i is past the end of the sentence.
		"""
		# Tag the node that was walked to, not the head of the chain.
		self._node_at(i).POS_tag = tag

	def __len__(self):
		"""Return the number of words in the sentence."""
		return sum(1 for _ in self._nodes())

	def __contains__(self, other):
		"""Report whether any word in the sentence equals other."""
		return any(node.first == other for node in self._nodes())

	def remove_punc(self, text):
		"""Strip the closing punctuation and return the lower-cased words.

		Also records on self.question whether the text ended with '?'.

		Args:
			text: the sentence as a string; a list or tuple of words is
				joined with spaces first.

		Returns:
			The list of lower-cased, whitespace-separated words.
		"""
		if isinstance(text, (list, tuple)):
			text = ' '.join(text)
		# Trailing whitespace must not hide the closing punctuation mark.
		text = text.rstrip()
		self.question = text.endswith('?')
		# Only drop the final character when it is not part of a word, so
		# unpunctuated text keeps its last letter.
		if text and not text[-1].isalnum():
			text = text[:-1]
		return text.lower().split()

	def word_set(self):
		"""Collect the distinct words of the sentence into self.wordset."""
		self.wordset = set(node.first for node in self._nodes())

	def checktagnum(self, i):
		"""Return the POS_tag of the word at index i.

		Raises:
			IndexError: if i is past the end of the sentence.
			AttributeError: if that word has not been given a POS_tag.
		"""
		return self._node_at(i).POS_tag
	
		


class Sentence_Bank (object):
	"""A container that breaks a text into a bank of Sentence objects.

	Attributes:
		text: the whitespace-separated tokens of the source text.
		bank: the Sentence objects in order of appearance.
		template: scratch list holding the tokens of the sentence that is
			currently being assembled.
	"""

	# A token ending in one of these characters closes the current sentence.
	_terminators = ('.', '?', '!')

	def __init__(self, text):
		"""Split text into sentences; empty text gives an empty bank."""
		self.text = text.split()
		self.bank, self.template = [], []
		for token in self.text:
			self.template.append(token)
			if token.endswith(self._terminators):
				self._flush()
		# Whatever trails the last terminator still forms a final sentence.
		if self.template:
			self._flush()

	def _flush(self):
		"""Turn the accumulated tokens into a Sentence and start a new one."""
		# Sentence takes the sentence as one string, not as a token list.
		self.bank.append(Sentence(' '.join(self.template)))
		self.template = []

	def __getitem__(self, i):
		"""Return the sentence stored at index i of the bank."""
		return self.bank[i]

	def __len__(self):
		"""Return the number of sentences in the bank."""
		return len(self.bank)
			
class Word_Ref (object):
	"""Word list for one part of speech, used for tagging and word look-up.

	Attributes:
		reference: list of the whitespace-separated words read from the
			selected file.
	"""

	# Accepted selections; each is read from '<selection>.txt'.
	_libraries = ('Verbs', 'Nouns', 'Adjectives', 'Adverbs')

	def __init__(self, selection):
		"""Load the word list named by selection.

		Args:
			selection: one of 'Verbs', 'Nouns', 'Adjectives' or 'Adverbs'.

		Raises:
			ReferenceError: if selection is not one of the names above.
		"""
		if selection not in self._libraries:
			raise ReferenceError('Must choose a valid reference library.')
		# The with-block closes the file handle as soon as it has been read.
		with open(selection + '.txt', 'r') as wordfile:
			self.reference = wordfile.read().split()

	def __contains__(self, other):
		"""Report whether other is listed, ignoring one trailing comma."""
		# endswith is safe on an empty string, unlike indexing other[-1].
		if other.endswith(','):
			other = other[:-1]
		return other in self.reference
			
def wordref_tester(text):
	"""Return the first word of text that no reference list contains.

	The final character of text is dropped (it is taken to be the closing
	punctuation mark) and the rest is lower-cased and split on whitespace.

	Args:
		text: the sentence to check.

	Returns:
		The first word found in none of the noun, verb, adjective and
		adverb lists, or 'No Leaks' when every word is covered.
	"""
	references = [Word_Ref(name) for name in ('Nouns', 'Verbs', 'Adjectives', 'Adverbs')]
	words = text[:-1].lower().split()
	# Each word is tested on its own against all four lists; a match in any
	# one of them is enough, and the last word is checked as well.
	for word in words:
		if not any(word in reference for reference in references):
			return word
	return 'No Leaks'
class SP_Tagger (object):
	"""Tag the subject and predicate of a sentence.

	Attributes:
		target: the Sentence whose subject and predicate lists are filled.
		subject_words: words treated as belonging to the subject.
		prepredicate_words: words that introduce a clause before the
			subject (only referenced by commented-out code in tag).
		predicate_words: words treated as starting the predicate.
	"""
	def __init__(self, target):
		assert isinstance(target, Sentence), 'Must take a Sentence Object.'
		self.target = target
		self.subject_words = ['the', 'in', 'and', 'or', 'with', 'as', 'at', 'he', 'his', 'she', 'her', 'i', 'a']
		self.prepredicate_words = ['if', 'in', 'either', 'when', 'whether', 'however']
		self.predicate_words = ['do', 'did', 'was', 'is', 'are', 'were', 'can', 'cannot', 'wont', 'come', 'like', 'came', 'will']
	def __repr__(self):
		"""Return a string showing the words tagged so far."""
		# repr() requires a string; returning a tuple or None raises TypeError.
		return 'SP_Tagger(subject=%r, predicate=%r)' % (self.target.subject, self.target.predicate)
	def makelink(self, oldlink, newlink):
		"""Attach newlink after the last node reachable from oldlink.

		Returns:
			The node that was the tail of the chain before newlink was added.
		"""
		tail = oldlink
		while tail.rest is not None:
			tail = tail.rest
		tail.rest = newlink
		return tail
	def append_subject(self, string):
		"""Add string to the target sentence's subject words."""
		self.target.subject.append(string)
	def append_predicate(self, string):
		"""Add string to the target sentence's predicate words."""
		self.target.predicate.append(string)
	def tag(self):
		"""Sort the target's words into its subject and predicate lists.

		The scan is driven by an indicator that takes the values 'subject',
		'predicate' or 'pre-predicate'. The four word lists are loaded from
		their files on every call. The last word of the sentence is never
		visited because the loop stops at len(target) - 1.

		Returns:
			A 2-tuple of join_with_spaces(subject) and
			join_with_spaces(predicate); join_with_spaces is presumably
			supplied by strmodfunctions -- TODO confirm.

		Raises:
			ReferenceError: when a word cannot be classified, or when the
				indicator is not 'predicate' at the end of a pass.
		"""
		indicator = None
		nouns = Word_Ref('Nouns')
		verbs = Word_Ref('Verbs')
		adjectives = Word_Ref('Adjectives')
		adverbs = Word_Ref('Adverbs')
		current_word = 0
		while current_word < len(self.target)-1:
			# NOTE(review): throughout this loop the tests are independent
			# ifs rather than an elif chain, so several can fire in one pass
			# (each on the next word), and every else pairs only with the if
			# directly above it. The control flow is left exactly as written
			# because the intended behaviour is not clear from the code.
			if len(self.target.subject) == 0 and len(self.target.predicate) == 0 :
				#if self.target[current_word] in self.prepredicate_words:
					#self.append_predicate(self.target[current_word])
					#indicator = 'pre-predicate'
					#current_word += 1
				if self.target[current_word] in self.subject_words:
					self.append_subject(self.target[current_word])
					indicator = 'subject'
					current_word += 1
				if self.target[current_word] in nouns:
					self.append_subject(self.target[current_word])
					indicator = 'subject'
					current_word += 1
				if self.target[current_word] in adjectives:
					self.append_subject(self.target[current_word])
					indicator = 'subject'
					current_word += 1
				#if self.target[current_word] in adverbs:
					#self.append_predicate(self.target[current_word])
					#indicator = 'pre-predicate'
					#current_word += 1
				#if self.target[current_word] in verbs:
					#self.append_predicate(self.target[current_word])
					#indicator = 'pre-predicate'
					#current_word += 1
				else:
					self.append_subject(self.target[current_word])
					indicator = 'subject'
					current_word += 1
			if indicator == 'pre-predicate':
				# A trailing comma closes the leading clause and hands over
				# to the subject.
				if self.target[current_word][-1] == ',':
					self.append_predicate(self.target[current_word])
					indicator = 'subject'
					current_word += 1
				else:
					self.append_predicate(self.target[current_word])
					current_word += 1
			if indicator == 'subject':
				if self.target[current_word] in nouns:
					self.append_subject(self.target[current_word])
					current_word += 1
				if self.target[current_word] in adjectives:
					self.append_subject(self.target[current_word])
					current_word += 1
				if self.target[current_word] in self.subject_words:
					self.append_subject(self.target[current_word])
					current_word += 1
				if self.target[current_word] in self.predicate_words:
					self.append_predicate(self.target[current_word])
					indicator = 'predicate'
					current_word += 1
				if self.target[current_word] in verbs:
					self.append_predicate(self.target[current_word])
					indicator = 'predicate'
					current_word += 1
				if self.target[current_word] in adverbs:
					self.append_predicate(self.target[current_word])
					indicator = 'predicate'
					current_word += 1
				else:
					# Parenthesised so the statement is valid in Python 2 and 3.
					print(self.target[current_word])
					raise ReferenceError('Word not identifiable')
			if indicator == 'predicate':
				self.append_predicate(self.target[current_word])
				current_word += 1
			else:
				print(self.target[current_word])
				print(indicator)
				raise ReferenceError('Incorrect set of conditionals being looped')
		return join_with_spaces(self.target.subject), join_with_spaces(self.target.predicate)

#Tester Function for Subject_Predicate tagging
def SP_tagtester(text):
	"""Run subject/predicate tagging on one sentence given as text.

	The text must end with a full stop; the result is whatever
	SP_Tagger.tag() returns for the Sentence built from it.
	"""
	assert text[-1] == '.' #valid sentence text
	return SP_Tagger(Sentence(text)).tag()
	
class Compound_tagger (object):
	"""Mark a sentence as compound when one of its words ends in ',', ':' or ';'."""
	def __init__(self):
		self.compound_chars = set([',', ':', ';'])
	def __call__(self, words):
		"""Set words.compound to True or False; nothing is returned."""
		assert words.__class__ == Sentence #Must be a Valid Sentence Object.
		node = words.words
		# Start with the head word, then keep walking until a word ending in
		# a compound character turns up or the chain runs out.
		marked = node.first[-1] in self.compound_chars
		while not marked:
			node = node.rest
			if node is None:
				break
			marked = node.first[-1] in self.compound_chars
		words.compound = marked
			
class Noun_tagger (object):
	"""Pick out likely nouns in a statement from the words around them.

	Attributes:
		nouns: working list; it is empty again after every call.
		articles: words that may introduce a noun.
		conditionals: words whose following word is recorded as a noun.
	"""

	def __init__(self):
		self.nouns = []
		self.articles = {'the', 'a', 'an'}
		self.conditionals = {'if', 'either'}
	def __call__(self, statement):
		"""Collect the nouns of statement and store them on statement.nouns.

		A word is recorded when it directly follows a conditional, or when
		it follows an article and is itself followed by another article.
		The last word of the statement is never used as a starting point.

		Args:
			statement: an indexable, sized object that accepts a nouns
				attribute.

		Returns:
			The list stored on statement.nouns.
		"""
		found = []
		size = len(statement)
		position = 0
		while position < size - 1:
			stepped_past_article = False
			if statement[position] in self.conditionals:
				found.append(statement[position + 1])
				position += 1
			if statement[position] in self.articles:
				two_ahead = position + 2
				# Guard the look-ahead so an article near the end of the
				# statement cannot index past the last word.
				if two_ahead < size and statement[two_ahead] in self.articles:
					found.append(statement[position + 1])
				position += 1
				stepped_past_article = True
			# Always move on unless the article branch already did; testing
			# the previous word instead stalls on the word after an article.
			if not stepped_past_article:
				position += 1
		statement.nouns = found
		self.nouns = []
		return statement.nouns

class Inspector_tagger (object):
	"""Record which of a fixed list of key words occur in a statement."""

	def __init__(self):
		self.inspectors = {}
		self.keys = ['is', 'was', 'are', 'were', 'if', 'can', 'do', 'does', 'did', 'have', 'has', 'not', 'could', 'would', 'should', 'whether', 'either', 'whenever', 'why', 'will', 'since', 'because', 'that', 'these', 'those', 'i', 'me', 'you', 'we', 'and', 'or', 'when']
	def __call__(self, statement):
		"""Store on statement.inspectors a dict mapping each key word to
		whether the statement contains it."""
		presence = {}
		for key in self.keys:
			presence[key] = key in statement
		statement.inspectors = presence

class Time_tagger (object):
	"""Classify a statement as past, current or future from its words.

	Attributes:
		past, future, current: sets of marker words for each tense.
	"""
	def __init__(self):
		self.past = {'was', 'did', 'used', 'had', 'were', 'came'}
		self.future = {'will', 'should', 'could', 'whenever', 'would'}
		self.current = {'is', 'do', 'have', 'has', 'does', 'are', 'now'}
	def __call__(self, statement):
		"""Set statement.time to 'future', 'current', 'past' or None.

		When markers of several tenses are present, future wins over
		current and current wins over past. None means no marker was found.

		Args:
			statement: an object with a wordset attribute holding a set.
		"""
		words = statement.wordset
		# One chain, so the final else only applies when nothing matched and
		# can no longer overwrite an earlier 'past' or 'current' result.
		if words.intersection(self.future):
			statement.time = 'future'
		elif words.intersection(self.current):
			statement.time = 'current'
		elif words.intersection(self.past):
			statement.time = 'past'
		else:
			statement.time = None