# alexgurrola
# 2/18/2019 - 9:31 PM
#
# gistfile1.py

from __future__ import with_statement
import random
 
def create_chain(file_paths):
    """Build a first-order word-transition table from a set of text files.

    Every file is read line by line, each line is split into lowercase
    words, and for every adjacent pair ``(previous, current)`` the count
    ``chain[previous][current]`` is incremented.

    Args:
        file_paths: Iterable of paths to text files, processed in order.

    Returns:
        A dict mapping each word to a dict of its successors and how often
        each one followed it.  Every inner dict also carries the special
        key ``"total count"`` holding the sum of its successor counts.  The
        key cannot collide with a real word because words never contain
        whitespace.  The empty string ``""`` is the start state that
        precedes the very first word read.
    """
    word_counter = {}
    # The previous word is deliberately carried across line and file
    # boundaries, so the last word of one file links to the first of the next.
    previous_word = ""
    for path in file_paths:
        with open(path) as source:
            for line in source:
                # split() without an argument splits on any run of whitespace
                # and discards empty pieces, so newlines and tabs never end
                # up glued to (or counted as) a word.
                for raw_word in line.split():
                    word = raw_word.lower()
                    followers = word_counter.get(previous_word)
                    if followers is None:
                        followers = {"total count": 0}
                        word_counter[previous_word] = followers
                    followers[word] = followers.get(word, 0) + 1
                    followers["total count"] += 1
                    previous_word = word
    return word_counter
 
def construct_sentence(markov_chain, word_count=300, initial_word=""):
    """Generate text by taking a weighted random walk over a word chain.

    Args:
        markov_chain: Dict mapping a word to a dict of successor counts plus
            the special ``"total count"`` key, i.e. the shape returned by
            ``create_chain``.
        word_count: The walk performs ``word_count - 1`` transitions.  The
            starting word is emitted as well when it is non-empty, so the
            result then holds ``word_count`` words.
        initial_word: Word to start from (case-insensitive).  If it is not a
            key of the chain, a random key is used instead.

    Returns:
        The generated words joined by single spaces.  An empty string is
        returned when the chain is empty or ``word_count`` is less than 2.
    """
    if not markov_chain:
        # Nothing to walk over; a random restart would have no candidates.
        return ""

    # Snapshot of the keys so random restarts can index them directly.
    states = list(markov_chain)
    current_word = initial_word.lower()
    sentence_words = []

    for _ in range(1, word_count):
        # Unknown start word or dead end (a word that was never followed by
        # anything): jump to a random known word.  The jump target is used
        # only as state and is not itself added to the output mid-sentence.
        if current_word not in markov_chain:
            current_word = random.choice(states)

        successors = markov_chain[current_word]
        # Pick a position in 1..total and walk the cumulative counts until
        # that position is covered; this selects each successor with
        # probability proportional to its count.
        target = random.randrange(1, successors["total count"] + 1)
        cumulative = 0
        next_word = None
        for candidate, occurrences in successors.items():
            # "total count" is bookkeeping, not a successor word.
            if candidate == "total count":
                continue
            next_word = candidate
            cumulative += occurrences
            if target <= cumulative:
                break

        # The sentence opens with the starting word, unless that word is the
        # empty start-of-text state, which has no printable form.
        if not sentence_words and current_word:
            sentence_words.append(current_word)
        sentence_words.append(next_word)
        current_word = next_word

    return " ".join(sentence_words)
 
# Train the chain on the bundled corpus.  The paths are absolute and
# machine-specific, so every file must exist at exactly these locations.
markov = create_chain(
    (
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/bible.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/arabiannights.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/alice.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/taoteching.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/communist_manifesto.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/portrait.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/ulysses.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/dubliners.txt",
    )
)
# print(markov)  # uncomment to dump the raw transition table
# Parenthesised single-argument print is valid and prints the same text on
# both Python 2 and Python 3; the statement form is a syntax error on 3.
print(construct_sentence(markov_chain=markov, initial_word="i", word_count=300))