# Improved BinaryTangent.py
# -*- coding: utf-8 -*-
# @Author: codykochmann
# @Date: 2017-01-23 08:42:55
# @Last Modified 2017-01-23
# @Last Modified time: 2017-01-23 11:57:29
# this is a rewrite of the original OOP based binary tangent
from bitarray import bitarray
from itertools import product
from collections import Counter
from time import time
# Number of bits in the pattern window used for predictions and pattern listing.
DEFAULT_DEPTH = 7
# Fraction of bits allowed to differ for two patterns to count as a fuzzy match
# (the required similarity is 1 - DEFAULT_FUZZINESS).
DEFAULT_FUZZINESS = 0.2
class BinaryTangent(bitarray):
    """A hashable bitarray with sliding-window pattern-matching helpers.

    On top of the parent bitarray behaviour this class adds hashing, window
    generation, fuzzy pattern matching against its own history, and a simple
    next-bit predictor with a self back-test (calculate_confidence).
    """

    def __init__(self, *args):
        """Accept the same positional arguments as the parent constructor.

        NOTE(review): the arguments are not forwarded from here; presumably
        bitarray consumes them in __new__ -- confirm against the bitarray docs.
        """
        # super() has to name *this* class; naming the parent skips the
        # parent's own initialiser in the MRO.
        super(BinaryTangent, self).__init__()

    def __hash__(self):
        """Return a hash of the bit contents, taken from the to01() string.

        NOTE(review): instances stay mutable, so mutating one after it has
        been used as a dict key or set member will change its hash.
        """
        return hash(self.to01())

    def window(self, window_size):
        """Yield every contiguous slice of `window_size` bits, left to right.

        Yields nothing when `window_size` is larger than len(self).
        """
        for start in range(len(self) - window_size + 1):
            yield self[start:start + window_size]

    def similarity(self, target):
        """Return the fraction of positions (0.0 - 1.0) where self and target agree.

        Raises AssertionError when the two lengths differ.
        """
        assert len(self) == len(target), "needed length of {} and instead got {}".format(len(self), len(target))
        # the XOR has a set bit at every differing position; summing counts them
        return 1 - 1.0 * sum(self ^ target) / len(self)

    def fuzzy_matches(self, target, result_size=1, fuzziness=DEFAULT_FUZZINESS):
        """Yield (match, outcome) pairs for every window that resembles target.

        target      -- pattern to look for; must not be longer than self
        result_size -- how many bits following each match to yield as outcome
        fuzziness   -- fraction of bits allowed to differ from target

        Raises AssertionError when target is longer than self.
        """
        assert len(self) >= len(target)
        required_similarity = 1 - fuzziness
        pattern_size = len(target)
        for chunk in self.window(pattern_size + result_size):
            # split by the pattern length rather than by -result_size so that
            # result_size=0 does not collapse the match to an empty slice
            match = chunk[:pattern_size]
            if match.similarity(target) >= required_similarity:
                yield match, chunk[pattern_size:]

    def fuzzy_outcomes(self, target, result_size=1, fuzziness=DEFAULT_FUZZINESS):
        """Same as fuzzy_matches, but yield only the outcome bits of each match.

        Raises AssertionError when target is longer than self.
        """
        assert len(self) >= len(target)
        for _match, outcome in self.fuzzy_matches(target, result_size, fuzziness):
            yield outcome

    def count(self, target):
        """Return how many windows of self are exactly equal to target.

        Overlapping occurrences are all counted.
        Raises AssertionError when target is longer than self.

        NOTE(review): this shadows the parent class's count(); confirm no
        caller relies on the parent's signature.
        """
        assert len(target) <= len(self)
        # compare against the *target's* bits (not self's), one window at a time
        wanted = target.to01()
        return sum(1 for piece in self.window(len(target)) if piece.to01() == wanted)

    def future_possibilities(self, future_steps=3):
        """Print self extended by every possible ending of 1..future_steps bits.

        Nothing is returned; each candidate is written to stdout.
        """
        for extra_bits in range(1, future_steps + 1):
            for ending in product((False, True), repeat=extra_bits):
                # single-argument print(...) behaves the same on Python 2 and 3
                print(self + BinaryTangent(ending))

    def patterns(self, depth=DEFAULT_DEPTH):
        """Return a generator over the distinct windows of `depth` bits."""
        # de-duplicate on the '0'/'1' string form, then rebuild the instances
        unique_strings = set(piece.to01() for piece in self.window(depth))
        return (BinaryTangent(bits) for bits in unique_strings)

    def generate_confidence_tests(self, depth):
        """Yield (pattern, actual_next_bit) for every window of depth + 1 bits."""
        for chunk in self.window(depth + 1):
            yield chunk[:-1], chunk[-1]

    def calculate_confidence(self):
        """Back-test predict_next over this sequence and summarise the results.

        Every window of DEFAULT_DEPTH bits is fed to predict_next and the
        resulting confidence score is filed under the bit that actually
        followed the window.

        Returns a dict with the keys 'depth', 'outcome_success', 'duration',
        'depth_prediction', 'invalid_tests' and 'prediction'; the key
        'outcomes_with_this_confidence' is added when the final prediction
        carries a confidence score.
        """
        # results[0] / results[1]: scores of tests whose actual next bit was 0 / 1
        # results[2]: predictions that were rejected for lack of data
        results = ([], [], [])
        start_time = time()
        output = {
            'depth': DEFAULT_DEPTH,
            'outcome_success': [{}, {}],
            'duration': 0.0,
            'depth_prediction': {},
            'invalid_tests': 0
        }
        for test_pattern, result in self.generate_confidence_tests(DEFAULT_DEPTH):
            prediction = self.predict_next(test_pattern)
            if 'message' in prediction:
                # predict_next only sets 'message' when it had too little data
                results[2].append(prediction)
            else:
                results[int(result)].append(prediction['confidence_score'])
        for outcome in (True, False):
            scores = results[outcome]
            if scores:
                output['outcome_success'][outcome] = {
                    "actual_outcome": outcome,
                    "occurances_of_1": sum(1 for score in scores if score == 1.0),
                    "occurances_of_0": sum(1 for score in scores if score == 0.0)
                }
        output['invalid_tests'] = len(results[2])
        output['duration'] = time() - start_time
        # prediction for the bits that would follow the end of the sequence
        final_prediction = self.predict_next()
        output['prediction'] = final_prediction
        if 'confidence_score' in final_prediction:
            final_score = final_prediction['confidence_score']
            output['outcomes_with_this_confidence'] = {}
            for outcome in (True, False):
                same_score = sum(1 for score in results[outcome] if score == final_score)
                output['outcomes_with_this_confidence'][str(outcome)] = same_score
        return output

    def predict_next(self, target=None):
        """Estimate how likely the bits following `target` are to be set.

        target -- pattern to look up; defaults to the last DEFAULT_DEPTH bits

        Collects the two bits that followed every fuzzy match of target in
        self. With fewer than three matches the result is
        {'similar_patterns': 0, 'message': 'not enough data'}; otherwise it is
        a dict holding 'confidence_score' (fraction of set bits among the
        collected outcomes), 'similar_patterns' (number of matches) and
        'previous_patterns' (outcome string -> occurrence count).
        """
        if target is None:
            target = self[-DEFAULT_DEPTH:]
        outcome_count = 2  # how many events to look into the future
        outcome_list = list(self.fuzzy_outcomes(target, outcome_count))
        # bail out before computing the score: with no matches the division
        # below would raise ZeroDivisionError
        if len(outcome_list) < 3:
            return {
                'similar_patterns': 0,
                'message': 'not enough data'
            }
        set_bits = sum(sum(outcome) for outcome in outcome_list)
        confidence_score = 1.0 * set_bits / (len(outcome_list) * outcome_count)
        return {
            "confidence_score": confidence_score,
            "similar_patterns": len(outcome_list),
            "previous_patterns": dict(Counter(outcome.to01() for outcome in outcome_list))
        }
if __name__ == '__main__':
    # Demo: back-test the predictor on 512 random bits and print the report.
    from random import randint

    def randbitarray(length=7):
        """Build a BinaryTangent holding `length` random bits."""
        return BinaryTangent(bool(randint(0, 1)) for _ in range(length))

    b = randbitarray(512)
    # single-argument print(...) behaves the same on Python 2 and 3
    print(b.calculate_confidence())