# CodyKochmann
# 1/23/2017 - 5:00 PM

# Improved BinaryTangent.py!

# Improved BinaryTangent.py!

# -*- coding: utf-8 -*-
# @Author: codykochmann
# @Date:   2017-01-23 08:42:55
# @Last Modified 2017-01-23
# @Last Modified time: 2017-01-23 11:57:29

# this is a rewrite of the original OOP based binary tangent


from bitarray import bitarray
from itertools import product
from collections import Counter
from time import time

DEFAULT_DEPTH=7
DEFAULT_FUZZINESS=0.2

class BinaryTangent(bitarray):
    """A hashable ``bitarray`` with sliding-window pattern helpers.

    On top of the base bit container this class offers fixed-size window
    iteration, fuzzy (Hamming-similarity) matching of a target pattern and
    a naive predictor that looks at what followed similar patterns earlier
    in the same sequence.

    Instances hash by their '0'/'1' string form.  The object is still
    mutable, so changing it after it was used as a dict key / set member
    changes its hash.
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore construction arguments.

        Nothing is stored here; the arguments are presumably consumed by
        ``bitarray``'s own construction machinery (confirm against the
        installed bitarray version).  ``super`` is anchored on this class
        so that ``bitarray`` itself is not skipped in the MRO.
        """
        super(BinaryTangent, self).__init__()

    def __hash__(self):
        """Return a hash computed from the '0'/'1' string form of the bits."""
        return hash(self.to01())

    def window(self, window_size):
        """Yield every contiguous slice of ``window_size`` bits, left to right.

        Yields nothing when ``window_size`` is larger than ``len(self)``.
        """
        for start in range(len(self) - window_size + 1):
            yield self[start:start + window_size]

    def similarity(self, target):
        """Return the fraction of positions where self and target agree.

        The result is a float in [0.0, 1.0]; 1.0 means identical.

        Raises:
            AssertionError: if the two arrays differ in length.
        """
        assert len(self) == len(target), "needed length of {} and instead got {}".format(len(self),len(target))
        if len(self) == 0:
            # two empty arrays are identical; avoids dividing by zero
            return 1.0
        # self ^ target has a 1 at every position where the bits differ
        return 1 - 1.0 * sum(self ^ target) / len(self)

    def fuzzy_matches(self, target, result_size=1, fuzziness=None):
        """Yield ``(match, outcome)`` for every window that resembles target.

        Args:
            target: the pattern to look for; must not be longer than self.
            result_size: how many bits following the match to report.
            fuzziness: tolerated fraction of differing bits.  ``None``
                (the default) uses the module-level ``DEFAULT_FUZZINESS``
                as read at call time.

        Yields:
            Tuples of the matching slice (``len(target)`` bits) and the
            ``result_size`` bits that directly follow it.

        Raises:
            AssertionError: on first iteration, if target is longer than self.
        """
        assert len(self) >= len(target)
        if fuzziness is None:
            fuzziness = DEFAULT_FUZZINESS
        required_similarity = 1 - fuzziness
        target_len = len(target)
        for chunk in self.window(target_len + result_size):
            # split by position rather than with [:-result_size], which
            # would produce an empty match when result_size is 0
            match = chunk[:target_len]
            # relies on slices being BinaryTangent instances (similarity is
            # defined on this class) -- confirm for the bitarray version used
            if match.similarity(target) >= required_similarity:
                yield match, chunk[target_len:]

    def fuzzy_outcomes(self, target, result_size=1, fuzziness=None):
        """Yield only the outcome part of each ``fuzzy_matches`` result.

        Takes the same arguments as ``fuzzy_matches`` and raises the same
        AssertionError.
        """
        for _match, outcome in self.fuzzy_matches(target, result_size, fuzziness):
            yield outcome

    def count(self, target):
        """Return how many windows of self are exactly equal to target.

        Overlapping occurrences are counted individually.

        NOTE(review): this shadows the ``count`` method that ``bitarray``
        itself provides -- confirm no caller expects the base behaviour.

        Raises:
            AssertionError: if target is longer than self.
        """
        assert len(target) <= len(self)
        # compare against the target's bits (the previous code compared
        # each window against self, which never counted the target)
        wanted = target.to01()
        return sum(wanted == piece.to01() for piece in self.window(len(target)))

    def future_possibilities(self, future_steps=3):
        """Print self extended by every possible ending of 1..future_steps bits.

        Nothing is returned; each candidate continuation is printed.
        """
        for extra_bits in range(1, future_steps + 1):
            for ending in product((False, True), repeat=extra_bits):
                # single-argument parenthesised print behaves the same on
                # Python 2 and Python 3
                print(self + BinaryTangent(ending))

    def patterns(self, depth=DEFAULT_DEPTH):
        """Return a generator over the distinct ``depth``-bit windows of self.

        Order is unspecified because duplicates are removed through a set.
        """
        # de-duplicate on the string form so set() doesn't ruin the data
        unique_strings = set(piece.to01() for piece in self.window(depth))
        return (BinaryTangent(bits) for bits in unique_strings)

    def generate_confidence_tests(self, depth):
        """Yield ``(pattern, next_bit)`` pairs for every window of depth+1 bits.

        ``pattern`` is the first ``depth`` bits of the window and
        ``next_bit`` the single bit that followed it.
        """
        for chunk in self.window(depth + 1):
            yield chunk[:-1], chunk[-1]

    def calculate_confidence(self):
        """Back-test ``predict_next`` against self and predict the next bit.

        Every ``DEFAULT_DEPTH``-bit window of self is fed to
        ``predict_next`` and the resulting confidence score is filed under
        the bit that actually followed the window.

        Returns:
            A dict with the keys 'depth', 'outcome_success', 'duration'
            (seconds spent on the back-test), 'depth_prediction',
            'invalid_tests' (windows without enough data), 'prediction'
            (``predict_next()`` for the tail of self) and, when that
            prediction carries a score, 'outcomes_with_this_confidence'.
        """
        # results[0] / results[1]: scores of tests whose real next bit was
        # 0 / 1; results[2]: predictions that carried no score
        results = ([], [], [])
        start_time = time()
        output = {
            'depth': DEFAULT_DEPTH,
            'outcome_success': [{}, {}],
            'duration': 0.0,
            'depth_prediction': {},
            'invalid_tests': 0
        }

        for test_pattern, result in self.generate_confidence_tests(DEFAULT_DEPTH):
            prediction = self.predict_next(test_pattern)
            if 'message' not in prediction:  # a message means there wasn't enough data
                results[result].append(prediction['confidence_score'])
            else:
                results[2].append(prediction)

        # True / False double as the indexes 1 / 0
        for i in [True, False]:
            if results[i]:
                output['outcome_success'][i] = {
                    "actual_outcome": i,
                    "occurances_of_1": sum(1 for t in results[i] if t == 1.0),
                    "occurances_of_0": sum(1 for t in results[i] if t == 0.0)
                }

        output['invalid_tests'] = len(results[2])
        output['duration'] = time() - start_time

        p = self.predict_next()
        output['prediction'] = p

        if 'confidence_score' in p:
            output['outcomes_with_this_confidence'] = {}
            for i in [True, False]:
                count = sum(1 for n in results[i] if n == p['confidence_score'])
                output['outcomes_with_this_confidence'][str(i)] = count

        return output

    def predict_next(self, target=None):
        """Estimate how likely the bits following ``target`` are to be 1.

        Collects the two bits that followed every fuzzy match of target
        inside self and reports the fraction of them that were 1.

        Args:
            target: pattern to look up; defaults to the last
                ``DEFAULT_DEPTH`` bits of self.

        Returns:
            ``{'similar_patterns': 0, 'message': 'not enough data'}`` when
            fewer than three matches were found, otherwise a dict with
            'confidence_score' (float in [0, 1]), 'similar_patterns'
            (number of matches) and 'previous_patterns' (outcome string ->
            number of occurrences).
        """
        if target is None:
            target = self[-DEFAULT_DEPTH:]

        outcome_count = 2  # how many events to look into the future
        outcome_list = list(self.fuzzy_outcomes(target, outcome_count))

        # bail out before computing the score: with no matches at all the
        # division below would raise ZeroDivisionError
        if len(outcome_list) < 3:
            # NOTE: 'similar_patterns' is reported as 0 even when one or
            # two matches exist; kept as-is for existing callers
            return {
                'similar_patterns': 0,
                'message': 'not enough data'
            }

        ones_seen = sum(sum(outcome) for outcome in outcome_list)
        confidence_score = 1.0 * ones_seen / (len(outcome_list) * outcome_count)

        return {
            "confidence_score": confidence_score,
            "similar_patterns": len(outcome_list),
            "previous_patterns": dict(Counter(i.to01() for i in outcome_list))
        }


if __name__ == '__main__':
    # Demo: back-test the predictor on 512 random bits and print the report.
    from random import randint

    def randbool():
        """Return a random boolean."""
        return bool(randint(0, 1))

    def randbitarray(l=7):
        """Return a BinaryTangent of ``l`` random bits."""
        return BinaryTangent((randbool() for i in range(l)))

    def randbit_collection(l=7):
        """Return a generator of ``l`` random default-length BinaryTangents."""
        return (randbitarray() for i in range(l))

    b = randbitarray(512)
    # single-argument parenthesised print works on Python 2 and Python 3
    print(b.calculate_confidence())