CodyKochmann
1/6/2018 - 8:52 PM

This demonstrates how to progressively calculate the linear regression of a Python generator to avoid needing to load the whole structure into memory.

This demonstrates how to progressively calculate the linear regression of a Python generator to avoid needing to load the whole structure into memory. This is useful when you're trying to analyze infinite streams of data.

'''
This demonstrates how to progressively calculate the linear regression 
of a generator to avoid needing to load the whole structure into memory. 

This is useful when you're trying to analyze infinite streams of data.

by: Cody Kochmann
'''

from operator import mul
from generators import started

@started
def sum_of_squares():
    '''
    Coroutine keeping a running total of the squares of the values sent in.

    Every value handed to send() is squared and added to the total, and the
    updated total is what that send() call returns. The initial yield
    produces 0.

    NOTE(review): callers use .send() on a fresh instance, so @started is
    assumed to have already advanced the generator to its first yield --
    confirm against the `generators` package.
    '''
    total = 0
    while True:
        received = yield total
        total += received**2

@started
def cor():
    '''
    Coroutine keeping a running sum of products of the tuples sent in.

    Each tuple passed to send() is unpacked into operator.mul, so it must
    hold exactly two factors (typically an (x, y) point). The updated sum
    is returned by that send() call; the initial yield produces 0.

    Despite the name, this is the raw sum of x*y, not a correlation
    coefficient.
    '''
    total = 0
    while True:
        factors = yield total
        total += mul(*factors)

@started
def sd():
    '''
    Coroutine yielding sum(x)**2 - len(x)*sum_of_squares(x) for the values
    sent in so far.

    This is the shared denominator used by linear_slope and y_intercept;
    despite the name it is not a standard deviation. It is 0 before any
    value has been sent and again after exactly one value.
    '''
    count = 0
    running_sum = 0
    latest = 0
    push_square = sum_of_squares().send
    while True:
        # On the very first pass `latest` is still 0, which leaves the
        # squares accumulator untouched.
        square_of_sum = running_sum**2
        scaled_squares = count*push_square(latest)
        latest = yield square_of_sum - scaled_squares
        count += 1
        running_sum += latest

@started
def linear_slope():
    '''
    Coroutine yielding the least-squares slope of the (x, y) points sent in.

    Formula, using the sibling accumulators:
        (sum(x)*sum(y) - len(x)*cor(x,y)) / sd(x)

    send() takes an (x, y) tuple and returns the slope over every point
    received so far. The initial yield produces 0. While sd(x) is 0 (for
    example after a single point) the division falls back to a divisor
    of 1.
    '''
    push_products = cor().send
    push_spread = sd().send
    total_x = 0
    total_y = 0
    count = 0
    current = 0
    while True:
        x, y = yield current
        total_x += x
        total_y += y
        count += 1
        spread = push_spread(x)
        numerator = total_x*total_y - count*push_products((x, y))
        # Guard against dividing by zero while the x values have no spread.
        divisor = 1
        if spread != 0:
            divisor = spread
        current = numerator/divisor

@started
def y_intercept():
    '''
    Coroutine yielding the least-squares y-intercept of the (x, y) points
    sent in.

    Formula, using the sibling accumulators:
        (cor(x,y)*sum(x) - sum_of_squares(x)*sum(y)) / sd(x)

    send() takes an (x, y) tuple and returns the intercept over every point
    received so far. The initial yield produces 0. While sd(x) is 0 (for
    example after a single point) the division falls back to a divisor
    of 1.
    '''
    push_products = cor().send
    push_squares = sum_of_squares().send
    push_spread = sd().send
    total_x = 0
    total_y = 0
    current = 0
    while True:
        x, y = yield current
        spread = push_spread(x)
        total_x += x
        total_y += y
        weighted_products = push_products((x, y))*total_x
        weighted_squares = push_squares(x)*total_y
        # Guard against dividing by zero while the x values have no spread.
        divisor = 1
        if spread != 0:
            divisor = spread
        current = (weighted_products - weighted_squares)/divisor

# Demo driver: bind the send() method of one instance of each coroutine.
# Only the slope and intercept feeds are printed below; the other three are
# available for ad-hoc inspection of the intermediate accumulators.
_sum_of_squares = sum_of_squares().send
_cor = cor().send
_sd = sd().send
_linear_slope = linear_slope().send
_y_intercept = y_intercept().send

# The sample points (0, 3), (1, 4), ..., (9, 12) all lie on y = x + 3.
# For each point, print the running slope and then the running intercept.
for x, y in enumerate(range(3, 13)):
    print(_linear_slope((x, y)))
    print(_y_intercept((x, y)))