This demonstrates how to progressively calculate the linear regression of a Python generator to avoid needing to load the whole structure into memory. This is useful when you're trying to analyze infinite streams of data.
'''
This demonstrates how to progressively calculate the linear regression
of a generator to avoid needing to load the whole structure into memory.
This is useful when you're trying to analyze infinite streams of data.
by: Cody Kochmann
'''
from operator import mul
from generators import started
@started
def sum_of_squares():
    '''
    Coroutine keeping a running total of the squares of the values sent in.

    Every value received through ``send`` is squared and added to the
    total; the updated total is what the generator yields back.

    NOTE(review): ``started`` comes from the ``generators`` package. The
    callers in this file use ``.send()`` right away, so it presumably
    primes the generator first - confirm against that package.
    '''
    total = 0
    while True:
        received = yield total
        total += received ** 2
@started
def cor():
    '''
    Coroutine keeping a running total of the products of the pairs sent in.

    Every item received through ``send`` is unpacked into ``mul``, so it is
    expected to hold exactly two factors (the file sends ``(x, y)`` tuples).
    The yielded value is the sum of all products seen so far, i.e.
    ``sum(x*y)``, not a correlation coefficient.
    '''
    total = 0
    while True:
        pair = yield total
        total += mul(*pair)
@started
def sd():
    '''
    Coroutine yielding ``sum(x)**2 - n*sum(x**2)`` for the values sent so far.

    Despite the name, no square root or division is applied, so this is
    not a standard deviation. It is the value that ``linear_slope`` and
    ``y_intercept`` divide by.
    '''
    count = 0
    running_sum = 0
    latest = 0
    add_square = sum_of_squares().send
    while True:
        # The very first pass feeds the placeholder 0 into the squares
        # accumulator, which leaves its total unchanged.
        square_of_sum = running_sum ** 2
        scaled_squares = count * add_square(latest)
        latest = yield square_of_sum - scaled_squares
        count += 1
        running_sum += latest
@started
def linear_slope():
    '''
    Coroutine yielding the regression slope of the ``(x, y)`` pairs sent in.

    The slope is computed incrementally as

        (sum(x)*sum(y) - n*sum(x*y)) / (sum(x)**2 - n*sum(x**2))

    where the denominator comes from ``sd`` and ``sum(x*y)`` from ``cor``.
    When the denominator is 0 the numerator is divided by 1 instead, so
    no ZeroDivisionError is raised. The value yielded before any data has
    been received is 0.
    '''
    products_total = cor().send
    denominator_for = sd().send
    total_x = 0
    total_y = 0
    count = 0
    slope = 0
    while True:
        x, y = yield slope
        total_x += x
        total_y += y
        count += 1
        raw_denominator = denominator_for(x)
        numerator = total_x * total_y - count * products_total((x, y))
        if raw_denominator != 0:
            divisor = raw_denominator
        else:
            divisor = 1
        slope = numerator / divisor
@started
def y_intercept():
    '''
    Coroutine yielding the regression y-intercept of the ``(x, y)`` pairs sent in.

    The intercept is computed incrementally as

        (sum(x*y)*sum(x) - sum(x**2)*sum(y)) / (sum(x)**2 - n*sum(x**2))

    where the denominator comes from ``sd``, ``sum(x*y)`` from ``cor`` and
    ``sum(x**2)`` from ``sum_of_squares``. When the denominator is 0 the
    numerator is divided by 1 instead, so no ZeroDivisionError is raised.
    The value yielded before any data has been received is 0.
    '''
    products_total = cor().send
    squares_total = sum_of_squares().send
    denominator_for = sd().send
    total_x = 0
    total_y = 0
    intercept = 0
    while True:
        x, y = yield intercept
        raw_denominator = denominator_for(x)
        total_x += x
        total_y += y
        numerator = products_total((x, y)) * total_x - squares_total(x) * total_y
        if raw_denominator != 0:
            divisor = raw_denominator
        else:
            divisor = 1
        intercept = numerator / divisor
# Stand-alone accumulators. The demo loop below does not feed them; they
# are kept so the intermediate sums can be inspected by hand if desired.
_sum_of_squares = sum_of_squares().send
_cor = cor().send
_sd = sd().send

# The two regression coroutines that the demo actually drives.
_linear_slope = linear_slope().send
_y_intercept = y_intercept().send

# Stream the points (0, 3), (1, 4), ..., (9, 12) one at a time and print
# the slope and intercept estimates after each new point.
for point in zip(range(10), range(3, 13)):
    print(_linear_slope(point))
    print(_y_intercept(point))