Rolling window computations
Some alternatives:
Fast implementations of commonly used linear and non-linear filters:
import numpy as np
import scipy.signal as sig
# Rolling sum
def rolling_sum(x, n, axis=0):
ma_filter = np.ones(n)
return sig.lfilter(ma_filter, 1, x, axis=axis)
# Rolling mean
def rolling_mean(x, n, axis=0):
ma_filter = np.ones(n)*(1/n)
return sig.lfilter(ma_filter, 1, x, axis=axis)
# Rolling variance
def rolling_var(x, n, axis=0):
ma_filter = np.ones(n)*(1/n)
return sig.lfilter(ma_filter, 1, x ** 2, axis=axis) - sig.lfilter(ma_filter, 1, x, axis=axis) ** 2
# Rolling covariance
def rolling_cov(x, y, n, axis=0):
ma_filter = np.ones(n)*(1/n)
return sig.lfilter(ma_filter, 1, x * y, axis=axis) \
- sig.lfilter(ma_filter, 1, x, axis=axis) * sig.lfilter(ma_filter, 1, y, axis=axis)
# Rolling linear regression (slope, y-intercept, squared Pearson's correlation coefficient)
def rolling_linearreg(x, y, n, axis=0):
ma_filter = np.ones(n) * (1 / n)
rx = sig.lfilter(ma_filter, 1, x, axis=axis)
ry = sig.lfilter(ma_filter, 1, y, axis=axis)
rxx = sig.lfilter(ma_filter, 1, x ** 2, axis=axis)
ryy = sig.lfilter(ma_filter, 1, y ** 2, axis=axis)
rxy = sig.lfilter(ma_filter, 1, x * y, axis=axis)
covarxy = rxy - rx * ry
varx = rxx - rx ** 2
m = covarxy / varx # slope
n = ry - m * rx # y-intercept
r2 = covarxy ** 2 / (varx * (ryy - ry ** 2))
return m,n,r2import numpy as np
import pandas as pd
import scipy.signal as sig
import skimage
import warnings
a = np.arange(100000, dtype=np.float).reshape(1000,-1)
#s = pd.DataFrame(a)
s= pd.Series(np.arange(100, dtype=np.float))
step = 20
window_size = 50
offset = 13
axis = 0
def rolling_sum1(a, window, step, offset=0, axis=0):
arr = np.asarray(a)
sel = [slice(None)] * arr.ndim
sel[axis] = slice(offset, None, None)
arr = arr[sel]
if window > arr.shape[axis]:
sh = list(arr.shape)
sh[axis] = 0
return np.empty(sh, dtype=arr.dtype)
if arr.ndim in (1, 2):
tup = list(arr.shape)
tup[axis] = window
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
rolled_arr = skimage.util.view_as_windows(arr, tuple(tup), step)
else:
raise ValueError('Only 1- or 2-dimensional objects allowed.')
out = rolled_arr.sum(arr.ndim+axis)
if arr.ndim == 2:
out = out.swapaxes(-1, 1-axis)
out = out.squeeze(-1)
return out
def rolling_sum2(a, window, step, offset=0, axis=0):
df = pd.DataFrame(a)
transient_len = window - 1
sel_tuple = (slice(None), slice(transient_len + offset,None,step))[::2 * axis - 1]
out = df.rolling(window, 0, axis=axis).sum().iloc[sel_tuple].values
if a.ndim == 1:
out = out.squeeze()
return out
def rolling_sum3(a, window, step, offset=0, axis=0):
h = np.ones(window)
arr = np.asarray(a)
sel = [slice(None)]*arr.ndim
sel[axis] = slice(offset, None, None)
arr = arr[sel]
if window > arr.shape[axis]:
sh = list(arr.shape)
sh[axis] = 0
return np.empty(sh, dtype=arr.dtype)
pad = (1 - window) % step # initial padding
pad_sh = (pad,) + ((arr.shape[1-axis],) if arr.ndim == 2 else ())
arr = np.insert(arr, 0, np.zeros(pad_sh), axis=axis) # add zeros for offset = 0
initial_transient_len = (window+pad-1)//step # initial transient length is always an integer number of steps because of padding
final_transient_len = (window-1)//step
# print('Padding:', pad)
# r1 = sig.upfirdn(h, arr, up=1, down=1)
# print('Rate=1:1', r1)
# base = np.ones_like(r1)+np.nan
# rd = sig.upfirdn(h, arr, up=1, down=step, axis=axis)
# tmp = base[offset::step]
# base[offset::step] = rd[:len(tmp)]
# print('Rate=1:d', base)
# print('Transient lengths:', (initial_transient_len,final_transient_len))
sel[axis] = slice(initial_transient_len,
None if final_transient_len==0 else -final_transient_len, None)
return sig.upfirdn(h, arr, up=1, down=step, axis=axis)[sel]
def rolling_sum4(a, window, step, offset=0, axis=0):
h = np.ones(window)
arr = np.asarray(a)
sel = [slice(None)]*arr.ndim
sel[axis] = slice(offset, None, None)
arr = arr[sel]
transient_len = window - 1 # initial transient length
sel[axis] = slice(transient_len,None,step)
return sig.lfilter(h, 1, arr, axis=axis)[sel]
%timeit s1 = rolling_sum1(s, window_size, step, offset, axis)
%timeit s2 = rolling_sum2(s, window_size, step, offset, axis)
%timeit s3 = rolling_sum3(s, window_size, step, offset, axis)
%timeit s4 = rolling_sum4(s, window_size, step, offset, axis)