masdeseiscaracteres
4/17/2017 - 6:44 AM

Rolling window computations

Rolling window computations

import numpy as np
import scipy.signal as sig

# Rolling sum
def rolling_sum(x, n, axis=0):
    ma_filter = np.ones(n)
    return sig.lfilter(ma_filter, 1, x, axis=axis)

# Rolling mean
def rolling_mean(x, n, axis=0):
    ma_filter = np.ones(n)*(1/n)
    return sig.lfilter(ma_filter, 1, x, axis=axis) 

# Rolling variance
def rolling_var(x, n, axis=0):
    ma_filter = np.ones(n)*(1/n)
    return sig.lfilter(ma_filter, 1, x ** 2, axis=axis) - sig.lfilter(ma_filter, 1, x, axis=axis) ** 2

# Rolling covariance
def rolling_cov(x, y, n, axis=0):
    ma_filter = np.ones(n)*(1/n)
    return sig.lfilter(ma_filter, 1, x * y, axis=axis) \
           - sig.lfilter(ma_filter, 1, x, axis=axis) * sig.lfilter(ma_filter, 1, y, axis=axis)

# Rolling linear regression (slope, y-intercept, squared Pearson's correlation coefficient)
def rolling_linearreg(x, y, n, axis=0):
    ma_filter = np.ones(n) * (1 / n)
    rx = sig.lfilter(ma_filter, 1, x, axis=axis)
    ry = sig.lfilter(ma_filter, 1, y, axis=axis)
    rxx = sig.lfilter(ma_filter, 1, x ** 2, axis=axis)
    ryy = sig.lfilter(ma_filter, 1, y ** 2, axis=axis)
    rxy = sig.lfilter(ma_filter, 1, x * y, axis=axis)

    covarxy = rxy - rx * ry
    varx = rxx - rx ** 2

    m = covarxy / varx  # slope
    n = ry - m * rx  # y-intercept
    r2 = covarxy ** 2 / (varx * (ryy - ry ** 2))
    return m,n,r2
import numpy as np
import pandas as pd
import scipy.signal as sig
import skimage
import warnings

a = np.arange(100000, dtype=np.float).reshape(1000,-1)
#s = pd.DataFrame(a)

s= pd.Series(np.arange(100, dtype=np.float))

step = 20
window_size = 50
offset = 13
axis = 0

def rolling_sum1(a, window, step, offset=0, axis=0):
    arr = np.asarray(a)

    sel = [slice(None)] * arr.ndim
    sel[axis] = slice(offset, None, None)
    arr = arr[sel]

    if window > arr.shape[axis]:
        sh = list(arr.shape)
        sh[axis] = 0
        return np.empty(sh, dtype=arr.dtype)

    if arr.ndim in (1, 2):
        tup = list(arr.shape)
        tup[axis] = window
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', RuntimeWarning)
            rolled_arr = skimage.util.view_as_windows(arr, tuple(tup), step)
    else:
        raise ValueError('Only 1- or 2-dimensional objects allowed.')

    out = rolled_arr.sum(arr.ndim+axis)

    if arr.ndim == 2:
        out = out.swapaxes(-1, 1-axis)
        out = out.squeeze(-1)
    return out

def rolling_sum2(a, window, step, offset=0, axis=0):
    df = pd.DataFrame(a)
    transient_len = window - 1
    sel_tuple = (slice(None), slice(transient_len + offset,None,step))[::2 * axis - 1]
    out = df.rolling(window, 0, axis=axis).sum().iloc[sel_tuple].values
    if a.ndim == 1:
        out = out.squeeze()
    return out


def rolling_sum3(a, window, step, offset=0, axis=0):
    h = np.ones(window)

    arr = np.asarray(a)
    sel = [slice(None)]*arr.ndim
    sel[axis] = slice(offset, None, None)
    arr = arr[sel]

    if window > arr.shape[axis]:
        sh = list(arr.shape)
        sh[axis] = 0
        return np.empty(sh, dtype=arr.dtype)

    pad = (1 - window) % step # initial padding
    pad_sh = (pad,) + ((arr.shape[1-axis],) if arr.ndim == 2 else ())
    arr = np.insert(arr, 0, np.zeros(pad_sh), axis=axis) # add zeros for offset = 0

    initial_transient_len = (window+pad-1)//step # initial transient length is always an integer number of steps because of padding
    final_transient_len = (window-1)//step

    # print('Padding:', pad)
    # r1 = sig.upfirdn(h, arr, up=1, down=1)
    # print('Rate=1:1', r1)
    # base = np.ones_like(r1)+np.nan
    # rd = sig.upfirdn(h, arr, up=1, down=step, axis=axis)
    # tmp = base[offset::step]
    # base[offset::step] = rd[:len(tmp)]
    # print('Rate=1:d', base)
    # print('Transient lengths:', (initial_transient_len,final_transient_len))
    sel[axis] = slice(initial_transient_len,
                                    None if final_transient_len==0 else -final_transient_len, None)
    return sig.upfirdn(h, arr, up=1, down=step, axis=axis)[sel]


def rolling_sum4(a, window, step, offset=0, axis=0):
    h = np.ones(window)

    arr = np.asarray(a)
    sel = [slice(None)]*arr.ndim
    sel[axis] = slice(offset, None, None)
    arr = arr[sel]

    transient_len = window - 1 # initial transient length

    sel[axis] = slice(transient_len,None,step)
    return sig.lfilter(h, 1, arr, axis=axis)[sel]

%timeit s1 = rolling_sum1(s, window_size, step, offset, axis)
%timeit s2 = rolling_sum2(s, window_size, step, offset, axis)
%timeit s3 = rolling_sum3(s, window_size, step, offset, axis)
%timeit s4 = rolling_sum4(s, window_size, step, offset, axis)