from scipy.stats.mvn import mvnun as mvn_cdf
from scipy.stats import kstwobign, pearsonr
from __future__ import division
def quadct(xx, yy):
n = len(xx)
res = np.empty((n, 4), 'float')
for i in range(n):
ix1, ix2 = xx <= xx[i], yy <= yy[i]
a = np.sum( ix1 & ix2)
b = np.sum( ix1 & ~ix2)
c = np.sum(~ix1 & ix2)
d = n - a - b -c
res[i] = a, b, c, d
return res/n
def quatvl(xx, yy, x0, y0, xvar, yvar, xycov, nsig=4):
cov = np.array([[xvar, xycov], [xycov, xvar]]).T
mean = np.array([x0, y0]).T
xsig5, ysig5 = xvar**0.5*nsig, yvar**0.5*nsig
x1, x2 = x0 - xsig5, x0 + xsig5
y1, y2 = y0 - ysig5, y0 + ysig5
m, n = len(xx), len(x0)
res = np.zeros((m, 4), 'float')
for i in range(m):
for j in range(n):
X0, Y0, X1, X2, Y1, Y2, M, C = xx[i], yy[i], x1[j], x2[j], y1[j],y2[j], mean[j], cov[j]
res[i, 0] += mvn_cdf([X1, Y1], [X0, Y0], M, C)[0]
res[i, 1] += mvn_cdf([X1, Y0], [X0, Y2], M, C)[0]
res[i, 2] += mvn_cdf([X0, Y1], [X2, Y0], M, C)[0]
res[i, 3] += mvn_cdf([X0, Y0], [X2, Y2], M, C)[0]
return res / res.sum(-1, keepdims=True)
def ks2d1s(xx, yy, x0, y0, xvar, yvar, xycov, nsig=4):
"""
Example
x, y = randn(2, 1000)+0.05
print ks2d1s(x, y, [0], [0], [1], [1], [0])
n, m = 9, 2000
print ks2d1s(randn(n), randn(n), zeros(m), zeros(m), ones(m), ones(m), zeros(m))
"""
xx, yy, x0, y0, xvar, yvar, xycov = np.atleast_1d(xx, yy, x0, y0, xvar, yvar, xycov)
a = quadct(xx, yy)
b = quatvl(xx, yy, x0, y0, xvar, yvar, xycov, nsig=4)
D = np.abs(a-b).max()
sqen = np.sqrt(len(xx))
r = np.sqrt(1 - pearsonr(xx, yy)[0]**2)
d = D * sqen/(1 + r*(0.25 - 0.75/sqen))
p = kstwobign.sf(d)
return D, p