jmquintana79
3/9/2017 - 6:22 AM

## Multivariate distances for different metrics

Multivariate distances for different metrics

``````
## algorithm to calculate multivariate distances
def cdist_sparse(X, Y, **kwargs):
    """Compute pairwise distances between the rows of X and the rows of Y.

    Wrapper around ``scipy.spatial.distance.cdist`` that also accepts
    ``scipy.sparse`` inputs. Sparse inputs are densified one row at a time,
    so the full dense matrix is never materialised; this is very slow when
    both inputs are sparse.

    Args:
        X: 2-D array-like or scipy.sparse matrix, one observation per row.
        Y: 2-D array-like or scipy.sparse matrix, one observation per row.
        **kwargs: Forwarded unchanged to ``cdist`` (e.g. ``metric="cityblock"``).

    Returns:
        If neither input is sparse, whatever ``cdist(X, Y, **kwargs)``
        returns. Otherwise a float64 ndarray of shape
        ``(X.shape[0], Y.shape[0])`` where entry ``[j, k]`` is the distance
        between row ``j`` of X and row ``k`` of Y.
    """
    import numpy as np
    from scipy.sparse import issparse
    from scipy.spatial.distance import cdist

    x_sparse = issparse(X)
    y_sparse = issparse(Y)
    if not (x_sparse or y_sparse):
        return cdist(X, Y, **kwargs)

    # CSR supports row slicing for every sparse input format; the dense
    # operand (if any) is coerced so that ``.shape`` is always available.
    X = X.tocsr() if x_sparse else np.asarray(X)
    Y = Y.tocsr() if y_sparse else np.asarray(Y)

    n_x, n_y = X.shape[0], Y.shape[0]
    d = np.empty((n_x, n_y), np.float64)

    if x_sparse and not y_sparse:
        for j in range(n_x):
            # cdist returns shape (1, n_y); take its single row.
            d[j] = cdist(X[j:j + 1].toarray(), Y, **kwargs)[0]
    elif y_sparse and not x_sparse:
        for k in range(n_y):
            # cdist returns shape (n_x, 1); take its single *column*.
            d[:, k] = cdist(X, Y[k:k + 1].toarray(), **kwargs)[:, 0]
    else:
        for j in range(n_x):
            # Densify the X row once per outer iteration, not once per pair.
            x_row = X[j:j + 1].toarray()
            for k in range(n_y):
                d[j, k] = cdist(x_row, Y[k:k + 1].toarray(), **kwargs)[0, 0]
    return d

if __name__ == "__main__":
    # Usage example.
    # NOTE(review): X and TARGET are not defined in this snippet; presumably
    # they are pandas DataFrames with matching columns -- define them before
    # running this block.

    # define metric (https://docs.scipy.org/doc/scipy-0.16.0/reference/generated/scipy.spatial.distance.html)
    metric = "cityblock"
    # calculate distances
    # `.values` replaces DataFrame.as_matrix(), which newer pandas no longer
    # provides. No `p` is passed: it only applies to the Minkowski metric and
    # newer SciPy releases appear to reject keyword arguments the chosen
    # metric does not use.
    D = cdist_sparse(X.values, TARGET.values, metric=metric)

    # D: array of distances [D.shape = len(X) x len(TARGET)]
``````