04pallav
9/11/2017 - 10:25 PM

kmeans

kmeans

# KMeans minimizes the within-cluster sum of squared distances
# from each cluster's mean (its centroid).

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate 500 sample points around 3 centers. `classes` holds, for each
# point, the index of the blob it was drawn from (the ground-truth labels).
blobs, classes = make_blobs(500, centers=3)

# Fit k-means with as many clusters as there are generated blobs.
kmean = KMeans(n_clusters=3)
kmean.fit(blobs)
kmean.labels_  # cluster label assigned to each point
kmean.cluster_centers_  # coordinates of the cluster centers
kmean.transform(blobs)[:5]  # distances from the first 5 points to every centroid

# Silhouette coefficient per sample; the overall score is their mean.
silhouette_samples = metrics.silhouette_samples(blobs, kmean.labels_)
metrics.silhouette_score(blobs, kmean.labels_)  # same value as the mean below
silhouette_samples.mean()  # average silhouette coefficient

# Normalized mutual information between the ground-truth blob labels
# (`classes`) and the labels found by the fitted model (`kmean`).
metrics.normalized_mutual_info_score(classes, kmean.labels_)