# Fragment of a function body (note the trailing `return`); the enclosing
# `def` and the original indentation are not visible here, so the nesting
# described in these comments is inferred from the statements -- TODO confirm.
# The loop looks like a k-means style refinement: seed centroids, assign each
# sample to its nearest centroid, re-average, stop when centroids stop moving.

# Accumulators handed back to the caller by the final `return`.
means = []
distortions = []
labels = []
# Current centroids, seeded from the rows of X selected by seeds_idx.
centroids = []
for i in range(len(seeds_idx)):
centroids.append(X[seeds_idx[i]])
# Hard-coded cap of 8 refinement passes.
for i in range(8):
# One empty bucket per cluster. K comes from outside this fragment.
# NOTE(review): this loop reuses the name `i` from the loop above.
classifications = []
for i in range(K):
classifications.append([])
for featureset in X:
# NOTE(review): the whole of X is passed here, not `featureset`, so the same
# call is repeated for every sample -- presumably `featureset` was intended;
# confirm against q1_dist2's signature (defined outside this fragment).
distances = q1_dist2(centroids, X)
# NOTE(review): this stores the raw distances, not the chosen cluster index,
# and keeps appending on every pass -- confirm what callers expect in `labels`.
labels.append(distances)
# np.argmin without `axis` returns an index into the flattened array, so if
# `distances` is multi-dimensional the result may not be a cluster index
# -- TODO confirm the shape q1_dist2 returns.
classification = np.argmin(distances)
classifications[classification].append(featureset)
# NOTE(review): `centroids` is a list, and dict() over a list requires every
# element to be a 2-item pair. This raises unless each centroid has exactly
# two elements, in which case it builds a first-element -> second-element
# mapping rather than a copy. Presumably a snapshot of the previous centroids
# was intended.
prev_centroids = dict(centroids)
# NOTE(review): `classification` here is one of the bucket lists (an element
# of `classifications`), not an integer index; indexing a list with a list
# raises TypeError. Presumably iteration over range(K) was intended.
for classification in classifications:
centroids[classification] = np.average(classifications[classification],axis=0)
# Convergence check: assume converged until some centroid moved too far.
optimized = True
# NOTE(review): `c` is a centroid value, yet it is used below as a key into
# `prev_centroids` and as an index into `centroids` -- same list/dict
# mismatch as above.
for c in centroids:
original_centroid = prev_centroids[c]
current_centroid = centroids[c]
# Sum of the per-coordinate relative change of this centroid.
# NOTE(review): divides by the previous centroid; a zero coordinate would
# yield inf/nan if these are NumPy arrays -- confirm inputs.
means.append(np.sum((current_centroid-original_centroid)/original_centroid))
# NOTE(review): this is the signed difference, not a magnitude. If it is a
# multi-element NumPy array the `>` test below has an ambiguous truth value,
# and negative movement never trips the threshold.
distortion = current_centroid-original_centroid
distortions.append(distortion)
if distortion > 1e-6:
optimized = False
# Stop early once no centroid changed by more than the 1e-6 threshold.
if optimized:
break
return (labels, means, distortions)