OMENSAH
4/22/2018 - 11:08 PM

kmeans.py

 # Body of a k-means style routine. The enclosing `def` line is not part of
 # this excerpt, so X, K, seeds_idx, q1_dist2 and np are all names supplied by
 # code outside this fragment.
 # NOTE(review): this first statement is indented by one space while the rest
 # of the body uses four -- looks like paste damage; confirm against the file.
 means = []
    distortions = []
    labels = []
    # The three lists above are accumulated across every pass and returned
    # together at the end of the fragment.

    # Initial centroids: the entries of X selected by seeds_idx.
    centroids = []  
    for i in range(len(seeds_idx)):
        centroids.append(X[seeds_idx[i]])
    
    # Run at most 8 passes (hard-coded iteration cap).
    # NOTE(review): the inner `for i in range(K)` rebinds this loop's `i`;
    # the pass count is unaffected, but the shadowing is easy to misread.
    for i in range(8):
        classifications = []
        
        # One empty bucket per cluster.
        for i in range(K):
            classifications.append([])

        # Assign each entry of X to a bucket.
        for featureset in X:
            # NOTE(review): q1_dist2 receives the whole X, not `featureset`,
            # so unless q1_dist2 has side effects the result is the same on
            # every iteration -- presumably `featureset` was intended; confirm
            # against q1_dist2's signature.
            distances = q1_dist2(centroids, X)
            # NOTE(review): this stores the distances value rather than the
            # chosen cluster index, and `labels` is never cleared between
            # passes, so it grows by len(X) entries per pass.
            labels.append(distances)
            # np.argmin without `axis` indexes the flattened input; if
            # `distances` holds more than K values the index below may fall
            # outside the K buckets -- TODO confirm the shape q1_dist2 returns.
            classification = np.argmin(distances)          
            classifications[classification].append(featureset)
        # NOTE(review): `centroids` is a list here, so dict(centroids) does not
        # produce an index -> centroid snapshot; dict() treats each element as
        # a (key, value) pair and fails for elements that are not length 2.
        # Looks like a leftover from a dict-based version -- confirm.
        prev_centroids = dict(centroids)

        # Intended step: recompute each centroid as the mean of its bucket.
        # NOTE(review): iterating `classifications` yields the bucket lists
        # themselves, and indexing a list with a list raises TypeError;
        # presumably this should iterate over bucket indices.
        for classification in classifications:
            centroids[classification] = np.average(classifications[classification],axis=0)

        # Convergence check: compare each centroid with its previous value.
        optimized = True
        # NOTE(review): `c` is an element of `centroids`, yet it is used below
        # as a key/index into both `prev_centroids` and `centroids`.
        for c in centroids:
            original_centroid = prev_centroids[c]
            current_centroid = centroids[c]
            # Summed relative change of this centroid; divides by the previous
            # centroid's values.
            means.append(np.sum((current_centroid-original_centroid)/original_centroid))
            # Raw (signed) difference between new and previous centroid.
            distortion = current_centroid-original_centroid
            distortions.append(distortion)
            # NOTE(review): if `distortion` is a multi-element array this
            # truth test is ambiguous in NumPy, and a signed difference misses
            # movement in the negative direction -- confirm intent.
            if distortion > 1e-6:
                optimized = False    

            # NOTE(review): this check sits inside the `for c` loop, so the
            # `break` only leaves that inner loop (after the first centroid
            # that leaves `optimized` True); the 8-pass outer loop keeps going.
            if optimized:
                break
    # Hand back the three accumulators as a tuple.
    return (labels, means, distortions)