# serelk
# 2/14/2016 - 10:39 PM
#
# file_duplicate

import os
import random
import shutil
import hashlib


def file_generator(directory=None, init_n=9, n_copies=4, size=1024):
    """Populate a directory with random files plus a few exact duplicates.

    Writes ``init_n`` files named ``foo0`` .. ``foo<init_n-1>``, each holding
    ``size`` bytes from ``os.urandom``, then copies ``n_copies`` randomly
    chosen originals to new ``foo<N>`` names so the directory contains
    known duplicates.

    Args:
        directory: Target directory. When ``None`` the module-level
            ``dirname`` global (set by the script entry point) is used.
            The directory is created if it does not exist.
        init_n: Number of unique random files to create.
        n_copies: Number of duplicate files to create.
        size: Size in bytes of each random file.
    """
    if directory is None:
        directory = dirname
    if not os.path.isdir(directory):
        os.makedirs(directory)

    for i in range(init_n):
        # os.path.join works whether or not `directory` ends with a separator.
        with open(os.path.join(directory, "foo" + str(i)), 'wb') as fout:
            fout.write(os.urandom(size))

    if init_n < 1:
        # No originals exist, so there is nothing to duplicate.
        return

    # Copies historically start at foo10; move past the originals when
    # init_n exceeds 10 so a copy can never overwrite an original.
    first_copy = max(10, init_n)
    for y in range(n_copies):
        source = "foo" + str(random.randint(0, init_n - 1))
        target = "foo" + str(first_copy + y)
        shutil.copy(os.path.join(directory, source),
                    os.path.join(directory, target))


def dir_to_hash(directory=None):
    """Group the files of a directory by the MD5 digest of their contents.

    Args:
        directory: Directory to scan (not recursive). When ``None`` the
            module-level ``dirname`` global (set by the script entry point)
            is used.

    Returns:
        The ``items()`` of a dict mapping each hex digest to the list of
        file names (not full paths) whose contents produce that digest.
        A list with more than one name is a set of duplicate files.
    """
    if directory is None:
        directory = dirname

    dir_dict = {}
    for f in os.listdir(directory):
        path = os.path.join(directory, f)
        # Sub-directories and other non-regular entries cannot be hashed.
        if not os.path.isfile(path):
            continue
        digest = hashlib.md5()
        # The context manager closes the handle promptly; reading in chunks
        # keeps memory use bounded for large files.
        with open(path, 'rb') as fin:
            for chunk in iter(lambda: fin.read(65536), b''):
                digest.update(chunk)
        dir_dict.setdefault(digest.hexdigest(), []).append(f)
    return dir_dict.items()


if __name__ == "__main__":
    # Scratch directory for the demo; file_generator() and dir_to_hash()
    # read this module-level name when called without arguments.
    dirname = "/Users/serelk/Projects/FileComparison/FOODIR/"
    file_generator()
    # Report every group of file names that share identical contents.
    for _, names in dir_to_hash():
        if len(names) > 1:
            # Parenthesised single-argument form prints the same output on
            # Python 2 and is valid syntax on Python 3.
            print(names)