#!/usr/bin/python
# romanitalian
# 10/3/2016 - 8:18 PM
#
# Helps to find duplicate files in a folder and its sub-folders.
import os
import sys


def getUniqueKey(fp):
    """
    Build a hashable grouping key for a file from its size and last-modified time.

    Files are considered "the same" when both their size in bytes and their
    mtime (truncated to whole seconds) match.  The two values are kept as a
    tuple rather than summed: adding them (the previous behavior) let
    unrelated (size, mtime) pairs with an equal sum collide and be falsely
    reported as duplicates.

    :param fp: path to an existing file
    :return: tuple ``(size_in_bytes, int(mtime))``
    :raises OSError: if the file cannot be stat-ed (e.g. it was removed)
    """
    dt = os.path.getmtime(fp)
    sz = os.path.getsize(fp)
    # Tuple key: equal only when BOTH components are equal.
    return (sz, int(dt))


def findDupcateFiles(folder_name):
    """
    Find groups of duplicate files under ``folder_name`` (recursively).

    Files are grouped by the key returned from :func:`getUniqueKey`
    (size + last-modified time); any group with more than one member is
    reported as a set of duplicates.

    :param folder_name: root directory to scan
    :return: list of lists; each inner list holds the paths of one
             group of duplicate files (always 2+ entries)
    """
    files_by_key = {}
    # Renamed loop variable: the original shadowed the builtin ``dir``.
    for dirpath, _subdirs, filenames in os.walk(folder_name):
        for name in filenames:
            f_path = os.path.join(dirpath, name)
            try:
                key = getUniqueKey(f_path)
            except OSError:
                # File vanished (or became unreadable) between the walk
                # and the stat call — skip it instead of aborting the scan.
                continue
            files_by_key.setdefault(key, []).append(f_path)
    # Only groups with 2+ paths are actual duplicates.
    return [paths for paths in files_by_key.values() if len(paths) > 1]


def printDupcates(duplicates):
    """
    Pretty-print the duplicate-file groups found by ``findDupcateFiles``.

    Prints a "not found" message when the list is empty; otherwise prints
    each group of paths between dashed separators.

    :param duplicates: list of lists of file paths (each inner list is one
                       group of duplicates)
    :return: None
    """
    # Guard clause replaces the original if/else pyramid.
    if not duplicates:
        print("Duplicated files: not found.")
        return
    for group in duplicates:
        # "filse" typo in the original message fixed to "file".
        print('Duplicates files: (by file size and last date-time changes):')
        print('-----------')
        for path in group:
            print(path)
        print('-----------')


if __name__ == '__main__':
    # Script entry point: expects the folder to scan as the first argument.
    if len(sys.argv) < 2:
        print(f"Usage: python {__file__} folder_name")
    else:
        printDupcates(findDupcateFiles(sys.argv[1]))