onlyforbopi
6/12/2017 - 4:11 PM

Python.Ready.ReadyApps

Python.Ready.ReadyApps #python #Python #ready #apps #readyapps #binarygenerator #lineslicer #columnslicer #keycounter #compare

READYAPPS

1. (READYAPP) BinaryGenerator.py 
2. (READYAPP) LineSlicer.py
3. (READYAPP) ColumnSlicer.py
4. (READYAPP) ColumnSlicer.py with fully functional logging system
5. (READYAPP) Keycounter.py / Keycounteradv.py
6. (READYAPP) File_Compare_v1.py 
7. (READYAPP) Get_large_file_sizes.py
8. (READYAPP) Remove Duplicates from file
9. (READYAPP) Hexdump Implementation
10. (READYAPP) FQ.py (File Query)
11. (READYAPP) GroupingSplitter.py
12. (READYAPP) TextMerger.py
13. (READYAPP) DirectoryCmp.py

# -*- coding: utf-8 -*-
"""
Created on Thu Apr  6 04:36:32 2017

@author: panagos
"""



#######################################
# LIBRARIES
import os
import sys
import time
from datetime import datetime
import timeit

#######################################
# FUNCTIONS


def script_path_param(string1):
    """Split a script path into its name, its directory and the path itself.

    Input: string1 - path of the script, normally sys.argv[0]
    Output: tuple (base file name, directory of the resolved path, string1 unchanged)
    """
    import os
    resolved = os.path.realpath(string1)
    return ( os.path.basename(string1), os.path.dirname(resolved), string1 )


def script_time_param():
    """Return the current local time as text, using the "%c" format.

    Input: None
    Output: string with formatted time
    Usage: print (script_time_param()) or a = script_time_param()
    """
    stamp = time.strftime("%c")
    return stamp

def usage(string1):
    """Print the script's base name and its call syntax; always returns True."""
    script = str(os.path.basename(string1))
    print ("\n" + "Name: " + script)
    print ("Call as: " + script + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n")
    return True


def inputscreenmenu():
    """Draw the "Binary converter" menu and return what the user typed.

    Output: the raw string read by input() (no validation is done here)
    """
    border = '*' * 60
    print (border)
    print ('*' + " Binary converter " )
    print (border)
    print ('*' + " Operations: \n" + '*')
    for entry in ("1. From Binary", "2. To Binary" + "\n" + '*'):
        print ("*" + "\t" + entry)
    print (border)
    print ('*')
    selection = input('*' + " Choose Mode: ")
    print ('*' + " Script Starting ...")
    return selection
    

def timing_val(func):
    """Decorator that times a call and prints the elapsed wall-clock seconds.

    The wrapped function returns a tuple (elapsed_seconds, result, function_name)
    instead of the bare result.
    """
    def wrapper(*arg, **kw):
        '''source: http://www.daniweb.com/code/snippet368.html'''
        t1 = time.time()
        res = func(*arg, **kw)
        # end minus start: the previous t1-t2 printed a negative duration
        elapsed = time.time() - t1
        # report the timed function's name, not the module's __name__
        print ('%r (%r, %r) %2.2f sec' % \
              (func.__name__, arg, kw, elapsed))
        return elapsed, res, func.__name__
    return wrapper


#######################################
# WORK FUNCTIONS - TO DO

# Checks and controls
# Greeting string
# 2 modes of operation, to and from
# Output on screen

#st = "hello"

def to_binary(string1):
    """Return string1 as one continuous string of zero-padded 8-bit binary codes."""
    octets = [format(ord(ch), '08b') for ch in string1]
    return ''.join(octets)

#print(to_binary(st))

def bin_tonormal(string1):
    """Decode binary text back into characters.

    Input: either one continuous bit string (split into groups of 8 bits)
           or an iterable of separate bit strings (decoded one item each).
    Output: the decoded string.
    Raises: ValueError when a group contains anything other than 0/1.
    """
    if isinstance(string1, str):
        # A plain string used to be decoded one *character* at a time, which
        # only ever produced chr(0)/chr(1); decode whole 8-bit groups instead.
        groups = [string1[i:i + 8] for i in range(0, len(string1), 8)]
    else:
        groups = string1
    return ''.join([chr(int(bits, 2)) for bits in groups])

# Example:
#print (bin_tonormal("0110100001100101011011000110110001101111"))  # hello

def bintoascii(stri):
    """Decode a string of 8-bit binary groups.

    Output: tuple (decoded text, list with one entry per decoded character)
    """
    total = len(str(stri))
    decoded = []
    for start in range(0, total, 8):
        decoded.append(chr(int(stri[start:start + 8], 2)))
    text = ''.join(decoded)
    return (text, [str(ch) for ch in text])

#print (bintoascii("0110100001100101011011000110110001101111")[0])
#print (bintoascii("0110100001100101011011000110110001101111")[1])

def string2bits(s=''):
    """Return a list with the zero-padded 8-bit binary string of each character in s."""
    bit_list = []
    for ch in s:
        bit_list.append(bin(ord(ch))[2:].zfill(8))
    return bit_list

#@timing_val
def bits2string(b=None):
    """Join a sequence of bit strings back into text.

    Input: b - iterable of binary strings, one per character (e.g. the
           output of string2bits); None is treated as "nothing to decode".
    Output: the decoded string ('' for None or an empty sequence).
    """
    if b is None:
        # the declared default used to raise TypeError when iterated
        return ''
    return ''.join([chr(int(x, 2)) for x in b])

#s = input("Enter String: ")
#b = string2bits(s)
#s2 = bits2string(b)

#print ('String:')
#print (s)

#print (b)
#print (s2)

#print ('\nList of Bits:')
#for x in b:
#    print (x, end="")

#print ('\nString:')
#print (s2)

############################################
# 

starttime = datetime.now()
print ("@" + "\n" + "Script started at " + script_time_param() + "\n")

# Resolve the script parameters once instead of once per printed field.
script_params = script_path_param(sys.argv[0])
print ("Script parameters: ")
for param in script_params:
    print (str(param))
print ("\n")

# Surrounding blanks typed at the prompt should not defeat the comparison.
choice = inputscreenmenu().strip()

if choice == '1':
    source = input("Enter binary string for conversion: ")
    try:
        target = bintoascii(source)
    except ValueError:
        # int(..., 2) rejects anything that is not made of 0s and 1s
        print ("ERROR: input is not a valid binary string.")
    else:
        print ("Equivalent alphanumeric: " + str((target)[0]))
        print ("Equivalent alphanumeric in list form: " + str((target)[1]))
elif choice == '2':
    source = input("Enter alphanumeric string for conversion: ")
    target = to_binary(source)
    target2 = string2bits(source)
    print ("Equivalent binary: " + str(target))
    print ("Equivalent binary in list form: " + str(target2))
else:
    # Previously an unknown mode was skipped without any message.
    print ("Unknown mode: " + repr(choice) + " (expected 1 or 2)")


print ("\n"+ "@\n" + "Script ended at " + script_time_param())
print ("Elapsed duration: " + str(datetime.now() - starttime))
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""




import os
import sys
import time


# Command-line inputs: <file_in> <start> <end>.
file_in = sys.argv[1]
# Shift the user-supplied start by one to get a 0-based list index.
start_in = int(sys.argv[2]) - 1
# NOTE(review): the end value is shifted by one as well and is later used as
# the (exclusive) upper bound of a slice - confirm the last line is meant to
# be left out.
end_in = int(sys.argv[3]) - 1
# The result is written next to the input, with ".proc" appended to its name.
file_ot_00 = str(file_in) + ".proc"
output_file_list = []

def usage(string1):
    """Print the name of the script and how to call it; returns True."""
    prog = str(os.path.basename(string1))
    messages = [
        "\n" + "Name: " + prog,
        "Call as: " + prog + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n",
    ]
    for text in messages:
        print (text)
    return True


def chck_args_num(var):
    '''
    Function : chck_args_num(var)
    Description: Compares the number of user arguments with the expected 3
    Input: Integer - len(sys.argv), i.e. counting the script name as well
    Output: Boolean - True only when exactly 3 user arguments were given
    Called as : chck_args_num(len(sys.argv))
    '''
    expected = 3
    supplied = var - 1  # the script name itself is not an argument
    if supplied == expected:
        print ('Correct number of arguments provided: ' + str(supplied) + "\n" )
        return True
    print ('Wrong number of arguments : ' + str(supplied))
    print ('Must be : ' + str(expected))
    return False

def script_path_param(string1):
    """Return (script name, script directory, full path) for the running script.

    The name is the base name of string1; the directory is the resolved
    location of this source file (__file__) with a trailing separator, and
    the full path is simply directory + name.
    """
    base_name = os.path.basename(string1)
    here = os.path.realpath(__file__)
    folder = os.path.dirname(here) + os.sep
    return ( base_name, folder, folder + base_name )


def script_time_param():
    """Format the current local time with "%c" and return it as a string."""
    time_format = "%c"
    return time.strftime(time_format)

def count_file(file):
    """Count the lines, whitespace-separated words and characters of a text file.

    Output: tuple (lines, words, chars)
    """
    line_total = 0
    word_total = 0
    char_total = 0
    with open(str(file), 'r') as handle:
        for text in handle:
            line_total = line_total + 1
            word_total = word_total + len(text.split())
            char_total = char_total + len(text)
    return (line_total, word_total, char_total)

def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with the surrounding
                 whitespace (including the line ending) stripped
    Input: File (path)
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # "with" closes the handle; list(open(...)) used to leave it open.
    with open(str(file), 'r') as in_file:
        return [line.strip() for line in in_file]


#######################################
# SCRIPT START
# The opening quote of "Script started at " was missing, which made the whole
# script a syntax error.
print ("@@\n" + "Script started at " + script_time_param() + "\n" )

print ("Script basic parameters: ")
print ("Script name" + script_path_param(sys.argv[0])[0])
print ("Script directory" + script_path_param(sys.argv[0])[1])
print ("Script full path" + script_path_param(sys.argv[0])[2])


print ("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (IOError, OSError, UnicodeError):
    # Stop here: continuing would only fail later with a NameError.
    print ("ERROR: Parsing input file.\n")
    sys.exit(1)

# slice out the required part (lines) and add to output list
print ("Trimming input file..\n")
output_file_list = input_file[start_in:end_in]

# iterate over output list and print
print ('Writing to output..\n')
try:
    ot1_file = open(file_ot_00, "w")
except (IOError, OSError):
    print ("ERROR: Opening output file.\n")
    sys.exit(1)

# the with-block closes the output file even if a write fails
with ot1_file:
    for j in output_file_list:
        ot1_file.write(j + "\n")

print ("Script finished at " + script_time_param() + "\n" )
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""




import os
import sys
import time


# Command-line inputs: <file_in> <column>.
file_in = sys.argv[1]
# Shift the user-supplied column number by one to get a 0-based index.
start_in = int(sys.argv[2]) - 1
# The result is written next to the input, with ".proc.col" appended.
file_ot_00 = str(file_in) + ".proc.col"
output_file_list = []


def usage(string1):
    """Show the script name followed by its call syntax. Returns True."""
    name_only = str(os.path.basename(string1))
    header = "\n" + "Name: " + name_only
    call_line = "Call as: " + name_only + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n"
    print (header)
    print (call_line)
    return True


def chck_args_num(var):
    '''
    Function : chck_args_num(var)
    Description: Checks that exactly 3 user arguments were supplied
    Input: Integer - len(sys.argv); the script name is subtracted here
    Output: Boolean
    Called as : chck_args_num(len(sys.argv))
    '''
    required_count = 3
    given_count = var - 1
    count_ok = (given_count == required_count)
    if not count_ok:
        print ('Wrong number of arguments : ' + str(given_count))
        print ('Must be : ' + str(required_count))
    else:
        print ('Correct number of arguments provided: ' + str(given_count) + "\n" )
    return count_ok

def script_path_param(string1):
    """Build the (name, directory, full path) triple of the running script.

    The directory comes from the resolved location of __file__ and always
    ends with the OS path separator; the full path is directory + name.
    """
    directory = os.path.dirname(os.path.realpath(__file__)) + os.sep
    name = os.path.basename(string1)
    return ( name, directory, directory + name )


def script_time_param():
    """Return a "%c"-formatted string for the current local time.

    Usage: print (script_time_param()) or a = script_time_param()
    """
    formatted_now = time.strftime("%c")
    return formatted_now

def count_file(file):
    """Return (lines, words, chars) for the text file at the given path.

    Words are whitespace-separated tokens; chars include the line endings.
    """
    totals = [0, 0, 0]  # lines, words, chars
    with open(str(file), 'r') as source:
        for row in source:
            totals[0] += 1
            totals[1] += len(row.split())
            totals[2] += len(row)
    return (totals[0], totals[1], totals[2])

def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with the surrounding
                 whitespace (including the line ending) stripped
    Input: File (path)
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # Open inside "with" so the handle is released; it used to stay open.
    with open(str(file), 'r') as in_file:
        return [line.strip() for line in in_file]
    
    
    
#######################################
# SCRIPT START
print ("Script started at " + script_time_param() + "\n" )

print ("Script basic parameters: ")
print ("Script name" + script_path_param(sys.argv[0])[0])
print ("Script directory" + script_path_param(sys.argv[0])[1])
print ("Script full path" + script_path_param(sys.argv[0])[2])


print ("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (IOError, OSError, UnicodeError):
    # Stop here: continuing would only fail later with a NameError.
    print ("ERROR: Parsing input file.\n")
    sys.exit(1)


# iterate over the input lines and write the selected column
print ('Writing to output..\n')
try:
    ot1_file = open(file_ot_00, "w")
except (IOError, OSError):
    print ("ERROR: Opening output file.\n")
    sys.exit(1)


# the with-block closes (and flushes) the output, which was never closed before
with ot1_file:
    for line in input_file:
        try:
            field = line.split()[start_in]
        except IndexError:
            # Blank line or too few columns: emit an empty line so the output
            # keeps one row per input row instead of aborting mid-file.
            field = ""
        ot1_file.write(field + "\n")
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""




import os
import sys
import time
import logging



# #############################################################    
# Logging Configuration
# Available loggers: 
#   1. logging (general, root logger)
#   2. logger1 (boot/checks)
#   3. logger2 (main code)
#
# Usage:
# 'application' code
# logger.debug('debug message')
# logger.info('info message')
# logger.warning('warn message')
# logger.error('error message')
# logger.critical('critical message')
#
# set up logging to file: everything from DEBUG upwards goes to hello.log,
# which is overwritten on every run (filemode='w')
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M',
                    filename='hello.log',
                    filemode='w')
 
# define a Handler which writes INFO messages or higher to the sys.stderr
# If we want the lowest setting - we must set this to DEBUG.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
 
# set a format which is simpler for console use (no timestamp)
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
 

# tell the handler to use this format
console.setFormatter(formatter)
# add the handler to the root logger
logging.getLogger('').addHandler(console)
 
# Now, we can log to the root logger, or any other logger. First the root...
#logging.info('Jackdaws love my big sphinx of quartz.')
# logging.debug('THIS SHOULD NOT BE ON SCREEN')
# Now, define a couple of other loggers which might represent areas in your
# application:
 
# logger1 is used for start-up steps, logger2 for the main body of the script
logger1 = logging.getLogger('ColumnSlicer.BootChecks')
logger2 = logging.getLogger('ColumnSlicer.MainBody')
 
# Examples
#logger1.debug('Quick zephyrs blow, vexing daft Jim.')
#logger1.info('How quickly daft jumping zebras vex.')
# logger2.warning('Jail zesty vixen who grabbed pay from quack.')
# logger2.error('The five boxing wizards jump quickly.')
# logging.debug('blabla')
# ######
#

logger1.debug("Initial var assignment")
# Read <file_in> and <column> from the command line; the column number is
# shifted by one to become a 0-based index.
try:
    file_in = sys.argv[1]
    start_in = int(sys.argv[2]) - 1
    file_ot_00 = str(file_in) + ".proc.col"
    output_file_list = []
# NOTE(review): the bare except only logs and lets the script carry on, so
# some of the names above may be left undefined (e.g. a non-numeric column) -
# confirm this is acceptable.
except:
    logger1.critical("ERROR: At initial variable assignment")

logger1.debug("Initial function declaration")
def usage(string1):
    """Print "Name:" and "Call as:" lines for the given script path; returns True."""
    shown_name = str(os.path.basename(string1))
    for message in ("\n" + "Name: " + shown_name,
                    "Call as: " + shown_name + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n"):
        print (message)
    return True


def chck_args_num(var, limit):
    '''
    Function : chck_args_num(var, limit)
    Description: Checks that the number of user arguments equals limit
    Input: var   - Integer, len(sys.argv) (script name included)
           limit - Integer, the number of user arguments required
    Output: Boolean
    Called as : chck_args_num(len(sys.argv), 2)
    '''
    supplied = var - 1  # leave out the script name
    if supplied == limit:
        print ('Correct number of arguments provided: ' + str(supplied) + "\n" )
        return True
    print ('Wrong number of arguments : ' + str(supplied))
    print ('Must be : ' + str(limit))
    return False

def script_path_param(string1):
    """Return the script's (name, directory, full path).

    name is the base name of string1; directory is where this source file
    (__file__) really lives, ending in os.sep; full path is directory + name.
    """
    parts = [os.path.basename(string1),
             os.path.dirname(os.path.realpath(__file__)) + os.sep]
    parts.append(parts[1] + parts[0])
    return tuple(parts)


def script_time_param():
    """Give the current local time as a "%c"-formatted string (takes no input)."""
    current = time.strftime("%c")
    return current

def count_file(file):
    '''
    Name: count_file
    Function: Counts lines, words and characters of a text file in one pass
    Input: filename
    Output: tuple of (lines, words, chars); words are whitespace-separated,
            chars include line endings
    Usage: a = count_file(file_in)
    '''
    n_lines, n_words, n_chars = 0, 0, 0
    with open(str(file), 'r') as reader:
        for record in reader:
            n_lines, n_words, n_chars = (n_lines + 1,
                                         n_words + len(record.split()),
                                         n_chars + len(record))
    return (n_lines, n_words, n_chars)

def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with the surrounding
                 whitespace (including the line ending) stripped
    Input: File (path)
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # The file is closed when the with-block ends; it was left open before.
    with open(str(file), 'r') as in_file:
        return [line.strip() for line in in_file]
    

def reporting_f(orig_file, new_file):
    '''
    Placeholder for a reporting step: the body is empty, so the function
    does nothing with orig_file / new_file and returns None.
    '''


#######################################
# SCRIPT START
logger2.info("@@\n")
logger2.info("Script started at " + script_time_param() + "\n" )

logger2.debug ("Script basic parameters: ")
logger2.debug("Script name" + script_path_param(sys.argv[0])[0])
logger2.debug ("Script directory" + script_path_param(sys.argv[0])[1])
logger2.debug ("Script full path" + script_path_param(sys.argv[0])[2])

logger2.info("Checking supplied arguments....")
if not chck_args_num(len(sys.argv), 2):
    logger2.critical('Terminating Script....')
    # non-zero status so callers can tell the run failed
    sys.exit(1)

logger2.info("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (IOError, OSError, UnicodeError):
    # Stop here: continuing would only fail later with a NameError.
    logger2.critical("ERROR: Parsing input file.\n")
    sys.exit(1)


# iterate over the input lines and write the selected column
logger2.info('Writing to output..\n')
try:
    ot1_file = open(file_ot_00, "w")
except (IOError, OSError):
    logger2.critical("ERROR: Opening output file.\n")
    sys.exit(1)


# the with-block closes (and flushes) the output, which was never closed before
with ot1_file:
    for line in input_file:
        try:
            field = line.split()[start_in]
        except IndexError:
            # Blank line or too few columns: keep one output row per input row.
            logger2.warning("Line has no column %d; writing empty row", start_in + 1)
            field = ""
        ot1_file.write(field + "\n")

# Reporting
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""


import os
import sys
import time


# The input file is the first command-line argument; the three output files
# share its name with ".proc", ".proc2" and ".proc3" appended.
file_in = sys.argv[1]
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"






def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with the surrounding
                 whitespace (including the line ending) stripped
    Input: File (path)
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # Use a context manager so the handle is closed once the lines are read.
    with open(str(file), 'r') as in_file:
        return [line.strip() for line in in_file]
    
    
# key -> [occurrence count, line 1, line 2, ...]
dict_in = dict()


fileinlist = filetoliststrip(file_in)

# Group the lines by the key found in columns 11-69 (slice 10:69).
for line in fileinlist:
    keyf = line[10:69]
    entry = dict_in.setdefault(keyf, [0])
    entry[0] += 1
    entry.append(line)


# One output file per frequency class: once, twice, more than twice.
# The with-block closes all three files even if a write fails.
with open(file_ot, 'w') as out_file, open(file_ot2, 'w') as out_file2, open(file_ot3, 'w') as out_file3:
    for j in dict_in.keys():
        # The lines were stripped when read, so the newline has to be put
        # back; without it every record ended up on one single output line.
        # NOTE(review): only one line per key is written (the 1st, 2nd or 3rd
        # occurrence respectively), as before - confirm that is intended.
        if dict_in[j][0] < 2:
            out_file.write(dict_in[j][1] + "\n")
        elif dict_in[j][0] == 2:
            out_file2.write(dict_in[j][2] + "\n")
        elif dict_in[j][0] > 2:
            out_file3.write(dict_in[j][3] + "\n")
        
        
####################################################################
# ADVANCED VERSION FOR LARGE FILES


# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017

@author: P.Doulgeridis
"""


import os
import sys
import time
import collections


# Input path from the command line; outputs are <input>.proc, <input>.proc2
# and <input>.proc3.
file_in = sys.argv[1]
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"








# dict_in = collections.defaultdict(list)  # save some time with a dictionary factory
# with open(file_in, "r") as f:  # open the file_in for reading
    # for line in f:  # read the file line by line
        # print("Read line: " + str(line))
        # key = line.strip()[10:69]  # assuming this is how you get your key
        # dict_in[key].append(line)  # add the line as an element of the found key
# # now that we have the lines in their own key brackets, lets write them based on frequency
# with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
    # selector = {1: f1, 2: f2}  # make our life easier with a quick length-based lookup
    # for values in dict_in.values():  # use dict_in.itervalues() on Python 2.x
        # selector.get(len(values), f3).writelines(values)  # write the collected lines
        
        
        
# key -> every (unstripped) line that carries this key
dict_in = collections.defaultdict(list)
with open(file_in, "r") as f:
    for line in f:  # stream the file instead of loading it whole
        print("read line: " + str(line))
        key = line.strip()[10:69]  # key taken from the stripped line
        dict_in[key].append(line)
# route each key group by how many lines it holds:
# exactly 1 -> f1, exactly 2 -> f2, anything else -> f3
with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
    selector = {1: f1, 2: f2}
    for values in dict_in.values():
        selector.get(len(values), f3).writelines(values)
#!/usr/bin/python

###############################################################################
# Packages.
import sys

###############################################################################
# Global variables.

# File names; placeholders until chk_args() fills them in from the command line.
fltrName = ""
baseName = ""
comnName = ""
diffName = ""

# Key start positions and lengths for the filter and base files; -1 until
# chk_args() sets them.
fltrStrt = -1
fltrLeng = -1
baseStrt = -1
baseLeng = -1

# Constants.
# Number of command-line arguments the script requires.
ARG_NUM = 6

###############################################################################
# Functions.

def chk_args( p_sys_argv ):
   """Check command-line arguments and load them into the module globals.

   Input:  p_sys_argv - argument vector (script name first), normally sys.argv
   Output: 1 when exactly ARG_NUM arguments were given and the four
           start/length values are integers (globals are then set), else 0.
   """

   global fltrName, baseName, comnName, diffName
   global fltrStrt, fltrLeng, baseStrt, baseLeng

   # print() with a single string behaves the same on Python 2 and Python 3.
   given = len(p_sys_argv) - 1
   if given != ARG_NUM :
      print("WRONG number of arguments:  %d" % given)
      print("Must be:  %d" % ARG_NUM)
      print("Usage: " + " ".join([p_sys_argv[0], " File_Filter", "File_Base", " Filter_Start", "Filter_Length", " Base_Start", "Base_Length"]))
      return 0

   # Read from the vector that was passed in (sys.argv used to be read
   # directly, ignoring the parameter) and parse before touching any global.
   try:
      numbers = [int(value, 10) for value in p_sys_argv[3:7]]
   except ValueError:
      print("Start and length arguments must be integers: " + " ".join(p_sys_argv[3:7]))
      return 0

   fltrName = p_sys_argv[1]
   baseName = p_sys_argv[2]
   comnName = baseName + ".cmn"
   diffName = baseName + ".dif"

   fltrStrt, fltrLeng, baseStrt, baseLeng = numbers

   return 1

###############################################################################
# Start.

# All output goes through print() with one string argument, which produces
# the same text on Python 2 and Python 3.
print(sys.argv[0] + " Started.") ; print("")

# Check arguments.
print("Checking arguments...")

if chk_args( sys.argv ) == 0 :
   print("Exiting...")
   sys.exit(69)

print("fltrName =  %s" % fltrName)
print("baseName =  %s" % baseName)
print("comnName =  %s" % comnName)
print("diffName =  %s" % diffName)
print("fltrStrt =  %s" % fltrStrt)
print("fltrLeng =  %s" % fltrLeng)
print("baseStrt =  %s" % baseStrt)
print("baseLeng =  %s" % baseLeng)

###############################################################################
# Load fltr file to table.

print(""); print("Loading filter file to table...")

fltrList = []

# Convert the 1-based start position into a 0-based slice.
fltrFrom = fltrStrt - 1
fltrTo   = fltrFrom + fltrLeng

with open(fltrName, "r") as fltrFile :
   for fltrLine in fltrFile :
      fltrIndx = fltrLine[fltrFrom : fltrTo]
      fltrList.append(fltrIndx)
      print(fltrIndx)

# Set lookup is O(1) per base line; scanning the list was O(filter size).
fltrKeys = set(fltrList)

###############################################################################
# Proces base file.

print(""); print("Processing base file...")

baseFrom = baseStrt - 1
baseTo   = baseFrom + baseLeng

# Lines whose key is in the filter go to the .cmn file, all others to .dif.
with open(baseName, "r") as baseFile, open(comnName, "w") as comnFile, open(diffName, "w") as diffFile :
   for baseLine in baseFile:
      baseIndx = baseLine[baseFrom : baseTo]
      print(baseIndx)

      if baseIndx in fltrKeys :
         comnFile.write(baseLine)
      else :
         diffFile.write(baseLine)

###############################################################################
# End.

print(""); print(sys.argv[0] + " Ended.")
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 12:45:00 2017

@author: P.Doulgeridis
"""

# necessary module to work with excel
#import openpyxl

# documentation: http://openpyxl.readthedocs.org/.




import os
import sys

# Directory and size value taken from the command line.
# NOTE(review): neither value is passed to the call at the end of this
# script, which uses hard-coded arguments - confirm whether that is intended.
rootDir = sys.argv[1]
sizing = sys.argv[2]








# for folderName, subFolders, fileNames in os.walk(rootDir):
	# for fileName in fileNames:
		# filePath = os.path.join(folderName, fileName)
		# if os.path.exists(filePath):
			# fileSize = os.path.getsize(filePath)
			# fileSize = fileSize/1024/1024 # Convert it to MB
			# if fileSize > int(sizing):
				# print("{0}\t{1}".format(fileSize,filePath))
                
                

def get_small_files(rootDir, sizing):
    r"""
    Name: get_small_files
    Function: get_small_files(rootDir, sizing)
    Input: rootDir - directory that is walked recursively
           sizing  - size limit in MB (any value int() accepts)
    Output: list of (size_in_MB, path) tuples for files smaller than sizing
    Usage: print (get_small_files('/', 1))
    Storage: PYTHON FILE UTILITIES
    Notes: Recursive. Every match is also printed as "size<TAB>path".
           Files whose size cannot be read are skipped.
    """

    # Necessary modules
    import os

    # Convert the limit once instead of once per file
    limit_mb = int(sizing)

    # Initiate list
    list_out = []

    # Iterate over os.walk, recursive
    for folderName, subFolders, fileNames in os.walk(rootDir):
        for fileName in fileNames:
            filePath = os.path.join(folderName, fileName)

            # Ask for the size directly: a separate exists() check can be
            # outdated by the time getsize() runs (file removed in between).
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # broken link, vanished or unreadable file
                continue
            fileSize = fileSize/1024/1024 # Convert it to MB

            # Compare and output.
            if fileSize < limit_mb:
                print("{0}\t{1}".format(fileSize,filePath))
                list_out.append((fileSize, filePath))
    return list_out

                
                
                
                
                
                
                
                
                
                
                
                
def get_large_files(rootDir, sizing):
    r"""
    Name: get_large_files
    Function: get_large_files(rootDir, sizing)
    Input: rootDir - directory that is walked recursively
           sizing  - size limit in MB (any value int() accepts)
    Output: list of (size_in_MB, path) tuples for files larger than sizing
    Usage: print (get_large_files('/', 1))
    Storage: PYTHON FILE UTILITIES
    Notes: Recursive. Every match is also printed as "size<TAB>path".
           Files whose size cannot be read are skipped.
    """

    # Necessary modules
    import os

    # Convert the limit once instead of once per file
    limit_mb = int(sizing)

    # Initiate list
    list_out = []

    # Iterate over os.walk, recursive
    for folderName, subFolders, fileNames in os.walk(rootDir):
        for fileName in fileNames:
            filePath = os.path.join(folderName, fileName)

            # Ask for the size directly: a separate exists() check can be
            # outdated by the time getsize() runs (file removed in between).
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # broken link, vanished or unreadable file
                continue
            fileSize = fileSize/1024/1024 # Convert it to MB

            # Compare and output
            if fileSize > limit_mb:
                print("{0}\t{1}".format(fileSize,filePath))
                list_out.append((fileSize, filePath))
    return list_out
    
    
    
#print (get_large_files('/', 600))
# NOTE(review): this scans '/' with a fixed limit of 1; the rootDir and sizing
# values read from sys.argv earlier in this script are not passed in - confirm.
print (get_small_files('/', 1))
    
import sys
import os
import subprocess




file_in = sys.argv[1]
# Renamed from "range": that name replaced the builtin range() for all code
# running after this line.
group_range = int(sys.argv[2])
file_ot = file_in + ".proc"


print(file_in)
print(file_ot)

# Creates (or truncates) the output file; nothing in this part writes to it.
file_out = open(file_ot, 'w')

counter_line = 0
counter_title = 0
total_lines = 0


current_line = ""
previous_lines = []

# control substring -> list of the lines that contain it
dicta = dict()


with open(file_in, 'r') as f:
    for line in f:
        counter_line += 1
        # control substring: columns 11-20 of the raw line
        control_s = line[10:20]
        dicta.setdefault(control_s, []).append(line)


key_list = []
# total number of grouped lines (equals the number of lines read)
line_length = sum(len(grouped) for grouped in dicta.values())

# Release the handle; it used to stay open until the interpreter exited.
file_out.close()
    
    
    
###########################################################

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 10:52:24 2017

@author: P.Doulgeridis
"""

import sys
import os



file_input = sys.argv[1]
file_ot = file_input + ".py.nodupl"

print(file_input)
print(file_ot)


# Lines already written, without their line ending.
seen = set()

# The input was opened through the undefined name "file_in"; use file_input.
# Both files are closed by the with-block.
with open(file_input, 'r') as infile, open(file_ot, 'w') as outfile:
    for line in infile:

        # control = line[1:10]
        # Compare without the newline so a final line lacking one still
        # matches an earlier identical line.
        control = line.rstrip("\n")

        if control not in seen:
            seen.add(control)
            # Write exactly one newline; "line + '\n'" doubled every line break.
            outfile.write(control + "\n")
# DOWNLOAD VIEW RAW
def hexdump(src, length=16):
    """Return a hex dump of src as text.

    Input:  src    - str or bytes/bytearray to dump
            length - number of items shown per output line
    Output: one line per chunk: 4-digit hex offset, the hex codes, then the
            printable characters (anything else is shown as '.'); '' for
            empty input.
    """
    # Characters whose repr() needs no escape sequence are shown as-is.
    FILTER = ''.join([chr(x) if len(repr(chr(x))) == 3 else '.' for x in range(256)])
    lines = []
    for c in range(0, len(src), length):
        chunk = src[c:c+length]
        # Iterating bytes yields ints, iterating str yields 1-char strings;
        # normalise to integer codes so both input types work.
        codes = [x if isinstance(x, int) else ord(x) for x in chunk]
        hex_part = ' '.join(["%02x" % code for code in codes])
        printable = ''.join([FILTER[code] if code <= 127 else '.' for code in codes])
        lines.append("%04x  %-*s  %s\n" % (c, length*3, hex_part, printable))
    return ''.join(lines)
    
    
    
# print(hexdump("asdfasdfsdfsdfasdfasdfasdfaksdfaksdfjaksdjsfasd", length=16))
 
#
#0000  61 73 64 66 61 73 64 66 73 64 66 73 64 66 61 73   asdfasdfsdfsdfas
#0010  64 66 61 73 64 66 61 73 64 66 61 6b 73 64 66 61   dfasdfasdfaksdfa
#0020  6b 73 64 66 6a 61 6b 73 64 6a 73 66 61 73 64      ksdfjaksdjsfasd
#!/usr/bin/python


# #################################################################
#
#
# -----------------------------------------------------------------
#
#
#
# -----------------------------------------------------------------
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# #####################################################################
# to do

# reporting
# counters


import os
import sys
import time




def usage():
    """Print the name, call syntax and argument description of FQ.py."""
    help_text = """
Name: FQ.py 
Called as: FQ.py <file_in> <start> <end> <pattern1> <pattern2> ...<patternN>
Input: <file_in> : full path to file
       <start>      : integer
       <end>        : integer
       <pattern.>   : string
"""
    print(help_text)



# script time param
def script_time_param():
    """Return the current local time, formatted with "%c", as a string.

    Usage: print (script_time_param()) or a = script_time_param()
    """
    pattern = "%c"
    return time.strftime(pattern)


# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - script path, called as script_path_param(sys.argv[0])
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: vocal == 'yes': True after printing name, directory, full path
                            and the start time
            otherwise:      tuple ( script_name, script_dir, script_full )
            False when string1 is not a usable path
    Usage: Precaches basic script parameters
    Notes: None
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        # join() inserts the path separator that plain "dir + name" left out
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError):
        # Return instead of falling through to code that would then fail on
        # the unassigned names.
        print("Error loading script parameters. Possible problem with os module")
        return False

    if vocal != 'yes':
        return ( script_name, script_dir, script_full )

    print("Script name: " + str(script_name))
    print("Script directory: " + str(script_dir))
    print("Script full: " + str(script_full))
    print ('Script started at: ' + time.strftime("%c"))
    return True


# isinteger
def typecheck(input, type):
    '''
    Name: typecheck
    Function: typecheck(input, type)
    Input:  * input - the value to examine
            * type  - a class, or tuple of classes, as accepted by
                      isinstance (e.g. int, str, list, dict, tuple)
    Output: [ T | F ]
    Usage: Checks whether a value is an instance of the given type.
    Notes: Thin wrapper around isinstance.
    '''
    matches = isinstance(input, type)
    return matches

       
        
# file exists
def fileexists(filepath):
  '''
  Function: fileexists
  Description: Tells whether the given path exists
  Input: filepath (or raw string of it)
  Output: Boolean
  Usage: if fileexists(file_in):...
  Notes: Depending on the system the path may need to be
  written as a raw string: r'file_in'.
  '''
  import os.path
  found = os.path.exists(filepath)
  return True if found else False

    
# timer
def timing_val(func):
    '''
    Author: p.doulgeridis
    Name: timing_val
    Function: timing_val(func)
    Input: none, decorator function
    Output: prints the elapsed time of the call; the decorated function
            returns the tuple (elapsed_seconds, original_result)
    Usage: @timing_val over function declaration
    Notes: wrapper timing function.
    '''
    def wrapper(*arg, **kw):

        t1 = time.time()
        res = func(*arg, **kw)
        # end minus start: the previous t1-t2 printed a negative duration
        elapsed = time.time() - t1
        print ('%r (%r, %r) %0.9f sec' % \
              (func.__name__, arg, kw, elapsed))
        return elapsed, res
    return wrapper
    
    
def calcoffsetsub(sub_start, sub_length):
    '''
    Name: calcoffsetsub
    Function: Turns an awk/bash style (1-based start, length) pair into
              Python slice bounds
    Input: start, length (integers or numeric strings)
    Output: tuple (slice start, slice end)
    Usage: calcoffsetsub(2, 10)  ->  (1, 11)
    Notes: None
    '''
    zero_based_start = int(sub_start) - 1
    return (zero_based_start, zero_based_start + int(sub_length))


def parsearguments(list_in):
    '''
    Name: parsearguments
    Function: parsearguments(list_in)
    Input: list_in - argument vector (normally sys.argv)
    Output: <list> - every element at index 4 and beyond, i.e. the filter
            patterns that follow <script> <file> <start> <length>.
    Usage: parsearguments(sys.argv)
    Notes: Script specific. Operates on the list that is passed in rather
           than on the global sys.argv.
    '''
    return list(list_in[4:])



# chk_args_type
def chck_args_type(string):
    '''
    Function : chck_args_type(string)
    Description: Prints the script name and each numbered argument.
    Input: string - argument vector (normally sys.argv)
    Output: STDOUT print; always returns 0
    Called as chck_args_type(sys.argv)
    Notes: Uses print() calls (the print statement form is a syntax error
           on Python 3) and reports the list it was given rather than the
           global sys.argv.
    '''
    for i, argument in enumerate(string):
        if i == 0:
            print("Arguments for script: %s" % (argument,))
        else:
            print("%d. argument: %s" % (i, argument))
    print("\n")

    return 0

def script_end():
    '''Print a blank line followed by the script's end timestamp.'''
    from time import strftime
    stamp = strftime("%c")
    print("\nScript ended at: " + stamp)
    
@timing_val
def parse_file(file_in, filter_list_in):
    '''
    Name: parse_file
    Function: Copies every line of file_in whose key substring matches one
              of the given patterns into "<file_in>.filterpy".
    Input: file_in        - path of the file to scan
           filter_list_in - iterable of key strings to keep
    Output: None (prints the number of lines read and matches found).
            Because of @timing_val the caller receives (elapsed, None).
    Notes: The key position comes from the module-level sub_start and
           sub_length values via calcoffsetsub().
    '''
    # Slice bounds are the same for every line, so compute them once.
    key_start, key_end = calcoffsetsub(sub_start, sub_length)
    wanted = set(filter_list_in)
    lines_read = 0
    count_found = 0
    file_ot = str(file_in) + ".filterpy"

    # Both handles are managed so the output is flushed and closed even if
    # an error interrupts the scan.
    with open(file_in, 'r') as f, open(file_ot, 'w') as out_file:
        for line in f:
            lines_read += 1
            key = line.rstrip()[key_start:key_end]
            if key in wanted:
                count_found += 1
                out_file.write(line)

    # A real counter is reported: the enumerate index was one short and was
    # unbound for an empty input file.
    print("Read: " + str(lines_read) + " lines.")
    print("Found: " + str(count_found) + " matches.")
                
# #######################################################
# Initial checks

# Parse non-optional parameters.
# sys.argv[4] is the first filter pattern, so at least one pattern must be
# supplied; a short argument list raises IndexError.
try:
    file_in = sys.argv[1]
    sub_start = sys.argv[2]
    sub_length = sys.argv[3]
    sub_pat = sys.argv[4]
except IndexError:
    print("Error: Parameter parsing. Terminating script")
    sys.exit(1)


# Check for existence of input file
if not fileexists(file_in):
    print("\n" + "Provided file could not be detected. Terminating")
    usage()
    sys.exit(1)

# Validate that the next two positional parameters are integers.
# int() raises ValueError on bad input, so the conversion itself is the
# check; the values stay as strings because calcoffsetsub() converts them.
try:
    int(sub_start)
except ValueError:
    print("\n" + "Provided number is not an integer. Terminating")
    usage()
    sys.exit(1)

try:
    int(sub_length)
except ValueError:
    print("\n" + "Provided number is not an integer. Terminating")
    usage()
    sys.exit(1)



# ########################################################
# Script Start
print("Initial controls successful. Launching script...")
print("\n")


# print script parameters
script_path_param(sys.argv[0])
print("\n")


# Produce filter_list from the arguments that follow <length>
filter_list = parsearguments(sys.argv)
print("\n")


# check args type - on screen report
chck_args_type(sys.argv)
print("\n")


# Call main work - time it.
parse_file(file_in, filter_list)


# ##########################################################
# Script end
script_end()
#!/usr/bin/python
# -*- coding: utf-8 -*-                       

# ##########################################################################
#
#   Author: P.Doulgeridis
#
# # ------------------------------------------------------------------------
#   Use as: python BNsplit.py <file_in> <lines> <start> <end> <-v|-q>
#                                   
#          <file_in>: BN
#          <lines>  : ~42k
#          <start>  : integer
#          <end>    : integer
#
#
#
# # ------------------------------------------------------------------------ 
#
#   Location: C:\Users\p.doulgeridis\Desktop\weirdsplit\weirdsplit.py
#
# # ------------------------------------------------------------------------
#   Function: Reads the input file and the line limit, then starts parsing. If
#             the limit is exceeded in the middle of a group, the entire group
#             is written to the same file before a new file is initialized.
#
# # ------------------------------------------------------------------------
#
#   Notes: 
#           Encoding problem with input file: solved with errors=ignore. 
#           Check encoding python.
#           
#           Used to split BN files in banks <= 14 and banks > 14
#
# # ------------------------------------------------------------------------
#


# ##########################################################################
# Necessary Modules
import sys
import os
import time

try:
    import argparse
except:
    print("Failed to load argparse. Terminating")
    sys.exit(2)


# ##########################################################################
# Initiate Parser
# MODIFY: CENTRAL HELP TEXT
# Build the command-line parser. RawDescriptionHelpFormatter is used so the
# description below is shown in --help with its line breaks preserved.
parser = argparse.ArgumentParser(
        prog="GroupSplit", 
        formatter_class=argparse.RawDescriptionHelpFormatter, 
        #description="calculate X to the power of Y",
        description='''\
#        
#       GroupSplit v.3
#       --------------------------------
#       Author: p.doulgeridis
#       Description: Splits a text file into a number
#       of files that have size = lines_in provided, 
#       keeping the groupings intact.
#
#       Caution: Certain files may exceed limit due to grouping.
#
#        ''',
        epilog="Additional info")


#######
# Initiate mutually exclusive group. 
# SET BY DEFAULT FOR VERBOSE/QUIET
# IF YOU NEED MORE EXCLUSIVE OPTIONS, ADD A DIFFERENT GROUP.
#
# -v and -q cannot be combined. -v uses action="count", so repeating it
# (-vv, -vvv, ...) raises args.verbose by one each time.
group = parser.add_mutually_exclusive_group()
group.add_argument("-v", "--verbose", action="count", default=0)
group.add_argument("-q", "--quiet", action="store_true")        


######
# Positional Arguments (Necessary)
# POSSIBLE KINDS (actions, types)
#
# Order matters: the command line is <file> <lines> <start> <end>.
# The three numeric arguments are converted to int by argparse itself.
parser.add_argument("file", type=str, help="Provide the file")
parser.add_argument("lines", type=int, help="Provide the target lines")
parser.add_argument("start", type=int, help="Provide the beginning of substring - notepad++ column")
parser.add_argument("end", type=int, help="Provide the end of the substring - notepad++ column")


###### 
# Parse arguments
# Runs at module level: argparse prints usage and exits on invalid input.
args = parser.parse_args()


######
# Assign arguments
# NUM_OF_LINES=args.lines
# filename = args.file



# ##################################################################
# Declare functions and wrappers


# Reporting func
def reporting(lines_in):
    '''Print the end-of-run summary with the number of lines read.'''
    summary = "Operation Finished. Read: %s lines." % (lines_in,)
    print(summary)

    

# file exists
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Reports whether a path exists on disk.
    Input: filepath - path to test
    Output: Boolean (result of os.path.exists)
    Usage: if fileexists(file_in): ...
    Notes: os.path.exists is also true for directories.
    '''
    import os.path
    found = os.path.exists(filepath)
    return found

    
    
# isinteger
def typecheck(input, type):
    '''Return True when input is an instance of type (a class or tuple of classes).'''
    is_match = isinstance(input, type)
    return is_match

    
    
# file backup
def backupfile(src):
    '''
    Function: backupfile
    Description: Copies src to "<src>.bak" next to the original.
    Input: src - path of the file to back up
    Output: path of the backup file that was written
    Notes: The backup name was built before but no copy was made. copy2 is
           used so file metadata is carried over where the platform allows
           it. An existing .bak file is overwritten.
    '''
    import shutil
    backup = str(src) + ".bak"
    shutil.copy2(src, backup)
    return backup

    
    
# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - script path, normally sys.argv[0]
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join so the directory and the
           file name are separated correctly.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError):
        print("Error loading script parameters. Possible problem with os module")
        # Re-raise: continuing would only fail later on unbound names.
        raise

    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    else:
        print("Script name: " + str(script_name))
        print("Script directory: " + str(script_dir))
        print("Script full: " + str(script_full))
        print ('Script started at: ' + time.strftime("%c"))
        return True


        
# Script End
def script_end():
    '''Print the script's end timestamp.'''
    from time import strftime
    stamp = strftime("%c")
    print("Script ended at: " + stamp)
    
 
 
# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time in the locale's "%c" format
    Usage: print(script_time_param()) or a = script_time_param()
    '''
    stamp = time.strftime("%c")
    return stamp


# ##################################################################
# PARSE ARGUMENTS AND FORMAT
try:
    file_in = args.file
    input_line_limit = args.lines
    # argparse already converts these through type=int; the int() calls are
    # kept as a safeguard and live inside the try so a failure is reported
    # instead of surfacing later as an unbound name.
    start_in = int(args.start)
    end_in = int(args.end)
except (AttributeError, TypeError, ValueError):
    print("Failed to parse arguments")
    sys.exit(1)
else:
    print("Arguments parsed succesfully")

# ###################################################################
# CHECKS

if not fileexists(file_in):
    print("Error: Input file could not be located. Terminating.")
    sys.exit(1)

if not typecheck(start_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)

if not typecheck(end_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)

if not typecheck(input_line_limit, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)


# #####################################################################
# Initialize Iteration Vars

# NOTE: "group" is rebound here; it previously named the argparse
# mutually-exclusive group, which is no longer needed after parse_args().
group = ""
prev_group = ""
file_count = 0
dict_group = {}


# #####################################################################
# Script Start


# Handle quiet/verbose arguments
if args.quiet:
    pass
elif args.verbose:

    # Handle multiple verbosity values: four or more -v flags give the full
    # report, two or three print the script parameters, one prints the file.
    if args.verbose > 3:
        # full script reporting
        print("\n" + "Launching script:")
        script_path_param(sys.argv[0])
        print("\n")
        # NOTE(review): chck_args_type is not defined in this script's own
        # function section - confirm it is available before relying on -vvvv.
        chck_args_type(sys.argv)
    elif args.verbose >= 2:
        script_path_param(sys.argv[0])
    elif args.verbose >= 1:
        print("Processing file: " + str(file_in))



# Main split loop.
# Lines are read one at a time and buffered per group, where a line's group
# is the substring lines[start_in:end_in]. When the group value changes, the
# buffered previous group is written to the current output file; once the
# current file has reached input_line_limit lines, the next output file is
# opened. A group is therefore never split across two files.
with open(file_in, 'r') as f:

    # The first output file is always "<file_in>.out.0.txt".
    fout = open(str(file_in)  + ".out" + ".0.txt", "w")
    line_status = 0             # lines written to the current output file
    file_count += 1             # index that the NEXT output file will get
    counter = 0                 # not used below
    last_line = ''              # assigned in the loop but not read afterwards
    total_lines = 0             # not used below
    file_line_counter = 0       # lines written for the group being flushed
    file_lines = []             # one entry per flushed group (its line count)
    
    for i, lines in enumerate(f):
        
        # NOTE(review): these trace prints run for every line, regardless of
        # the -q / -v options handled earlier.
        print("#####")
        print("Line: " + str(i) + " with content: " + str(lines))
        
        last_line = lines
        
        # parse group
        #group = lines[2:6]
        # start_in/end_in are used directly as 0-based slice bounds.
        group = lines[start_in:end_in]
        print("Comp: " + str(group) + " : " + str(prev_group))
        
        
        # check if group is different than previous
        if group != prev_group:
        
            print("new group: " + str(group))
            
            # check line status
            print("Checking line status: " + str(line_status) + " <-> " + str(input_line_limit))
            if line_status < input_line_limit:
                # Flush the buffered previous group. On the very first line
                # prev_group is "" and dict_group is empty, so nothing is
                # written here.
                if prev_group in dict_group.keys():
                    print("Writing to output")
                    for j in dict_group[prev_group]:
                        line_status += 1
                        fout.write(j)
                        file_line_counter += 1
                    
                    
                    file_lines.append(file_line_counter)
                    file_line_counter = 0
            
            # Checked after the flush, so the group that crossed the limit
            # stays in the file it started in.
            if line_status >= input_line_limit:
                print("Checking line status: " + str(line_status) + " <-> " + str(input_line_limit))
                print("Starting new file")
                #file_count += 1
                fout.close()
                fout = open(str(file_in) + ".out" + ".%d.txt"%(file_count), "w")
                file_count += 1
                line_status = 0
            
            # initialize new group
            # Only the current group is kept in memory.
            dict_group = {}
            dict_group[group] = []
            
            # add line to group 
            dict_group[group].append(lines)
        
        else: 
        
            # same group
            # NOTE(review): if the first line's key slice is the empty string
            # it equals the initial prev_group and this lookup would raise
            # KeyError - confirm inputs always have a non-empty key.
            dict_group[group].append(lines)
            
        prev_group = group
    

    # Flush the last buffered group; it goes to whichever file is open.
    # NOTE(review): for an empty input file "group" is still "" and is not a
    # key of dict_group, so this would raise KeyError - confirm.
    for j in dict_group[group]:
        fout.write(j)
        file_line_counter += 1
    
    file_lines.append(file_line_counter)
    file_line_counter = 0

    
    #fout.write(lines)
    fout.close()
    
    
    
    # Reporting
    print("Reporting:")
    print(file_lines)
    print("Lines read: " + str(int(i + 1)))         # i starts at 0
    print("Total lines in all files: " + str(sum(file_lines)))
    

# ##########################################################################################
# Script end
script_end()
#!/usr/bin/python





# ##########################################################################
# Necessary Modules
import sys
import os
import time


try:
    import collections
except:
    print("Failed to load module: Collections")
    sys.exit(1)
else:
    print("Module: collections loaded succesfully")

try:
    import argparse
except:
    print("Failed to load argparse. Terminating")
    sys.exit(2)


# ##########################################################################
# Initiate Parser
# MODIFY: CENTRAL HELP TEXT
# Build the command-line parser. RawDescriptionHelpFormatter is used so the
# description below is shown in --help with its line breaks preserved.
parser = argparse.ArgumentParser(
        prog="BNSplitcalc", 
        formatter_class=argparse.RawDescriptionHelpFormatter, 
        #description="calculate X to the power of Y",
        description='''\
#        
#       BNSplit v.2
#       --------------------------------
#       Author: p.doulgeridis
#       Description: Splits a text file into a number
#       of files that have size = lines_in provided, 
#       keeping the groupings intact.
#
#       Caution: Certain files may exceed limit due to grouping.
#
#        ''',
        epilog="Additional info")


#######
# Initiate mutually exclusive group. 
# SET BY DEFAULT FOR VERBOSE/QUIET
# IF YOU NEED MORE EXCLUSIVE OPTIONS, ADD A DIFFERENT GROUP.
#
# -v and -q cannot be combined. -v uses action="count", so repeating it
# (-vv, -vvv, ...) raises args.verbose by one each time.
group = parser.add_mutually_exclusive_group()
group.add_argument("-v", "--verbose", action="count", default=0)
group.add_argument("-q", "--quiet", action="store_true")        


######
# Positional Arguments (Necessary)
# POSSIBLE KINDS (actions, types)
#
# Only the input file is taken from the command line in this version; the
# start/end arguments below are disabled.
parser.add_argument("file", type=str, help="Provide the file")
# parser.add_argument("start", type=int, help="Provide the beginning of substring - notepad++ column")
# parser.add_argument("end", type=int, help="Provide the end of the substring - notepad++ column")


###### 
# Parse arguments
# Runs at module level: argparse prints usage and exits on invalid input.
args = parser.parse_args()


######
# Assign arguments
# NUM_OF_LINES=args.lines




# ##################################################################
# Declare functions and wrappers


# Reporting func
def reporting(lines_in):
    '''Print the end-of-run summary with the number of lines read.'''
    summary = "Operation Finished. Read: %s lines." % (lines_in,)
    print(summary)

    

# file exists
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Reports whether a path exists on disk.
    Input: filepath - path to test
    Output: Boolean (result of os.path.exists)
    Usage: if fileexists(file_in): ...
    Notes: os.path.exists is also true for directories.
    '''
    import os.path
    found = os.path.exists(filepath)
    return found

    
    
# isinteger
def typecheck(input, type):
    '''Return True when input is an instance of type (a class or tuple of classes).'''
    is_match = isinstance(input, type)
    return is_match

    
    
# file backup
def backupfile(src):
    '''
    Function: backupfile
    Description: Copies src to "<src>.bak" next to the original.
    Input: src - path of the file to back up
    Output: path of the backup file that was written
    Notes: The backup name was built before but no copy was made. copy2 is
           used so file metadata is carried over where the platform allows
           it. An existing .bak file is overwritten.
    '''
    import shutil
    backup = str(src) + ".bak"
    shutil.copy2(src, backup)
    return backup

    
    
# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - script path, normally sys.argv[0]
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join so the directory and the
           file name are separated correctly.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError):
        print("Error loading script parameters. Possible problem with os module")
        # Re-raise: continuing would only fail later on unbound names.
        raise

    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    else:
        print("Script name: " + str(script_name))
        print("Script directory: " + str(script_dir))
        print("Script full: " + str(script_full))
        print ('Script started at: ' + time.strftime("%c"))
        return True


def pretty_print(b):
    '''
    Function: pretty_print
    Description : Prints a dictionary one "key : value" pair per line,
                  wrapped in braces.
    Input : Dictionary
    Output: STDOUT only; the function returns None
    Usage : pretty_print(b)
    '''
    print("{ ")
    for key in b.keys():
        entry = "\t%s : %s" % (key, b[key])
        print(entry)
    print("}\n")
        
        
# Script End
def script_end():
    '''Print the script's end timestamp.'''
    from time import strftime
    stamp = strftime("%c")
    print("Script ended at: " + stamp)
    
 
 
# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time in the locale's "%c" format
    Usage: print(script_time_param()) or a = script_time_param()
    '''
    stamp = time.strftime("%c")
    return stamp



def keywithmaxval(d):
    """Return the key holding the largest value in d.

    When several keys share the largest value, the first one in the dict's
    iteration order is returned. An empty dict yields 0.
    """
    if len(d) == 0:
        return 0

    values = list(d.values())
    position = values.index(max(values))
    return list(d.keys())[position]

    
    
def gettotalvalue(list_in, dict_in):
    """Return the sum of dict_in[key] for every key in list_in.

    list_in -- iterable of keys to add up
    dict_in -- mapping from key to a numeric count

    An empty list_in gives 0. A key missing from a plain dict raises
    KeyError. The mapping is read directly; the per-call copy that was never
    used has been dropped.
    """
    return sum(dict_in[key] for key in list_in)
    
    
    
def dictfunc(dict_in):
    '''Print the given object (normally a dict) in its default string form.'''
    rendered = str(dict_in)
    print(rendered)
    
    
    
def process(dict_in, limit_in):
    '''
    Name: process
    Function: Distributes the keys of dict_in over the two module-level
              lists outlist1 and outlist2.
    Input: dict_in  - mapping key -> line count
           limit_in - line total for outlist1 above which the remaining keys
                      all go to outlist2
    Output: tuple (outlist1, outlist2)
    Notes: Keys are taken largest-count first and assigned alternately
           (odd picks to outlist1, even picks to outlist2). Totals are read
           from the module-level dict_count. Progress is printed throughout.
    '''
    itercounter = 0
    dict_copy = dict(dict_in)      # work on a copy; keys are removed as used
    pretty_print(dict_copy)
    print("LIMIT IS: " + str(limit_in))

    while len(dict_copy) != 0:
        itercounter += 1
        proc_key = keywithmaxval(dict_copy)
        print(proc_key)

        outlist1_length = gettotalvalue(outlist1, dict_count)
        outlist2_length = gettotalvalue(outlist2, dict_count)

        print("Outlist1: " + str(outlist1_length))
        print("Outlist2: " + str(outlist2_length))

        # Alternate between the two output lists.
        if itercounter % 2 == 1:
            outlist1.append(proc_key)
        else:
            outlist2.append(proc_key)
        del dict_copy[proc_key]

        if gettotalvalue(outlist1, dict_count) > limit_in:
            print("LIMIT EXCEEDED")
            # Move every remaining key to the second list. Deleting entries
            # while iterating the dict raises RuntimeError on Python 3, so
            # the keys are copied in one step and the dict is then emptied.
            outlist2.extend(dict_copy)
            dict_copy.clear()

            print(outlist1, gettotalvalue(outlist1, dict_count))
            print(outlist2, gettotalvalue(outlist2, dict_count))
            return (outlist1, outlist2)

    print(outlist1, gettotalvalue(outlist1, dict_count))
    print(outlist2, gettotalvalue(outlist2, dict_count))

    return (outlist1, outlist2)
    
    
    
# ##################################################################
# PARSE ARGUMENTS AND FORMAT
try:
    file_in = args.file
except AttributeError:
    # Stop here: continuing would only fail later on the unbound file_in.
    print("Failed to parse arguments")
    sys.exit(1)
else:
    print("Arguments parsed succesfully")

# The grouping key position is fixed in this version: it is the slice
# line[2:6] of every input line.
start_in = 2
end_in = 6



# ###################################################################
# CHECKS

if not fileexists(file_in):
    print("Error: Input file could not be located. Terminating.")
    sys.exit(1)

if not typecheck(start_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)

if not typecheck(end_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)

# if not typecheck(input_line_limit, int):
    # print("\n" + "Error: Provided number is not an integer. Terminating")
    # sys.exit(9)





# #####################################################################
# Initialize Iteration Vars
# #####################################################################

# NOTE: "group" is rebound here; it previously named the argparse
# mutually-exclusive group, which is no longer needed after parse_args().
group = ""
prev_group = ""
file_count = 0
counter2 = 0
dict_group = {}


# #####################################################################
# Script Start
# #####################################################################

# Handle quiet/verbose arguments
if args.quiet:
    pass
elif args.verbose:

    # Handle multiple verbosity values: four or more -v flags give the full
    # report, two or three print the script parameters, one prints the file.
    if args.verbose > 3:
        # full script reporting
        print("\n" + "Launching script:")
        script_path_param(sys.argv[0])
        print("\n")
        # NOTE(review): chck_args_type is not defined in this script's own
        # function section - confirm it is available before relying on -vvvv.
        chck_args_type(sys.argv)
    elif args.verbose >= 2:
        script_path_param(sys.argv[0])
    elif args.verbose >= 1:
        print("Processing file: " + str(file_in))

        
        
# Calculate input_line_limit
print("Calculating input line limit....")
# Count lines by streaming the file inside a with-block: the handle is
# closed deterministically and the file is never held in memory as a list.
with open(file_in) as count_handle:
    count = sum(1 for _ in count_handle)
print("Total line count: " + str(count))
# Half of the input is the target size for the first output file.
input_line_limit = count // 2
print("Estimated split: " + str(count // 2))
print("Estimated split: " + str(input_line_limit))

# Initialize two dicts
#   1. for entire data
#   2. for counters and picking
dict_out = collections.defaultdict(list)
dict_count = collections.defaultdict(int)


print("Reading input file and populating dict...")
with open(file_in, 'r') as f:
    for line in f:
        file_count += 1
        line_fixed = line.rstrip("\n")
        # Group lines by the key slice; lines are stored without newline.
        key = line_fixed[start_in:end_in]
        dict_out[key].append(line_fixed)


print("Reading populated dict, creating counter dict")
for key_in in dict_out.keys():
    dict_count[key_in] = len(dict_out[key_in])
    counter2 += dict_count[key_in]

# file_count (lines read) and counter2 (lines held in the dict) should match.
print("\n" + "Reporting:")
print("read: " + str(file_count))
print("dict: " + str(counter2))
pretty_print(dict_count)

# Initialize output lists, and processed key list.
outlist1 = []
outlist2 = []
processed_keys = []
out_counter = 0


# ##################################################################
# Main Job
# ##################################################################

# Initialize output filenames
file_ot = str(file_in) + ".out.0.txt"
file_ot2 = str(file_in) + ".out.1.txt"

# Open output files; the with-block closes both even if writing fails.
with open(file_ot, 'w') as outfile1, open(file_ot2, 'w') as outfile2:

    # Calculating split
    # Call on main function -> process
    print("Calculate split....")
    output = process(dict_count, input_line_limit)
    print("Output is: " + str(output))

    print("Processing 1st list: " + str(output[0]))
    for j in output[0]:
        print("Processing key and writing : " + str(j))
        processed_keys.append(j)
        for line in dict_out[j]:
            # Lines were stored without their newline, so add it back.
            outfile1.write(str(line) + "\n")
            out_counter += 1


    print("Processing 2nd list: " + str(output[1]))
    for j in output[1]:
        print("Processing key and writing: " + str(j))
        processed_keys.append(j)
        for line in dict_out[j]:
            outfile2.write(str(line) + "\n")
            out_counter += 1


# ###################################################################
# Final Controls and reporting
# ###################################################################

print("Final control...")
print("Lines read: " + str(count))
# Report the number actually written so a mismatch is visible here.
print("Lines written: " + str(out_counter))
print("Processed keys: " + str(processed_keys))
difference = int(count) - int(out_counter)
if difference != 0:
    print("PROBLEM")
    sys.exit(1)
#!/usr/bin/python

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
 
@author: P.Doulgeridis
"""
 
 
import os
import sys
import time
import collections
 
 
# Input path comes from the first command-line argument; the three outputs
# are written next to it.
file_in = sys.argv[1]
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"


counter = 0

# Group the raw lines by key: the first 12 characters of the first
# ';'-separated field of the stripped line.
dict_in = collections.defaultdict(list)
with open(file_in, "r") as f:
    for counter, line in enumerate(f, 1):
        key = line.strip().split(";")[0][:12]
        print(":Key: " + str(key))
        dict_in[key].append(line)


# Keys seen once go to .proc, keys seen twice to .proc2, all others to .proc3.
with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
    selector = {1: f1, 2: f2}
    for values in dict_in.values():
        target = selector.get(len(values), f3)
        target.writelines(values)


print("Read: " + str(counter) + " lines")

# -*- coding: utf-8 -*-
"""
Created on      : 2019-07-25 01:56:25.767359
Name            : script_main.py
Project         : [[project]
Package         : [[PackageName]]
Function        : [[Description]]
Type            : [[Debug | Util | Flow ]]
Generated by    :

@author: P.Doulgeridis

Documentation: [Enter a brief description]


Script IO    : [List all relevant File IO]


System Calls : [Yes/No]


Parameters   : [List parameters w/ type]


Run as       : [List usage example]
"""




#######################################################################  
# Imports 





#######################################################################  
# Standard Modules
import sys
import os
import time

#######################################################################  
# Adv Modules

#
try:
    import argparse
except:
    autolog("Could not import argparser module. Check requirements file")

#    
try:
    import logging
except:
    autolog("Could not import logging module. Check requirements file")

#
try:
    import string
except:
    autolog("Could not import string module. Check requirements file")

#
try:
    import operator
except:
    autolog("Could not import operator module. Check requirements file")

#
try:
    import collections 
except:
    autolog("Could not import collections module. Check requirements file")

#
try:
    from datetime import datetime
except:
    autolog("Failed to load module: datetime")

#
try:
    import subprocess
except:
    autolog("Failed to load module: Subprocess")
    
    
#######################################################################  
# Variables
ARGS_NUM = 2
t_counter = 0
s_counter = 0



#######################################################################  
# Functions 
# timer
def timing_val(func):
    '''
    Author: p.doulgeridis
    Name: timing_val
    Function: timing_val(func)
    Input: the function to wrap (used as a decorator)
    Output: the wrapped function; calling it autologs the elapsed time and
            returns the tuple (elapsed_seconds, original_result)
    Usage: @timing_val over function declaration
    Notes: Callers receive a 2-tuple, not the bare result of the wrapped
           function.
    '''
    import functools

    # wraps() keeps the decorated function's __name__/__doc__ intact.
    @functools.wraps(func)
    def wrapper(*arg, **kw):
        t1 = time.time()
        res = func(*arg, **kw)
        t2 = time.time()
        # End minus start: the reverse order logged a negative duration.
        elapsed = t2 - t1
        autolog ('%r (%r, %r) %0.9f sec' %
              (func.__name__, arg, kw, elapsed))
        return elapsed, res
    return wrapper


def chck_args_num(var, arg_num):
    '''
    Function : chck_args_num(var, arg_num)
    Description: Compares the number of supplied arguments with the number
                 the script expects and autologs the outcome.
    Input: var     - argument-vector length (callers are expected to pass
                     len(sys.argv))
           arg_num - expected number of arguments
    Output: Boolean - True when the counts match
    Called as : chck_args_num(len(sys.argv), ARGS_NUM)
    '''
    expected = int(arg_num)
    # One is subtracted from the supplied length before comparing.
    supplied = var - 1

    autolog ("Checking provided arguments: ")

    if supplied == expected:
        autolog ('Correct number of arguments provided: ' + str(supplied) + "\n" )
        return True

    autolog ('Wrong number of arguments : ' + str(supplied))
    autolog ('Must be : ' + str(expected))
    return False

    
def usage():
    '''Autolog the command-line help text of the script.'''
    help_text = """
Name: FQ.py 
Called as: FQ.py <file_in> <start> <end> <pattern1> <pattern2> ...<patternN>
Input: <file_in> : full path to file
       <start>      : integer
       <end>        : integer
       <pattern.>   : string
"""
    autolog(help_text)

# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time in the locale's "%c" format
    Usage: autolog(script_time_param()) or a = script_time_param()
    '''
    stamp = time.strftime("%c")
    return stamp

def script_params(string1):
    '''
    Function: script_params(string)
    Description: Autologs the basic script parameters.
    Input: sys.argv[0]
    Output: tuple (script name, script directory, script full path);
            the same values are autologged
    Called as: script_params(sys.argv[0])
    Notes: The directory is taken from sys.path[0]. The full path is built
           with os.path.join so the separator matches the platform; a
           hard-coded backslash is only valid on Windows.
    '''
    import os
    import sys
    import time

    sname = str(string1)
    sdir = str(sys.path[0])
    sfpath = os.path.join(sdir, sname)

    autolog ('Script name : ' + sname)
    autolog ('Script directory : ' + sdir)
    autolog ('Script full path: ' + sfpath)
    autolog ("\n")
    autolog ('Script started at: ' + time.strftime("%c") + "\n")

    return(sname, sdir, sfpath)
    
    
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - script path, normally sys.argv[0]
           vocal   - 'yes' autologs the parameters, anything else returns
                     them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join so the directory and the
           file name are separated correctly.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError):
        autolog("Error loading script parameters. Possible problem with os module")
        # Re-raise: continuing would only fail later on unbound names.
        raise

    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    else:
        autolog("Script name: " + str(script_name))
        autolog("Script directory: " + str(script_dir))
        autolog("Script full: " + str(script_full))
        autolog ('Script started at: ' + time.strftime("%c"))
        return True



# chk_args_type
def chck_args_type(string):
    '''
    Function : chck_args_type(string)
    Description: Autologs the script name and each numbered argument.
    Input: string - argument vector (normally sys.argv)
    Output: STDOUT autolog; always returns 0
    Called as chck_args_type(sys.argv)
    Notes: Reports the list it was given rather than the global sys.argv.
    '''
    for i, argument in enumerate(string):
        if i == 0:
            autolog ("Arguments for script: %s" % (argument,))
        else:
            autolog ("%d. argument: %s" % (i, argument))
    autolog("\n")

    return 0


    
def script_end():
    '''Autolog a blank line followed by the script's end timestamp.'''
    from time import strftime
    stamp = strftime("%c")
    autolog("\nScript ended at: " + stamp)
    
    
def pretty_autolog(b):
    '''
    Function: pretty_autolog
    Description : Autologs a dictionary one "key : value" pair per message,
                  wrapped in braces.
    Input : Dictionary
    Output: autolog output only; the function returns None
    Usage : pretty_autolog(b)
    '''
    autolog("{ ")
    for key in b.keys():
        entry = "\t%s : %s" % (key, b[key])
        autolog(entry)
    autolog("}\n")
    
def autolog(message):
    """
    Function: autolog()
    Description: Prints a message to stdout prefixed with the calling
                 script, the calling function and the calling line number.
    Input: Message <string>
    Output: One line on stdout: [[Script: ..]]:[Method: ..]: Line:N -> : msg
    Usage: autolog("This is a test message from autolog")
    Notes: The caller is read from the previous frame in the stack;
           otherwise the reported location would be this function itself.
    """
    import inspect
    caller = inspect.currentframe().f_back
    code = caller.f_code
    # f_lineno is the line that made the call. co_firstlineno would report
    # the first line of the calling function for every message it emits.
    print("[[Script: %s]]:[Method: %s]: Line:%i -> : %s" % (
        code.co_filename,
        code.co_name,
        caller.f_lineno,
        message
    ))


def autologfile(message, filename):
    """
    Function: autologfile()
    Description: Appends a message to a log file, prefixed with the caller's
                 file name, function name and the line number of the call
    Input: Message <string>, filename <path of the log file>
    Output: One line appended to filename:
            [[Script: <file>]]:[Method: <function>]: Line:<n> -> : <message>
    Usage: autologfile("This is a test message", "run.log")
    Notes: Inspects the previous frame in the stack, otherwise the prefix
           would describe autologfile itself.
           A newline is added only when the message does not already end
           with one, so every call produces exactly one terminated entry.
    """
    import inspect
    caller = inspect.currentframe().f_back
    code = caller.f_code

    # f_lineno is the line of the call itself; co_firstlineno would only be
    # the first line of the enclosing function.
    entry = "[[Script: %s]]:[Method: %s]: Line:%i -> : %s" % (
        code.co_filename,
        code.co_name,
        caller.f_lineno,
        message
    )
    if not entry.endswith("\n"):
        entry += "\n"

    with open(filename, 'a') as f:
        f.write(entry)


def fileexists(filepath):
    '''
    Function: fileexists
    Description: Reports whether the given path exists on disk
    Input: filepath (or raw string of it)
    Output: Boolean
    Usage: if fileexists(file_in):...
    Notes: The check is os.path.exists, so an existing directory also
    yields True. Depending on the system the path may need to be given
    as a raw string (r'...').
    '''
    from os import path
    return path.exists(filepath)
    
 
def direxists(filepath):
    '''
    Function: direxists
    Description: Checks that the path exists and is a directory
    Input: filepath (or raw string of it)
    Output: Boolean
    Usage: if direxists(dir_in):...
    Notes: Uses os.path.isdir, so a regular file with that name yields
    False instead of passing the check and failing later in os.listdir.
    '''
    import os.path
    return os.path.isdir(filepath)
    
@timing_val
def main_work(dir_in, file_ot):
    '''
    Function: main_work
    Description: Concatenates every regular file found directly inside
                 dir_in into file_ot, writing a separator banner after
                 each merged file
    Input: dir_in  - directory whose files are merged (not recursive)
           file_ot - path of the merged output file (overwritten)
    Output: file_ot on disk, progress messages through autolog
    Notes: Increments the module-level t_counter once per merged file.
           Input and output are handled as UTF-8 text.
    '''
    global t_counter

    separator_in ="""


#################################################################

"""

    with open(file_ot, 'w', encoding='utf-8') as f:

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # merged output reproducible between runs.
        for name in sorted(os.listdir(dir_in)):
            filepath = os.path.abspath(os.path.join(dir_in, name))

            # Sub-directories (and other non-regular entries) cannot be
            # opened as text, so they are reported and skipped.
            if not os.path.isfile(filepath):
                autolog("Skipping (not a regular file): " + str(filepath))
                continue

            # file_ot may live inside dir_in; merging it into itself would
            # keep feeding the loop with the data being written.
            if os.path.samefile(filepath, file_ot):
                autolog("Skipping (output file): " + str(filepath))
                continue

            t_counter += 1
            autolog("Processing: " + str(name))
            autolog("Processing: " + str(filepath))

            with open(filepath, 'r', encoding='utf-8') as g:
                f.writelines(g)

            f.write(separator_in)

    autolog("Processed: " + str(t_counter) + " files.")



#######################################################################    
# Initial Checks

# Start.
if __name__ == '__main__':
    # Everything below is script behaviour (argument validation, exits and
    # the merge itself), so it only runs when the file is executed directly.

    autolog(str(sys.argv[0] + " Started.\n"))

    # Checking n. of arguments
    if not chck_args_num(len(sys.argv), ARGS_NUM):
        autolog('Terminating Script....')
        # Non-zero status so a calling shell can tell the run failed
        sys.exit(1)

    # Report on script parameters
    script_params(sys.argv[0])

    ####################################################################
    # I/O

    file_in = sys.argv[1]
    file_ot = sys.argv[2]

    # Check that the input location exists
    if not direxists(file_in):
        autolog("File: " + str(file_in) + " failed to be located. Terminating.")
        sys.exit(2)
    else:
        autolog("File: " + str(file_in) + " located. Proceeding.")

    # Refuse to overwrite an existing output file
    if fileexists(file_ot):
        autolog("File: " + str(file_ot) + " already exists. Terminating.")
        sys.exit(3)
    else:
        autolog("File: " + str(file_ot) + " ready to be written")

    ####################################################################
    # Main Work

    main_work(file_in, file_ot)

    ####################################################################
    # Script end and reporting
    autolog(str(sys.argv[0]) + " Ended.")
#



##########################################################################
#   DIRECTORY COMPARE V.1                                                #
#                                                                        #
#   Usage :                                                              #
#                                                                        #
#   python DirComp.py <dir1> <dir2>                                      #
#                                                                        #
#   Function :                                                           #
#                                                                        #
#   Sizes and compares directories based on filenames, prints out the    #
#   filenames that only exist in one directory of the two, for both      #
#   directories.                                                         #
#                                                                        #
#   Notes :                                                              #
#                                                                        #
#   Main function is "build_files_set" which takes a directory as input  #
#   and parses each file name in full path, relative path, and assigns   #
#   them to a set so we can do set operations.                           #
#                                                                        #
#   The compare_directories function simply calculates the differences   #
#   between the given sets.                                              #
#                                                                        #
#   Modules:                                                             #
#                                                                        #
#   import os                                                            #
#   import sys                                                           #
#   import re                                                            #
#   import subprocess                                                    #
#   import time                                                          #
##########################################################################




import os
import sys
import re
import subprocess
import time
import collections

# Get the script path
def get_script_path():
    """Return the directory that holds the running script.

    The script location is taken from sys.argv[0] and resolved with
    os.path.realpath before its directory part is extracted.
    """
    script_file = os.path.realpath(sys.argv[0])
    return os.path.dirname(script_file)

# Parse files of directory
def build_files_set(rootdir):
    """Return the set of paths below rootdir, relative to rootdir.

    Both files and sub-directories are included, at every depth, so two
    directory trees can be compared with plain set operations.

    The relative path is computed with os.path.relpath instead of a regular
    expression built from rootdir: a directory name may contain characters
    that are special in a pattern ('+', '(', '.', '\\'), which would either
    fail to compile or silently leave the root prefix in place.
    """
    files_set = set()
    for (dirpath, dirnames, filenames) in os.walk(rootdir):
        for filename in filenames + dirnames:
            full_path = os.path.join(dirpath, filename)
            files_set.add(os.path.relpath(full_path, rootdir))

    return files_set

# Compare sets 
def compare_directories(dir1, dir2):
    """Return (only_in_dir1, only_in_dir2) as two sets of relative paths.

    Each directory is scanned with build_files_set; the first set holds the
    entries missing from dir2, the second the entries missing from dir1.
    """
    entries1, entries2 = build_files_set(dir1), build_files_set(dir2)
    only_in_dir1 = entries1.difference(entries2)
    only_in_dir2 = entries2.difference(entries1)
    return (only_in_dir1, only_in_dir2)

    
def compare_bool(dir1, dir2):
    """Return True when both directories contain the same relative paths.

    The comparison result itself is returned. Handing back a function
    object instead would be truthy no matter what the directories hold.
    """
    files_set1 = build_files_set(dir1)
    files_set2 = build_files_set(dir2)
    # The entries are already unique (sets), so plain set equality is the
    # same test as comparing element counts.
    return files_set1 == files_set2
    
def are_eq(a, b):
    """Return True when directories a and b hold the same relative paths.

    a and b are directory paths. The comparison is made on the sets built
    by build_files_set, not on the path strings themselves (comparing the
    strings would only compare the characters of the two directory names).
    """
    files_set1 = build_files_set(a)
    files_set2 = build_files_set(b)
    return files_set1 == files_set2
    
    
# Get size - pure Python total of the file sizes below a directory
def get_size(start_path = '.'):
    """Return the summed size in bytes of every file below start_path.

    Directories themselves are not counted, only the files found by
    os.walk. Entries whose size cannot be read (dangling symbolic link,
    file removed while walking) are skipped so that one bad entry does not
    abort the whole calculation.
    """
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                total_size += os.path.getsize(fp)
            except OSError:
                # Nothing measurable behind this name; leave it out
                continue
    return total_size
 
# call to system du - working
def du(path):
    """Return the first field printed by `du -sh path`, decoded as UTF-8.

    The external `du` command is run through subprocess.check_output; its
    output is split on whitespace and only the leading field (the size
    column) is kept.
    """
    raw_output = subprocess.check_output(['du', '-sh', path])
    size_field = raw_output.split()[0]
    return size_field.decode('utf-8')
    

def main():
    """Compare the two directories named on the command line and report.

    Prints the run time and script location, both directory names, their
    sizes as reported by du, and the relative paths that exist in only one
    of the two directories.

    Returns True when neither directory has an entry the other lacks,
    False otherwise, and None when the module is not run as a script.
    Exits with status 2 when fewer than two directories are supplied.

    All output uses print() calls, which behave the same for a single
    string argument under Python 2 and Python 3.
    """
    # Nothing is done unless the file is executed as a script
    if __name__ != '__main__':
        return None

    # ##
    # Process arguments
    if len(sys.argv) < 3:
        print("Usage: python " + os.path.basename(sys.argv[0]) + " <dir1> <dir2>")
        sys.exit(2)

    dir1 = str(sys.argv[1])
    dir2 = str(sys.argv[2])

    # ##
    # Script parameters
    print(' ')
    print("Script run at: " + str(time.strftime("%H:%M:%S")))
    print("Script run from: " + get_script_path())
    print(' ')

    # ##
    # Compare Directories
    in_dir1, in_dir2 = compare_directories(dir1, dir2)

    # ##
    # Output
    print(' ')
    print('Comparing Files: ')
    print(dir1)
    print(dir2)

    print(' ')
    print('Comparing sizes: ')
    size1 = du(dir1)
    size2 = du(dir2)
    print("Directory " + dir1 + " size: " + str(size1))
    print("Directory " + dir2 + " size: " + str(size2))

    # sorted() gives a stable listing; set iteration order is arbitrary
    print('\nFiles only in {}:'.format(dir1))
    for relative_path in sorted(in_dir1):
        print('* {0}'.format(relative_path))

    print('\nFiles only in {}:'.format(dir2))
    for relative_path in sorted(in_dir2):
        print('* {0}'.format(relative_path))

    # Boolean: the trees match exactly when both difference sets are empty,
    # which avoids walking the two directories a second time.
    return not in_dir1 and not in_dir2
    
if __name__ == '__main__':
    # Run the comparison a single time and print its boolean verdict
    print(main())
# Two primary data structures are created:

# (1) A list of tuples. Each tuple contains a pair of items: a file size and a 
# file path. The file size is the size of the file pointed to by the file path. 
# The list is sorted on the file sizes. The set of file paths consists of all 
# the paths to the files in directory_l (recursively including subdirectories 
# of directory_l and excluding hidden files and folders by default).

# For example:
# [(file_size_1, file_path_1), (file_size_2, file_path_2), ..., 
# (file_size_n, file_path_n)]

# file_size_1 = size of the file pointed to by file_path_1, 
# file_size_2 = size of the file pointed to by file_path_2, ..., 
# file_size_n = size of the file pointed to by file_path_n

# file_size_1 <= file_size_2 <= ... <= file_size_n

# file_path_1, file_path_2, ..., file_path_n = all the paths to the files in 
# directory_l (recursively including subdirectories of directory_l and 
# excluding hidden files and folders by default)

# (2) A dictionary mapping each unique file size in directory_r to a list of 
# all the paths to files of that size in directory_r (recursively including 
# subdirectories of directory_r and excluding hidden files and folders by 
# default).


# For each file pointed to in (1), its size is checked for existence in (2). 
# If its size does not exist in (2), the file path to it is stored as 
# unmatched. If its size does exist in (2), a byte by byte comparison is done 
# between it and each file matching its size in (2) until a match is found, if 
# any. If a match is not found, the file path to it is stored as unmatched. The 
# stored list of unmatched file paths, if any, is then printed.


# Uses suggestions by msvalkon and Janne Karila in Stack Exchange Code Review:
# http://codereview.stackexchange.com/questions/41853/byte-by-byte-directory-comparison-ignoring-folder-structures-and-file-name-diffe


# Requires the progress bar library (2.2)
# https://pypi.python.org/pypi/progressbar/2.2
# http://code.google.com/p/python-progressbar/


import argparse
import collections
import filecmp
import os
import sys

from operator import itemgetter
#from progressbar import Bar, Percentage, ProgressBar


def main():
    """Parse the command line, validate both directories and list the
    files of directory_l that have no byte-for-byte match in directory_r.

    Exits with status 2 when either positional argument is not a
    directory. Output is written with print() calls, which behave the same
    for a single string argument under Python 2 and Python 3.
    """
    help_description = (
        'Prints a list of the paths to the files that exist in the directory pointed '
        'to by directory_l, but that do not exist in the directory pointed to by '
        'directory_r. File name differences are ignored. Recursively scans '
        'subdirectories of directory_l and directory_r. Skips hidden files and folders '
        'by default. Files of the same size are compared byte by byte. Differences in '
        'directory structures are ignored. For example, if '
        'directory_l/subdirectory_1/file_name_1 and '
        'directory_r/subdirectory_2/subdirectory_3/file_name_2 match byte for byte, '
        'then directory_l/subdirectory_1/file_name_1 exists in directory_r.'
    )

    parser = argparse.ArgumentParser(description=help_description)

    parser.add_argument('-a', '--all', action='store_true',
                        help='include hidden files and folders')
    parser.add_argument('directory_l',
                        help='path to a directory of files to search for')
    parser.add_argument('directory_r',
                        help='path to a directory of files to search in')

    args = parser.parse_args()

    include_hidden = args.all
    directory_l = args.directory_l
    directory_r = args.directory_r

    # Both arguments must name existing directories; directory_l is
    # checked first.
    for directory in (directory_l, directory_r):
        if not os.path.isdir(directory):
            print("Invalid directory path: " + directory)
            sys.exit(2)

    unmatched = find_unmatched(directory_l, directory_r, include_hidden)

    # Prints the paths to any unmatched files.
    if not unmatched:
        print("No unmatched files.")
    else:
        print("Unmatched files:")
        for file_path in unmatched:
            print(file_path)


def find_unmatched(directory_l, directory_r, include_hidden):
    """Return the paths of the files in directory_l that have no
    byte-for-byte equal file anywhere in directory_r.

    directory_l, directory_r: directories to scan recursively.
    include_hidden: passed on to sizes_paths for both directories.

    The result is ordered by file size (ties by path). Progress messages
    are printed to stdout.
    """
    print("Preprocessing...")

    # (1) (size, path) pairs for directory_l. Tuples sort on their first
    # item, so the list is ordered by size, with the path as tie-breaker.
    size_file_path_tuple_list_l_sorted = sorted(
        sizes_paths(directory_l, include_hidden))

    # (2) size -> list of the paths in directory_r having that size
    size_to_file_path_list_dict_r = dict_of_lists(
        sizes_paths(directory_r, include_hidden))

    print("Comparing files...")

    unmatched = []

    for size_l, file_path_l in size_file_path_tuple_list_l_sorted:
        # Only files of the same size can match. .get() is used instead of
        # indexing so that a size missing from directory_r does not add a
        # new empty entry to the mapping on every lookup.
        same_size_paths_r = size_to_file_path_list_dict_r.get(size_l, ())

        if not file_match(file_path_l, same_size_paths_r):
            # Either directory_r has no file of this size, or none of the
            # files of this size is a byte by byte match.
            unmatched.append(file_path_l)

    return unmatched


# Returns as tuple pairs the size of and path to each of the files in the 
# directory pointed to by 'top', recursively including subdirectories of 'top'. 
# Hidden files and folders are not returned unless 'include_hidden' is True.
def sizes_paths(top, include_hidden):
    """Yield a (size, path) pair for each path that
    get_directory_file_paths(top, include_hidden) produces.

    size is os.path.getsize(path).
    """
    for path in get_directory_file_paths(top, include_hidden):
        yield os.path.getsize(path), path


# Returns each of the paths to the files in the directory pointed to by 'top', 
# recursively including subdirectories of 'top'. Hidden files and folders are 
# not returned unless 'include_hidden' is True.
def get_directory_file_paths(top, include_hidden):
    """Yield the path of every file below top, walking sub-directories.

    Unless include_hidden is true, file names starting with '.' are left
    out and folders starting with '.' are not descended into.
    """
    for current_dir, subfolders, files in os.walk(top):
        if include_hidden:
            wanted_files = files
        else:
            wanted_files = [name for name in files
                            if not name.startswith('.')]
            # Slice assignment edits the very list os.walk is holding,
            # which is what stops it from entering the hidden folders.
            subfolders[:] = [name for name in subfolders
                             if not name.startswith('.')]

        for name in wanted_files:
            yield os.path.join(current_dir, name)


# Creates and returns a dictionary of lists from a list of tuple pairs. 
# The keys in the dictionary are the set of the unique first items from the 
# tuple pairs. Each of these keys is mapped to a list of all the second items 
# from the tuple pairs whose first item matches that key.
# Example:
# {'a': [1, 1], 'c': [1], 'b': [2, 3]} = 
# dict_of_lists([('a', 1), ('a', 1), ('b', 2), ('b', 3), ('c', 1)])
def dict_of_lists(item_list):
    """Group the second items of (first, second) pairs by their first item.

    Returns a collections.defaultdict(list): each distinct first item maps
    to the list of second items that came with it, in input order. Being a
    defaultdict, indexing it with an unknown key gives a new empty list.
    """
    grouped = collections.defaultdict(list)
    for first, second in item_list:
        bucket = grouped[first]
        bucket.append(second)
    return grouped


# Returns True if and only if any of the files pointed to by the file paths in 
# file_path_list_r are a byte by byte match for the file pointed to by 
# file_path_l.
# Note that file_path_list_r may be an empty list.
def file_match(file_path_l, file_path_list_r):
    """Return True as soon as one path in file_path_list_r names a file
    whose content equals that of file_path_l, False when none does.

    The comparison is filecmp.cmp with shallow set to False. An empty
    file_path_list_r gives False.
    """
    for candidate_r in file_path_list_r:
        if filecmp.cmp(file_path_l, candidate_r, shallow=False):
            return True
    return False


# Only parse the command line when executed as a script, so the helper
# functions above can be imported without triggering argparse.
if __name__ == '__main__':
    main()