Python.Ready.ReadyApps #python #Python #ready #apps #readyapps #binarygenerator #lineslicer #columnslicer #keycounter #compare
READYAPPS
1. (READYAPP) BinaryGenerator.py
2. (READYAPP) LineSlicer.py
3. (READYAPP) ColumnSlicer.py
4. (READYAPP) ColumnSlicer.py with fully functional logging system
5. (READYAPP) Keycounter.py / Keycounteradv.py
6. (READYAPP) File_Compare_v1.py
7. (READYAPP) Get_large_file_sizes.py
8. (READYAPP) Remove Duplicates from file
9. (READYAPP) Hexdump Implementation
10.(READYAPP) FQ.py (File Query)
11.(READYAPP) GroupingSplitter.py
12.(READYAPP) TextMerger.py
13.(READYAPP) DirectoryCmp.py
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 6 04:36:32 2017
@author: panagos
"""
#######################################
# LIBRARIES
import os
import sys
import time
from datetime import datetime
import timeit
#######################################
# FUNCTIONS
def script_path_param(string1):
    """Split a script path into its basic components.

    Input:  string1 - path of the script (typically sys.argv[0]).
    Output: tuple (file name, directory of the resolved path, path as given).
    """
    resolved = os.path.realpath(string1)
    return (os.path.basename(string1), os.path.dirname(resolved), string1)
def script_time_param():
    """Return the current local time formatted with the locale's "%c" pattern.

    Input:  None
    Output: string with formatted time
    Usage:  print(script_time_param()) or a = script_time_param()
    """
    stamp = time.strftime("%c", time.localtime())
    return stamp
def usage(string1):
    """Print the script's base name and its call syntax; always returns True."""
    script = str(os.path.basename(string1))
    print("\n" + "Name: " + script)
    print("Call as: " + script + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n")
    return True
def inputscreenmenu():
    """Draw the mode-selection menu and return the raw text typed by the user."""
    rule = '*' * 60
    print(rule)
    print('*' + " Binary converter ")
    print(rule)
    print('*' + " Operations: \n" + '*')
    for entry in ("1. From Binary", "2. To Binary" + "\n" + '*'):
        print("*" + "\t" + entry)
    print(rule)
    print('*')
    selection = input('*' + " Choose Mode: ")
    print('*' + " Script Starting ...")
    return selection
def timing_val(func):
    """Decorator that times a call and prints the elapsed seconds.

    The decorated function returns a tuple
    (elapsed_seconds, original_result, func.__name__).
    Idea taken from: http://www.daniweb.com/code/snippet368.html
    """
    import functools

    @functools.wraps(func)
    def wrapper(*arg, **kw):
        t1 = time.perf_counter()
        res = func(*arg, **kw)
        t2 = time.perf_counter()
        elapsed = t2 - t1
        # Report the decorated function's own name and a positive duration
        # (the old code printed the module __name__ and t1 - t2).
        print('%r (%r, %r) %2.2f sec' % (func.__name__, arg, kw, elapsed))
        return elapsed, res, func.__name__
    return wrapper
#######################################
# WORK FUNCTIONS - TO DO
# Checks and controls
# Greeting string
# 2 modes of operation, to and from
# Output on screen
#st = "hello"
def to_binary(string1):
    """Return string1 as one concatenated string of zero-padded 8-bit character codes."""
    encoded = []
    for ch in string1:
        encoded.append(format(ord(ch), '08b'))
    return ''.join(encoded)
#print(to_binary(st))
def bin_tonormal(string1):
    """Decode binary text back into characters.

    Input:  string1 - either one string of concatenated 8-bit groups
            (whitespace between groups is ignored) or an iterable of
            separate bit strings.
    Output: the decoded string.
    Raises: ValueError when a string input is not a whole number of 8-bit
            groups or contains characters other than 0/1.
    """
    if isinstance(string1, str):
        bits = ''.join(string1.split())
        if len(bits) % 8:
            raise ValueError("binary string length must be a multiple of 8")
        # Iterating a plain string yields single bits, which used to decode
        # every bit as chr(0)/chr(1); cut it into 8-bit groups first.
        string1 = [bits[i:i + 8] for i in range(0, len(bits), 8)]
    return ''.join(chr(int(x, 2)) for x in string1)
# print(bin_tonormal("0110100001100101011011000110110001101111"))  # -> hello
def bintoascii(stri):
    """Decode a string of concatenated 8-bit binary codes.

    Output: tuple (decoded text, list with one entry per decoded character).
    """
    total = len(str(stri))
    pieces = [stri[pos:pos + 8] for pos in range(0, total, 8)]
    a = ''.join(chr(int(piece, 2)) for piece in pieces)
    listform = [str(ch) for ch in a]
    return (a, listform)
#print (bintoascii("0110100001100101011011000110110001101111")[0])
#print (bintoascii("0110100001100101011011000110110001101111")[1])
def string2bits(s=''):
    """Return a list with the zero-padded 8-bit binary code of every character in s."""
    bits = []
    for ch in s:
        bits.append(format(ord(ch), '08b'))
    return bits
#@timing_val
def bits2string(b=None):
    """Join a sequence of bit strings back into text.

    Input:  b - iterable of binary strings (e.g. the output of string2bits);
            None (the default) is treated as an empty sequence.
    Output: the decoded string.
    """
    if b is None:
        # Calling without arguments used to raise TypeError (None is not iterable).
        return ''
    return ''.join(chr(int(x, 2)) for x in b)
#s = input("Enter String: ")
#b = string2bits(s)
#s2 = bits2string(b)
#print ('String:')
#print (s)
#print (b)
#print (s2)
#print ('\nList of Bits:')
#for x in b:
# print (x, end="")
#print ('\nString:')
#print (s2)
############################################
#
starttime = datetime.now()
print("@" + "\n" + "Script started at " + script_time_param() + "\n")
print("Script parameters: ")
# Resolve the script parameters once and print name, directory and full path.
for param in script_path_param(sys.argv[0]):
    print(str(param))
print("\n")
choice = inputscreenmenu()
if choice == '1':
    source = input("Enter binary string for conversion: ")
    try:
        target = bintoascii(source)
    except ValueError:
        # int(chunk, 2) rejects anything that is not made of 0/1 digits.
        print("ERROR: Input is not a valid binary string.")
    else:
        print("Equivalent alphanumeric: " + str(target[0]))
        print("Equivalent alphanumeric in list form: " + str(target[1]))
elif choice == '2':
    source = input("Enter alphanumeric string for conversion: ")
    target = to_binary(source)
    target2 = string2bits(source)
    print("Equivalent binary: " + str(target))
    print("Equivalent binary in list form: " + str(target2))
else:
    # Previously an unknown mode was skipped without any feedback.
    print("ERROR: Unknown mode '" + str(choice) + "'. Choose 1 or 2.")
print("\n" + "@\n" + "Script ended at " + script_time_param())
print("Elapsed duration: " + str(datetime.now() - starttime))
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
# Command-line inputs: <file_in> <start> <end>.
file_in = sys.argv[1]
# The start value is reduced by one to become a 0-based slice index.
start_in = int(sys.argv[2]) - 1
# NOTE(review): the end value is reduced by one as well and is later used as
# an exclusive slice bound, so the line numbered <end> is not written - confirm intent.
end_in = int(sys.argv[3]) - 1
# The output file sits next to the input, with ".proc" appended.
file_ot_00 = str(file_in) + ".proc"
output_file_list = []
def usage(string1):
    """Print the script's base name and its call syntax; always returns True.

    The syntax line lists the three arguments this script reads from
    sys.argv: input file, start line and end line.
    """
    script = str(os.path.basename(string1))
    print("\n" + "Name: " + script)
    print("Call as: " + script + " <FILE_IN> <LINE_START> <LINE_END> \n")
    return True
def chck_args_num(var, limit=3):
    '''
    Function : chck_args_num(var, limit=3)
    Description: Checks the number of command-line arguments
    Input: var   - len(sys.argv), i.e. including the script name
           limit - expected number of arguments without the script name
                   (defaults to 3, the value that used to be hard-coded)
    Output: Boolean
    Called as : chck_args_num(len(sys.argv))
    '''
    args_in_fixed = var - 1
    if args_in_fixed != limit:
        print('Wrong number of arguments : ' + str(args_in_fixed))
        print('Must be : ' + str(limit))
        return False
    print('Correct number of arguments provided: ' + str(args_in_fixed) + "\n")
    return True
def script_path_param(string1):
    """Return (script name, script directory, full script path).

    Only the file name is taken from string1; the directory is the resolved
    location of this module (__file__) and ends with the path separator.
    """
    base = os.path.basename(string1)
    folder = os.path.dirname(os.path.realpath(__file__)) + os.sep
    return (base, folder, folder + base)
def script_time_param():
    """Return the current local time formatted with the locale's "%c" pattern.

    Input:  None
    Output: string with formatted time
    """
    now = time.localtime()
    return time.strftime("%c", now)
def count_file(file):
    """Count the lines, words and characters of a text file.

    Input:  file - path of the file to read.
    Output: tuple (lines, words, chars); line endings count as characters.
    """
    totals = [0, 0, 0]
    with open(str(file), 'r') as handle:
        for text in handle:
            totals[0] += 1
            totals[1] += len(text.split())
            totals[2] += len(text)
    return tuple(totals)
def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with surrounding
                 whitespace (including the line ending) stripped
    Input: File path
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # The context manager closes the handle; it used to be left open.
    with open(str(file), 'r') as handle:
        return [line.strip() for line in handle]
#######################################
# SCRIPT START
# The opening quote of "Script started at " was missing (syntax error).
print("@@\n" + "Script started at " + script_time_param() + "\n")
print("Script basic parameters: ")
script_name, script_dir, script_full = script_path_param(sys.argv[0])
print("Script name" + script_name)
print("Script directory" + script_dir)
print("Script full path" + script_full)
print("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (OSError, UnicodeDecodeError):
    # Stop here: continuing would only fail later with a NameError.
    print("ERROR: Parsing input file.\n")
    sys.exit(1)
# slice out the required part (lines) and add to output list
print("Trimming input file..\n")
output_file_list = input_file[start_in:end_in]
# iterate over output list and print
print('Writing to output..\n')
try:
    with open(file_ot_00, "w") as ot1_file:
        ot1_file.writelines(j + "\n" for j in output_file_list)
except OSError:
    print("ERROR: Opening output file.\n")
    sys.exit(1)
print("Script finished at " + script_time_param() + "\n")
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
# Command-line inputs: <file_in> <column>.
file_in = sys.argv[1]
# The column value is reduced by one to become a 0-based index.
start_in = int(sys.argv[2]) - 1
# The output file sits next to the input, with ".proc.col" appended.
file_ot_00 = str(file_in) + ".proc.col"
output_file_list = []
def usage(string1):
    """Print the script's base name and its call syntax; always returns True.

    The syntax line lists the two arguments this script reads from
    sys.argv: input file and column number.
    """
    script = str(os.path.basename(string1))
    print("\n" + "Name: " + script)
    print("Call as: " + script + " <FILE_IN> <COLUMN> \n")
    return True
def chck_args_num(var, limit=3):
    '''
    Function : chck_args_num(var, limit=3)
    Description: Checks the number of command-line arguments
    Input: var   - len(sys.argv), i.e. including the script name
           limit - expected number of arguments without the script name;
                   defaults to 3, the value that used to be hard-coded.
                   This script reads two arguments (file, column), so pass
                   limit=2 when validating its command line.
    Output: Boolean
    Called as : chck_args_num(len(sys.argv), 2)
    '''
    args_in_fixed = var - 1
    if args_in_fixed != limit:
        print('Wrong number of arguments : ' + str(args_in_fixed))
        print('Must be : ' + str(limit))
        return False
    print('Correct number of arguments provided: ' + str(args_in_fixed) + "\n")
    return True
def script_path_param(string1):
    """Return (script name, script directory, full script path).

    The name comes from string1; the directory is where this module
    (__file__) lives, with a trailing path separator.
    """
    here = os.path.realpath(__file__)
    script_dir = os.path.dirname(here) + os.sep
    script_name = os.path.basename(string1)
    return (script_name, script_dir, script_dir + script_name)
def script_time_param():
    """Return the current local time formatted with the locale's "%c" pattern.

    Input:  None
    Output: string with formatted time
    """
    formatted = time.strftime("%c", time.localtime())
    return formatted
def count_file(file):
    """Count the lines, words and characters of a text file.

    Input:  file - path of the file to read.
    Output: tuple (lines, words, chars); line endings count as characters.
    """
    n_lines = n_words = n_chars = 0
    with open(str(file), 'r') as src:
        for n_lines, row in enumerate(src, 1):
            n_words += len(row.split())
            n_chars += len(row)
    return (n_lines, n_words, n_chars)
def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with surrounding
                 whitespace (including the line ending) stripped
    Input: File path
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # The context manager closes the handle; it used to be left open.
    with open(str(file), 'r') as handle:
        return [line.strip() for line in handle]
#######################################
# SCRIPT START
print("Script started at " + script_time_param() + "\n")
print("Script basic parameters: ")
script_name, script_dir, script_full = script_path_param(sys.argv[0])
print("Script name" + script_name)
print("Script directory" + script_dir)
print("Script full path" + script_full)
print("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (OSError, UnicodeDecodeError):
    # Stop here: continuing would only fail later with a NameError.
    print("ERROR: Parsing input file.\n")
    sys.exit(1)
# iterate over the input lines and write the requested column
print('Writing to output..\n')
skipped = 0
try:
    # The context manager closes the output file (it was never closed before).
    with open(file_ot_00, "w") as ot1_file:
        for line in input_file:
            fields = line.split()
            try:
                ot1_file.write(fields[start_in] + "\n")
            except IndexError:
                # Blank lines or lines with too few columns used to abort the run.
                skipped += 1
except OSError:
    print("ERROR: Opening output file.\n")
    sys.exit(1)
if skipped:
    print("Skipped " + str(skipped) + " line(s) without the requested column.\n")
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
import logging
# #############################################################
# Logging Configuration
# Available loggers:
# 1. logging (general)
# 2. logger1 (boot/checks)
# 3. logger2 (main code)
#
# Usage:
# 'application' code
# logger.debug('debug message')
# logger.info('info message')
# logger.warning('warn message')
# logger.error('error message')
# logger.critical('critical message')
#
# set up logging to file - see previous section for more details
# Root logger: records from DEBUG upwards go to the file 'hello.log',
# which is opened with filemode='w'.
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
datefmt='%m-%d %H:%M',
filename='hello.log',
filemode='w')
# define a Handler which writes INFO messages or higher to the sys.stderr
# If we want the lowest setting - we must set this to DEBUG.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# set a format which is simpler for console use
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
# tell the handler to use this format
console.setFormatter(formatter)
# add the handler to the root logger
logging.getLogger('').addHandler(console)
# Now, we can log to the root logger, or any other logger. First the root...
#logging.info('Jackdaws love my big sphinx of quartz.')
# logging.debug('THIS SHOULD NOT BE ON SCREEN')
# Now, define a couple of other loggers which might represent areas in your
# application:
# logger1 is used for the start-up assignments, logger2 for the main body.
logger1 = logging.getLogger('ColumnSlicer.BootChecks')
logger2 = logging.getLogger('ColumnSlicer.MainBody')
# Examples
#logger1.debug('Quick zephyrs blow, vexing daft Jim.')
#logger1.info('How quickly daft jumping zebras vex.')
# logger2.warning('Jail zesty vixen who grabbed pay from quack.')
# logger2.error('The five boxing wizards jump quickly.')
# logging.debug('blabla')
# ######
#
logger1.debug("Initial var assignment")
try:
    file_in = sys.argv[1]
    start_in = int(sys.argv[2]) - 1
except (IndexError, ValueError):
    # A missing argument or a non-integer column used to be logged and then
    # ignored, which surfaced later as a NameError; stop with a clear message.
    logger1.critical("ERROR: At initial variable assignment")
    logger1.critical("Expected call: " + str(os.path.basename(sys.argv[0])) + " <FILE_IN> <COLUMN>")
    sys.exit(1)
file_ot_00 = str(file_in) + ".proc.col"
output_file_list = []
logger1.debug("Initial function declaration")
def usage(string1):
    """Print the script's base name and its call syntax; always returns True.

    The syntax line lists the two arguments this script reads from
    sys.argv: input file and column number.
    """
    script = str(os.path.basename(string1))
    print("\n" + "Name: " + script)
    print("Call as: " + script + " <FILE_IN> <COLUMN> \n")
    return True
def chck_args_num(var, limit):
    '''
    Function : chck_args_num(var, limit)
    Description: Checks the number of command-line arguments
    Input: var   - len(sys.argv), i.e. including the script name
           limit - expected number of arguments without the script name
    Output: Boolean
    Called as : chck_args_num(len(sys.argv), 2)
    '''
    supplied = var - 1
    if supplied == limit:
        print('Correct number of arguments provided: ' + str(supplied) + "\n")
        return True
    print('Wrong number of arguments : ' + str(supplied))
    print('Must be : ' + str(limit))
    return False
def script_path_param(string1):
    """Return (script name, script directory, full script path).

    The name comes from string1; the directory is where this module
    (__file__) lives, with a trailing path separator.
    """
    parts = [os.path.basename(string1)]
    parts.append(os.path.dirname(os.path.realpath(__file__)) + os.sep)
    parts.append(parts[1] + parts[0])
    return tuple(parts)
def script_time_param():
    """Return the current local time formatted with the locale's "%c" pattern.

    Input:  None
    Output: string with formatted time
    """
    pattern = "%c"
    return time.strftime(pattern)
def count_file(file):
    '''
    Name: count_file
    Function: Counts lines, words and characters of a text file
    Input: filename
    Output: tuple of (lines, words, chars)
    Usage: a = count_file(file_in)
    '''
    with open(str(file), 'r') as in_file:
        # One (word count, character count) pair per line.
        stats = [(len(row.split()), len(row)) for row in in_file]
    words = sum(pair[0] for pair in stats)
    chars = sum(pair[1] for pair in stats)
    return (len(stats), words, chars)
def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with surrounding
                 whitespace (including the line ending) stripped
    Input: File path
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # The context manager closes the handle; it used to be left open.
    with open(str(file), 'r') as handle:
        return [line.strip() for line in handle]
def reporting_f(orig_file, new_file):
    '''
    Placeholder for a report comparing orig_file with new_file.
    Not implemented: the function has no body and returns None.
    '''
    return None
#######################################
# SCRIPT START
logger2.info("@@\n")
logger2.info("Script started at " + script_time_param() + "\n")
logger2.debug("Script basic parameters: ")
script_name, script_dir, script_full = script_path_param(sys.argv[0])
logger2.debug("Script name" + script_name)
logger2.debug("Script directory" + script_dir)
logger2.debug("Script full path" + script_full)
logger2.info("Checking supplied arguments....")
if not chck_args_num(len(sys.argv), 2):
    logger2.critical('Terminating Script....')
    sys.exit()
logger2.info("Parsing input file..\n")
try:
    input_file = filetoliststrip(file_in)
except (OSError, UnicodeDecodeError):
    # Stop here: continuing would only fail later with a NameError.
    logger2.critical("ERROR: Parsing input file.\n")
    sys.exit(1)
# iterate over the input lines and write the requested column
logger2.info('Writing to output..\n')
skipped = 0
try:
    # The context manager closes the output file (it was never closed before).
    with open(file_ot_00, "w") as ot1_file:
        for line in input_file:
            fields = line.split()
            try:
                ot1_file.write(fields[start_in] + "\n")
            except IndexError:
                # Blank lines or lines with too few columns used to abort the run.
                skipped += 1
except OSError:
    logger2.critical("ERROR: Opening output file.\n")
    sys.exit(1)
if skipped:
    logger2.warning("Skipped %d line(s) without the requested column.", skipped)
# Reporting
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
# Single command-line input: the file to analyse.
file_in = sys.argv[1]
# Three output files next to the input; the main loop of this script writes
# keys seen once to .proc, twice to .proc2 and more often to .proc3.
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"
def filetoliststrip(file):
    '''
    Function: filetoliststrip
    Description: Reads a file and returns its lines with surrounding
                 whitespace (including the line ending) stripped
    Input: File path
    Output: List of strings, one per line
    Usage: print (filetoliststrip("C:\\Users\\p.doulgeridis\\Desktop\\testpy.txt"))
    Notes: Path needs double \\ or reverse /
    '''
    # The context manager closes the handle; it used to be left open.
    with open(str(file), 'r') as handle:
        return [line.strip() for line in handle]
dict_in = dict()
seen = []
fileinlist = filetoliststrip(file_in)
# Group the (stripped) lines by the key held in the slice [10:69].
for line in fileinlist:
    keyf = line[10:69]
    dict_in.setdefault(keyf, []).append(line)
# Route every group by how often its key occurs:
# once -> file_ot, twice -> file_ot2, more often -> file_ot3.
# All lines of a group are written, each terminated by a newline; the old
# loop wrote a single line per repeated key and no line separators at all.
with open(file_ot, 'w') as out_file, \
        open(file_ot2, 'w') as out_file2, \
        open(file_ot3, 'w') as out_file3:
    for j in dict_in:
        group = dict_in[j]
        if len(group) < 2:
            target = out_file
        elif len(group) == 2:
            target = out_file2
        else:
            target = out_file3
        target.writelines(entry + "\n" for entry in group)
####################################################################
# ADVANCED VERSION FOR LARGE FILES
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
import collections
# Single command-line input: the file to analyse.
file_in = sys.argv[1]
# Three output files next to the input, one per key-frequency class
# (the write loop of this script selects them by group size: 1, 2, other).
file_ot = str(file_in) + ".proc"
file_ot2 = str(file_in) + ".proc2"
file_ot3 = str(file_in) + ".proc3"
# dict_in = collections.defaultdict(list) # save some time with a dictionary factory
# with open(file_in, "r") as f: # open the file_in for reading
# for line in f: # read the file line by line
# print("Read line: " + str(line))
# key = line.strip()[10:69] # assuming this is how you get your key
# dict_in[key].append(line) # add the line as an element of the found key
# # now that we have the lines in their own key brackets, lets write them based on frequency
# with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
# selector = {1: f1, 2: f2} # make our life easier with a quick length-based lookup
# for values in dict_in.values(): # use dict_in.itervalues() on Python 2.x
# selector.get(len(values), f3).writelines(values) # write the collected lines
# Bucket the raw lines by key; defaultdict creates the list on first use.
dict_in = collections.defaultdict(list)
with open(file_in, "r") as f:
    for line in f:
        print("read line: " + str(line))
        # The key is the slice [10:69] of the stripped line.
        key = line.strip()[10:69]
        dict_in[key].append(line)
# Write each bucket to the file matching its size: 1 -> f1, 2 -> f2, else f3.
with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
    selector = {1: f1, 2: f2}
    for values in dict_in.values():
        selector.get(len(values), f3).writelines(values)
#!/usr/bin/python
###############################################################################
# Packages.
import sys
###############################################################################
# Global variables.
fltrName = ""
baseName = ""
comnName = ""
diffName = ""
fltrStrt = -1
fltrLeng = -1
baseStrt = -1
baseLeng = -1
# Constants.
ARG_NUM = 6
###############################################################################
# Functions.
def chk_args( p_sys_argv ):
    """Check command-line arguments and load them into the module globals.

    Input:  p_sys_argv - argument list in sys.argv layout (script name first,
            then filter file, base file, filter start, filter length,
            base start, base length).
    Output: 1 when exactly ARG_NUM arguments were supplied, otherwise 0.
    Raises: ValueError when one of the four numeric arguments is not a
            base-10 integer.
    """
    global fltrName, baseName, comnName, diffName
    global fltrStrt, fltrLeng, baseStrt, baseLeng
    if len(p_sys_argv) - 1 != ARG_NUM:
        prog = p_sys_argv[0] if p_sys_argv else ""
        print("WRONG number of arguments: ", len(p_sys_argv) - 1)
        print("Must be: ", ARG_NUM)
        print("Usage:", prog, " File_Filter", "File_Base", " Filter_Start", "Filter_Length", " Base_Start", "Base_Length")
        return 0
    # Read from the list that was passed in; the old body ignored its
    # parameter and always used sys.argv.
    fltrName = p_sys_argv[1]
    baseName = p_sys_argv[2]
    comnName = baseName + ".cmn"
    diffName = baseName + ".dif"
    fltrStrt = int(p_sys_argv[3], 10)
    fltrLeng = int(p_sys_argv[4], 10)
    baseStrt = int(p_sys_argv[5], 10)
    baseLeng = int(p_sys_argv[6], 10)
    return 1
###############################################################################
# Start.
print(sys.argv[0], "Started.")
print()
# Check arguments.
print("Checking arguments...")
if chk_args( sys.argv ) == 0:
    print("Exiting...")
    sys.exit(69)
print("fltrName = ", fltrName)
print("baseName = ", baseName)
print("comnName = ", comnName)
print("diffName = ", diffName)
print("fltrStrt = ", fltrStrt)
print("fltrLeng = ", fltrLeng)
print("baseStrt = ", baseStrt)
print("baseLeng = ", baseLeng)
###############################################################################
# Load filter file to table.
print()
print("Loading filter file to table...")
fltrList = []
# Start positions are given 1-based; convert to 0-based slice bounds.
fltrFrom = fltrStrt - 1
fltrTo = fltrFrom + fltrLeng
with open(fltrName, "r") as fltrFile:
    for fltrLine in fltrFile:
        fltrIndx = fltrLine[fltrFrom : fltrTo]
        fltrList.append(fltrIndx)
        print(fltrIndx)
# Set copy of the keys so each lookup below is O(1) instead of a list scan.
fltrSet = set(fltrList)
###############################################################################
# Process base file.
print()
print("Processing base file...")
baseFrom = baseStrt - 1
baseTo = baseFrom + baseLeng
with open(baseName, "r") as baseFile, \
        open(comnName, "w") as comnFile, \
        open(diffName, "w") as diffFile:
    for baseLine in baseFile:
        baseIndx = baseLine[baseFrom : baseTo]
        print(baseIndx)
        # Lines whose key occurs in the filter file go to .cmn, the rest to .dif.
        if baseIndx in fltrSet:
            comnFile.write(baseLine)
        else:
            diffFile.write(baseLine)
###############################################################################
# End.
print()
print(sys.argv[0], "Ended.")
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 12:45:00 2017
@author: P.Doulgeridis
"""
# necessary module to work with excel
#import openpyxl
# documentation: http://openpyxl.readthedocs.org/.
import os
import sys
# Command-line inputs: directory to scan and size threshold.
rootDir = sys.argv[1]
# Kept as text here; get_small_files / get_large_files convert it with int()
# and compare it against file sizes expressed in MB.
sizing = sys.argv[2]
# for folderName, subFolders, fileNames in os.walk(rootDir):
# for fileName in fileNames:
# filePath = os.path.join(folderName, fileName)
# if os.path.exists(filePath):
# fileSize = os.path.getsize(filePath)
# fileSize = fileSize/1024/1024 # Convert it to MB
# if fileSize > int(sizing):
# print("{0}\t{1}".format(fileSize,filePath))
def get_small_files(rootDir, sizing):
    r"""
    Name: get_small_files
    Function: get_small_files(rootDir, sizing)
    Input: rootDir - directory to scan recursively
           sizing  - size limit in MB (anything int() accepts)
    Output: list of (size_in_MB, path) tuples for files smaller than the limit
    Usage: print (get_small_files('/', 600))
    Storage: PYTHON FILE UTILITIES
    Notes: Recursive. Every match is also printed as "<size>\t<path>".
           Files whose size cannot be read are skipped.
    """
    # Necessary modules
    import os
    # Convert the limit once instead of once per file.
    limit = int(sizing)
    list_out = []
    # Iterate over os.walk, recursive
    for folderName, subFolders, fileNames in os.walk(rootDir):
        for fileName in fileNames:
            filePath = os.path.join(folderName, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # Vanished file, broken link or no permission: skip it
                # (replaces the racy exists()-then-getsize() pair).
                continue
            fileSize = fileSize/1024/1024 # Convert it to MB
            # Compare and output.
            if fileSize < limit:
                print("{0}\t{1}".format(fileSize,filePath))
                list_out.append((fileSize, filePath))
    return list_out
def get_large_files(rootDir, sizing):
    r"""
    Name: get_large_files
    Function: get_large_files(rootDir, sizing)
    Input: rootDir - directory to scan recursively
           sizing  - size limit in MB (anything int() accepts)
    Output: list of (size_in_MB, path) tuples for files larger than the limit
    Usage: print (get_large_files('/', 1))
    Storage: PYTHON FILE UTILITIES
    Notes: Recursive. Every match is also printed as "<size>\t<path>".
           Files whose size cannot be read are skipped.
    """
    # Necessary modules
    import os
    # Convert the limit once instead of once per file.
    limit = int(sizing)
    list_out = []
    # Iterate over os.walk, recursive
    for folderName, subFolders, fileNames in os.walk(rootDir):
        for fileName in fileNames:
            filePath = os.path.join(folderName, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # Vanished file, broken link or no permission: skip it
                # (replaces the racy exists()-then-getsize() pair).
                continue
            fileSize = fileSize/1024/1024 # Convert it to MB
            # Compare and output
            if fileSize > limit:
                print("{0}\t{1}".format(fileSize,filePath))
                list_out.append((fileSize, filePath))
    return list_out
# Report the files below <rootDir> that are larger than <sizing> MB, using the
# values given on the command line (they were parsed but never used; the old
# call scanned '/' for small files with a fixed limit).
print (get_large_files(rootDir, sizing))
import sys
import os
import subprocess
file_in = sys.argv[1]
# Named line_range so the builtin range() is not shadowed for the rest of the
# module (the previous name was "range").
line_range = int(sys.argv[2])
file_ot = file_in + ".proc"
print(file_in)
print(file_ot)
counter_line = 0
counter_title = 0
total_lines = 0
current_line = ""
previous_lines = []
dicta = dict()
# The output file is created up front as before; nothing is written to it
# yet, but the context manager now guarantees it is closed.
with open(file_ot, 'w') as file_out:
    with open(file_in, 'r') as f:
        for line in f:
            counter_line += 1
            # control substring
            control_s = line[10:20]
            dicta.setdefault(control_s, []).append(line)
key_list = []
# Total number of lines over all groups.
line_length = sum(len(dicta[j]) for j in dicta)
###########################################################
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 10:52:24 2017
@author: P.Doulgeridis
"""
import sys
import os
file_input = sys.argv[1]
file_ot = file_input + ".py.nodupl"
print(file_input)
print(file_ot)
seen = set()
# Read from file_input (the loop used the undefined name file_in) and let the
# context managers close both files.
with open(file_input, 'r') as infile, open(file_ot, 'w') as outfile:
    for line in infile:
        # Compare without the line ending so a final line lacking "\n" still
        # matches an earlier identical line.
        control = line.rstrip("\n")
        if control not in seen:
            seen.add(control)
            # Exactly one newline per kept line; the old write appended a
            # second one and produced blank lines.
            outfile.write(control + "\n")
# Hexdump implementation
def hexdump(src, length=16):
    """Format src as a classic hex dump.

    Input:  src    - str or bytes to dump
            length - number of characters/bytes shown per output line
    Output: one string; every line holds the 4-digit hex offset, the hex
            codes and the printable rendering, and ends with a newline.
            An empty src gives an empty string.
    """
    # Characters whose repr() is exactly 3 long ('x') print as themselves,
    # everything else is shown as '.'.
    FILTER = ''.join([chr(x) if len(repr(chr(x))) == 3 else '.' for x in range(256)])
    lines = []
    for c in range(0, len(src), length):
        chars = src[c:c+length]
        # Iterating bytes yields ints, iterating str yields characters.
        codes = [x if isinstance(x, int) else ord(x) for x in chars]
        hex_part = ' '.join("%02x" % code for code in codes)
        printable = ''.join(FILTER[code] if code <= 127 else '.' for code in codes)
        lines.append("%04x %-*s %s\n" % (c, length*3, hex_part, printable))
    return ''.join(lines)
# print(hexdump("asdfasdfsdfsdfasdfasdfasdfaksdfaksdfjaksdjsfasd", length=16))
#
#0000 61 73 64 66 61 73 64 66 73 64 66 73 64 66 61 73 asdfasdfsdfsdfas
#0010 64 66 61 73 64 66 61 73 64 66 61 6b 73 64 66 61 dfasdfasdfaksdfa
#0020 6b 73 64 66 6a 61 6b 73 64 6a 73 66 61 73 64 ksdfjaksdjsfasd
#!/usr/bin/python
# #################################################################
#
#
# -----------------------------------------------------------------
#
#
#
# -----------------------------------------------------------------
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
# #####################################################################
# to do
# reporting
# counters
import os
import sys
import time
def usage():
    """Print the command-line help for FQ.py.

    The third argument is documented as a length because the script turns
    <start>/<length> into a slice by adding the length to the start offset.
    """
    print("""
Name: FQ.py
Called as: FQ.py <file_in> <start> <length> <pattern1> <pattern2> ...<patternN>
Input: <file_in> : full path to file
<start> : integer
<length> : integer
<pattern.> : string
""")
# script time param
def script_time_param():
    """Return the current local time formatted with the locale's "%c" pattern.

    Input:  None
    Output: string with formatted time
    """
    moment = time.localtime()
    return time.strftime("%c", moment)
# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - script path, normally sys.argv[0]
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: vocal == 'yes' : True after printing
            otherwise      : tuple (script_name, script_dir, script_full)
            False when the path could not be processed
    Usage: script_path_param(sys.argv[0])
    Notes: None
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        # os.path.join supplies the separator that plain concatenation omitted.
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError):
        print("Error loading script parameters. Possible problem with os module")
        # Return instead of falling through to a NameError on the unset names.
        return False
    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    print("Script name: " + str(script_name))
    print("Script directory: " + str(script_dir))
    print("Script full: " + str(script_full))
    print ('Script started at: ' + time.strftime("%c"))
    return True
# isinteger
def typecheck(input, type):
    '''
    Name: typecheck
    Function: typecheck(input, type)
    Input: * input - the value to test
           * type  - a class or tuple of classes, e.g. int, str, list,
                     dict, tuple
    Output: [ T | F ]
    Usage: Checks data against a specific type via isinstance.
    Notes: None
    '''
    matches = isinstance(input, type)
    return matches
# file exists
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Checks whether the given path exists
    Input: filepath (or raw string of it)
    Output: Boolean
    Usage: if fileexists(file_in):...
    Notes: Depending on system may need to
    convert to raw string with r'file_in'.
    '''
    import os.path
    found = os.path.exists(filepath)
    return True if found else False
# timer
def timing_val(func):
    '''
    Author: p.doulgeridis
    Name: timing_val
    Function: timing_val(func)
    Input: none, decorator function
    Output: prints elapsed time of execution; the decorated function
            returns the tuple (elapsed_seconds, original_result)
    Usage: @timing_val over function declaration
    Notes: wrapper timing function.
    '''
    import functools

    @functools.wraps(func)
    def wrapper(*arg, **kw):
        t1 = time.perf_counter()
        res = func(*arg, **kw)
        t2 = time.perf_counter()
        elapsed = t2 - t1
        # Print the positive duration; the old message used t1 - t2.
        print ('%r (%r, %r) %0.9f sec' % (func.__name__, arg, kw, elapsed))
        return elapsed, res
    return wrapper
def calcoffsetsub(sub_start, sub_length):
    '''
    Name: Calculates Python slice bounds from an awk/bash style offset
    Function: calcoffsetsub
    Input: start (1-based), length
    Output: (start, end) usable as text[start:end]
    Usage: calcoffsetsub(2, 10)
    Notes: None
    '''
    begin = int(sub_start) - 1
    return (begin, begin + int(sub_length))
def parsearguments(list_in):
    '''
    Name: parsearguments
    Function: parsearguments(list_in)
    Input: argument list in sys.argv layout
    Output: <list> - the arguments after index 3 (the filter patterns)
    Usage: parsearguments(sys.argv)
    Notes: None, script specific.
    '''
    # Slice the list that was passed in; the old loop ignored its parameter
    # and always walked sys.argv.
    return list(list_in[4:])
# chk_args_type
def chck_args_type(string):
    '''
    Function : chck_args_type(string)
    Description: Prints the script name and every argument with its position
    Input: argument list in sys.argv layout
    Output: STDOUT print; returns 0
    Called as chck_args_type(sys.argv)
    '''
    # Print what was passed in (the old body read sys.argv regardless) using
    # the print() function instead of Python 2 print statements.
    for i, value in enumerate(string):
        if i == 0:
            print("Arguments for script: %s" % (value,))
        else:
            print("%d. argument: %s" % (i, value))
    print("\n")
    return 0
def script_end():
    """Print the closing banner with the current local time."""
    import time
    finished = time.strftime("%c")
    print("\n" + "Script ended at: " + finished)
@timing_val
def parse_file(file_in, filter_list_in):
    """Copy the lines of file_in whose key matches one of the patterns.

    Input:  file_in        - path of the file to scan
            filter_list_in - iterable of key strings to look for
    Output: writes the matching lines to "<file_in>.filterpy" and prints the
            number of lines read and matches found.
    Notes:  The key position comes from the module-level sub_start and
            sub_length values via calcoffsetsub.
    """
    # Use the patterns that were passed in (the old body read the global
    # filter_list); a set gives constant-time lookups.
    wanted = set(filter_list_in)
    # The key bounds do not change per line, so compute them once.
    key_from, key_to = calcoffsetsub(sub_start, sub_length)
    lines_read = 0
    count_found = 0
    file_ot = str(file_in) + ".filterpy"
    with open(file_in, 'r') as f, open(file_ot, 'w') as out_file:
        for line in f:
            lines_read += 1
            key = line.rstrip()[key_from:key_to]
            if key in wanted:
                count_found += 1
                out_file.write(line)
    # lines_read is a real count: correct for empty files and no longer one
    # short as the zero-based enumerate index was.
    print("Read: " + str(lines_read) + " lines.")
    print("Found: " + str(count_found) + " matches.")
# #######################################################
# Initial checks
# Parse non-optional parameters.
try:
    file_in = sys.argv[1]
    sub_start = sys.argv[2]
    sub_length = sys.argv[3]
    sub_pat = sys.argv[4]
except IndexError:
    print("Error: Parameter parsing. Terminating script")
    sys.exit(1)
# Check for existence of input file
if not fileexists(file_in):
    print("\n" + "Provided file could not be detected. Terminating")
    usage()
    sys.exit(1)
# Check that the next two positional parameters are integers.
# int() raises ValueError on non-numeric text, so the previous
# isinstance(int(x), int) test either passed or crashed with a traceback.
for candidate in (sub_start, sub_length):
    try:
        int(candidate)
    except ValueError:
        print("\n" + "Provided number is not an integer. Terminating")
        usage()
        sys.exit(1)
# ########################################################
# Script Start
print("Initial controls successful. Launching script...")
print("\n")
# print script parameters
script_path_param(sys.argv[0])
print("\n")
# produce filter_list from arguments
filter_list = parsearguments(sys.argv)
print("\n")
# check args type - on screen report
chck_args_type(sys.argv)
print("\n")
# Call main work - time it.
parse_file(file_in, filter_list)
# ##########################################################
# Script end
script_end()
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ##########################################################################
#
# Author: P.Doulgeridis
#
# # ------------------------------------------------------------------------
# Use as: python BNsplit.py <file_in> <lines> <start> <end> <-v|-q>
#
# <file_in>: BN
# <lines> : ~42k
# <start> : integer
# <end> : integer
#
#
#
# # ------------------------------------------------------------------------
#
# Location: C:\Users\p.doulgeridis\Desktop\weirdsplit\weirdsplit.py
#
# # ------------------------------------------------------------------------
# Function: Reads the input file and the line limit and starts parsing. If the
# limit is exceeded in the middle of a grouping, the entire group is written
# to the same file, and only then is a new file started.
#
# # ------------------------------------------------------------------------
#
# Notes:
# Encoding problem with input file: solved with errors=ignore.
# Check encoding python.
#
# Used to split BN files in banks <= 14 and banks > 14
#
# # ------------------------------------------------------------------------
#
# ##########################################################################
# Necessary Modules
import sys
import os
import time
try:
    import argparse
except ImportError:
    # Only a failed import is handled here; a bare except would also swallow
    # unrelated errors such as KeyboardInterrupt.
    print("Failed to load argparse. Terminating")
    sys.exit(2)
# ##########################################################################
# Initiate Parser
# MODIFY: CENTRAL HELP TEXT
# Build the command-line parser. RawDescriptionHelpFormatter keeps the
# line breaks of the description text below as written.
parser = argparse.ArgumentParser(
prog="GroupSplit",
formatter_class=argparse.RawDescriptionHelpFormatter,
#description="calculate X to the power of Y",
description='''\
#
# GroupSplit v.3
# --------------------------------
# Author: p.doulgeridis
# Description: Splits a text file into a number
# of files that have size = lines_in provided,
# keeping the groupings intact.
#
# Caution: Certain files may exceed limit due to grouping.
#
# ''',
epilog="Additional info")
#######
# Initiate mutually exclusive group.
# SET BY DEFAULT FOR VERBOSE/QUIET
# IF YOU NEED MORE EXCLUSIVE OPTIONS, ADD A DIFFERENT GROUP.
#
# -v may be repeated (action="count"); -q is a plain flag. The two options
# cannot be combined because they share one mutually exclusive group.
group = parser.add_mutually_exclusive_group()
group.add_argument("-v", "--verbose", action="count", default=0)
group.add_argument("-q", "--quiet", action="store_true")
######
# Positional Arguments (Necessary)
# POSSIBLE KINDS (actions, types)
#
parser.add_argument("file", type=str, help="Provide the file")
parser.add_argument("lines", type=int, help="Provide the target lines")
parser.add_argument("start", type=int, help="Provide the beginning of substring - notepad++ column")
parser.add_argument("end", type=int, help="Provide the end of the substring - notepad++ column")
######
# Parse arguments
# parse_args() reads sys.argv and exits with a usage message on invalid input.
args = parser.parse_args()
######
# Assign arguments
# NUM_OF_LINES=args.lines
# filename = args.file
# ##################################################################
# Declare functions and wrappers
# Reporting func
def reporting(lines_in):
    '''
    Function: reporting
    Description: Prints the end-of-run summary line
    Input: lines_in - number of lines that were read
    Output: STDOUT (returns None)
    '''
    summary = "Operation Finished. Read: {} lines.".format(str(lines_in))
    print(summary)
# file exists
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Tells whether the given path exists on disk
    Input: filepath (string path)
    Output: Boolean
    Usage: if fileexists(file_in): ...
    Notes: Any existing path qualifies (file or directory), because the
           check is os.path.exists.
    '''
    from os import path
    return True if path.exists(filepath) else False
# isinteger
def typecheck(input, type):
    '''
    Function: typecheck
    Description: Thin wrapper around isinstance
    Input: input - value to test; type - class or tuple of classes
    Output: Boolean
    Notes: The parameter names shadow the builtins of the same name inside
           this function; they are kept because callers may pass them by
           keyword.
    '''
    matches = isinstance(input, type)
    return matches
# file backup
def backupfile(src):
    '''
    Function: backupfile
    Description: Copies src to "<src>.bak" (contents and metadata)
    Input: src - path of the file to back up
    Output: path of the backup file
    Raises: IOError/OSError if src cannot be read or the backup cannot be
            written
    Notes: The previous version only built the backup name and never
           copied anything. An existing "<src>.bak" is overwritten.
    '''
    import shutil
    backup = str(src) + ".bak"
    shutil.copy2(src, backup)
    return backup
# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - path of the script, normally sys.argv[0]
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join; plain concatenation
           dropped the separator between directory and file name.
           Relies on the module-level imports of os and time.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError, OSError):
        # Nothing sensible can be reported without these values; re-raise
        # instead of failing later on unbound names.
        print("Error loading script parameters. Possible problem with os module")
        raise
    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    print("Script name: " + str(script_name))
    print("Script directory: " + str(script_dir))
    print("Script full: " + str(script_full))
    print ('Script started at: ' + time.strftime("%c"))
    return True
# Script End
def script_end():
    '''
    Function: script_end
    Description: Prints the end-of-script timestamp (locale "%c" format)
    Output: STDOUT (returns None)
    '''
    from time import strftime
    finished_at = strftime("%c")
    print("Script ended at: " + finished_at)
# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time, formatted with "%c"
    Usage: print(script_time_param()) or a = script_time_param()
    '''
    stamp = time.strftime("%c")
    return stamp
# ##################################################################
# PARSE ARGUMENTS AND FORMAT
# Unpack the parsed command-line arguments into the module-level names used
# by the rest of the script.
try:
    file_in = args.file
    input_line_limit = args.lines
    start_in = int(args.start)
    end_in = int(args.end)
except (AttributeError, TypeError, ValueError):
    # Nothing below can run without these values, so stop here instead of
    # continuing into a NameError.
    print("Failed to parse arguments")
    sys.exit(2)
else:
    print("Arguments parsed succesfully")
# ###################################################################
# CHECKS
if not fileexists(file_in):
    print("Error: Input file could not be located. Terminating.")
    sys.exit(1)
# argparse already enforces type=int; these checks are kept as a safety net.
if not typecheck(start_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)
if not typecheck(end_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)
if not typecheck(input_line_limit, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)
# #####################################################################
# Initialize Iteration Vars
# NOTE: `group` rebinds the name that held the argparse exclusive group.
group = ""
prev_group = ""
file_count = 0
dict_group = {}
# #####################################################################
# Script Start
# Handle quiet/verbose arguments
# Start-up report, scaled by the number of -v flags; -q prints nothing.
#   -vvvv or more : script parameters plus an echo of every argument
#   -vv / -vvv    : script parameters only
#   -v            : name of the file being processed
if not args.quiet and args.verbose:
    if args.verbose >= 4:
        # full script reporting
        print("\n" + "Launching script:")
        script_path_param(sys.argv[0])
        print("\n")
        chck_args_type(sys.argv)
    elif args.verbose >= 2:
        script_path_param(sys.argv[0])
    else:
        print("Processing file: " + str(file_in))
def _split_by_group(path, start, end, line_limit, debug=True):
    '''
    Function: _split_by_group
    Description: Copies the lines of `path` into numbered output files
                 "<path>.out.<n>.txt" without ever splitting a group.
                 A group is a run of consecutive lines sharing the same
                 key line[start:end]. Once a complete group has been
                 written and the current output file holds at least
                 `line_limit` lines, the next group starts a new file.
    Input: path       - input file
           start, end - slice bounds of the grouping key
           line_limit - target number of lines per output file (a file
                        may exceed it, because groups are kept whole)
           debug      - when True, print the per-line trace
    Output: (lines_read, group_sizes); group_sizes has one entry per
            written group, so sum(group_sizes) == lines written
    Notes: Empty input yields an empty "<path>.out.0.txt" and (0, []).
           The output file is closed even when reading fails.
    '''
    def trace(message):
        if debug:
            print(message)

    def out_name(index):
        return str(path) + ".out" + ".%d.txt" % (index)

    lines_read = 0
    group_sizes = []
    file_index = 0
    line_status = 0      # lines written to the currently open output file
    pending = []         # lines of the group being collected
    prev_group = ""
    fout = open(out_name(file_index), "w")
    try:
        with open(path, 'r') as f:
            for i, line in enumerate(f):
                lines_read = i + 1
                trace("#####")
                trace("Line: " + str(i) + " with content: " + str(line))
                # parse group
                group = line[start:end]
                trace("Comp: " + str(group) + " : " + str(prev_group))
                # `not pending` covers the very first line, including the
                # case where its key slice is empty.
                if not pending or group != prev_group:
                    trace("new group: " + str(group))
                    trace("Checking line status: " + str(line_status) + " <-> " + str(line_limit))
                    if pending:
                        trace("Writing to output")
                        fout.writelines(pending)
                        line_status += len(pending)
                        group_sizes.append(len(pending))
                        pending = []
                        if line_status >= line_limit:
                            trace("Checking line status: " + str(line_status) + " <-> " + str(line_limit))
                            trace("Starting new file")
                            fout.close()
                            file_index += 1
                            fout = open(out_name(file_index), "w")
                            line_status = 0
                pending.append(line)
                prev_group = group
        # Flush the last group; it always goes to the currently open file.
        if pending:
            fout.writelines(pending)
            group_sizes.append(len(pending))
    finally:
        fout.close()
    return lines_read, group_sizes


# -q suppresses the per-line trace; the default still prints it.
lines_read, file_lines = _split_by_group(
    file_in, start_in, end_in, input_line_limit, debug=not args.quiet)
# Reporting
print("Reporting:")
print(file_lines)
print("Lines read: " + str(lines_read))
print("Total lines in all files: " + str(sum(file_lines)))
# ##########################################################################################
# Script end
script_end()
#!/usr/bin/python
# ##########################################################################
# Necessary Modules
import sys
import os
import time
try:
import collections
except:
print("Failed to load module: Collections")
sys.exit(1)
else:
print("Module: collections loaded succesfully")
try:
import argparse
except:
print("Failed to load argparse. Terminating")
sys.exit(2)
# ##########################################################################
# Initiate Parser
# MODIFY: CENTRAL HELP TEXT
# Build the command-line parser. RawDescriptionHelpFormatter is used so the
# banner in `description` keeps its line breaks in the --help output.
parser = argparse.ArgumentParser(
prog="BNSplitcalc",
formatter_class=argparse.RawDescriptionHelpFormatter,
#description="calculate X to the power of Y",
description='''\
#
# BNSplit v.2
# --------------------------------
# Author: p.doulgeridis
# Description: Splits a text file into a number
# of files that have size = lines_in provided,
# keeping the groupings intact.
#
# Caution: Certain files may exceed limit due to grouping.
#
# ''',
epilog="Additional info")
#######
# Mutually exclusive group: -v/--verbose and -q/--quiet cannot be combined.
# -v uses action="count", so args.verbose is the number of -v flags given.
# If more exclusive options are needed, add a separate group.
#
group = parser.add_mutually_exclusive_group()
group.add_argument("-v", "--verbose", action="count", default=0)
group.add_argument("-q", "--quiet", action="store_true")
######
# Positional arguments (required). Only the input file is taken from the
# command line; the start/end options below are disabled.
#
parser.add_argument("file", type=str, help="Provide the file")
# parser.add_argument("start", type=int, help="Provide the beginning of substring - notepad++ column")
# parser.add_argument("end", type=int, help="Provide the end of the substring - notepad++ column")
######
# Parse arguments (reads sys.argv; argparse exits on invalid input)
args = parser.parse_args()
######
# Assign arguments
# NUM_OF_LINES=args.lines
# ##################################################################
# Declare functions and wrappers
# Reporting func
def reporting(lines_in):
    '''
    Function: reporting
    Description: Prints how many lines the run has read
    Input: lines_in - line count (anything str() accepts)
    Output: STDOUT (returns None)
    '''
    parts = ["Operation Finished. Read: ", str(lines_in), " lines."]
    print("".join(parts))
# file exists
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Checks whether a path exists
    Input: filepath (string path)
    Output: Boolean
    Usage: if fileexists(file_in): ...
    Notes: Uses os.path.exists, so directories count as existing too.
    '''
    import os.path
    found = os.path.exists(filepath)
    if not found:
        return False
    return True
# isinteger
def typecheck(input, type):
    '''
    Function: typecheck
    Description: Reports whether `input` is an instance of `type`
    Input: input - value to test; type - class or tuple of classes
    Output: Boolean
    '''
    result = isinstance(input, type)
    return result
# file backup
def backupfile(src):
    '''
    Function: backupfile
    Description: Copies src to "<src>.bak" (contents and metadata)
    Input: src - path of the file to back up
    Output: path of the backup file
    Raises: IOError/OSError if src cannot be read or the backup cannot be
            written
    Notes: The previous version computed the backup name but performed no
           copy. An existing "<src>.bak" is overwritten.
    '''
    import shutil
    backup = str(src) + ".bak"
    shutil.copy2(src, backup)
    return backup
# script params
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - path of the script, normally sys.argv[0]
           vocal   - 'yes' prints the parameters, anything else returns them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join so the directory and the
           file name are separated correctly.
           Relies on the module-level imports of os and time.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError, OSError):
        # Re-raise: continuing would only fail later on unbound names.
        print("Error loading script parameters. Possible problem with os module")
        raise
    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    print("Script name: " + str(script_name))
    print("Script directory: " + str(script_dir))
    print("Script full: " + str(script_full))
    print ('Script started at: ' + time.strftime("%c"))
    return True
def pretty_print(b):
    '''
    Function: pretty_print
    Description : Prints a dictionary, one "key : value" pair per line,
                  between a "{ " line and a "}" line
    Input : Dictionary
    Output: STDOUT (the function returns None)
    Usage : pretty_print(b)
    '''
    print ("{ ")
    for key, value in b.items():
        entry = "\t" + str(key) + " : " + str(value)
        print (entry)
    print ("}\n")
# Script End
def script_end():
    '''
    Function: script_end
    Description: Prints the time at which the script finished ("%c" format)
    Output: STDOUT (returns None)
    '''
    import time
    message = "Script ended at: " + time.strftime("%c")
    print(message)
# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time, formatted with "%c"
    Usage: print(script_time_param()) or a = script_time_param()
    '''
    now_text = time.strftime("%c")
    return now_text
def keywithmaxval(d):
    '''
    Function: keywithmaxval
    Description: Returns the key whose value is the largest
    Input: d - dictionary with comparable values
    Output: the key; when several keys share the maximum, the first one in
            iteration order; 0 when the dictionary is empty
    '''
    if len(d) == 0:
        return 0
    # max() keeps the first maximal element, matching an index() lookup of
    # the maximum in the value list.
    return max(d, key=d.get)
def gettotalvalue(list_in, dict_in):
    '''
    Function: gettotalvalue
    Description: Sums the values stored in dict_in for the keys in list_in
    Input: list_in - iterable of keys
           dict_in - mapping key -> number
    Output: the sum (0 for an empty list_in)
    Raises: KeyError if a key is missing from a plain dict
    Notes: The unused full copy of dict_in that was made on every call has
           been removed; the lookups always went to dict_in itself.
    '''
    return sum(dict_in[j] for j in list_in)
def dictfunc(dict_in):
    '''
    Function: dictfunc
    Description: Debug helper - prints the given object with print()
    Input: dict_in - normally a dictionary
    Output: STDOUT (returns None)
    '''
    print(dict_in)
    return None
def process(dict_in, limit_in):
    '''
    Function: process
    Description: Distributes the keys of dict_in over the two module-level
                 lists outlist1 and outlist2. Keys are taken in descending
                 order of their count and assigned alternately (1st, 3rd,
                 ... to outlist1; 2nd, 4th, ... to outlist2). As soon as
                 the counts in outlist1 add up to more than limit_in, all
                 remaining keys go to outlist2.
    Input: dict_in  - mapping key -> count (not modified; a copy is consumed)
           limit_in - maximum total count wanted in outlist1
    Output: (outlist1, outlist2) - the same list objects as the globals
    Notes: Appends to the module-level outlist1 / outlist2, which must exist
           before the call.
           Totals are computed from dict_in rather than from the global
           dict_count.
           The remaining keys are moved with extend()/clear(); deleting
           entries while iterating the dict raises RuntimeError on
           Python 3.
    '''
    itercounter = 0
    dict_copy = dict(dict_in)
    pretty_print(dict_copy)
    print("LIMIT IS: " + str(limit_in))
    while len(dict_copy) != 0:
        itercounter += 1
        proc_key = keywithmaxval(dict_copy)
        print(proc_key)
        print("Outlist1: " + str(gettotalvalue(outlist1, dict_in)))
        print("Outlist2: " + str(gettotalvalue(outlist2, dict_in)))
        # Odd iterations feed the first list, even iterations the second.
        if itercounter % 2 == 1:
            outlist1.append(proc_key)
        else:
            outlist2.append(proc_key)
        del dict_copy[proc_key]
        if gettotalvalue(outlist1, dict_in) > limit_in:
            print("LIMIT EXCEEDED")
            outlist2.extend(dict_copy)
            dict_copy.clear()
    print(outlist1, gettotalvalue(outlist1, dict_in))
    print(outlist2, gettotalvalue(outlist2, dict_in))
    return (outlist1, outlist2)
# ##################################################################
# PARSE ARGUMENTS AND FORMAT
# Unpack the parsed command-line arguments.
try:
    file_in = args.file
except AttributeError:
    # Without the input file nothing below can run.
    print("Failed to parse arguments")
    sys.exit(2)
else:
    print("Arguments parsed succesfully")
# The grouping key is fixed for this tool: line[2:6].
start_in = 2
end_in = 6
# ###################################################################
# CHECKS
if not fileexists(file_in):
    print("Error: Input file could not be located. Terminating.")
    sys.exit(1)
# Safety net in case start_in / end_in become command-line options again.
if not typecheck(start_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)
if not typecheck(end_in, int):
    print("\n" + "Error: Provided number is not an integer. Terminating")
    sys.exit(9)
# #####################################################################
# Initialize Iteration Vars
# #####################################################################
# NOTE: `group` rebinds the name that held the argparse exclusive group.
group = ""
prev_group = ""
file_count = 0
counter2 = 0
dict_group = {}
# #####################################################################
# Script Start
# #####################################################################
# Handle quiet/verbose arguments
# Start-up report, scaled by the number of -v flags; -q prints nothing.
if not args.quiet and args.verbose:
    if args.verbose >= 4:
        # full script reporting
        print("\n" + "Launching script:")
        script_path_param(sys.argv[0])
        print("\n")
        chck_args_type(sys.argv)
    elif args.verbose >= 2:
        script_path_param(sys.argv[0])
    else:
        print("Processing file: " + str(file_in))
# Calculate input_line_limit
# The split target is half of the input's line count.
print("Calculating input line limit....")
with open(file_in) as count_handle:
    # Count by streaming so the handle is closed and the file is not held
    # in memory.
    count = sum(1 for _ in count_handle)
print("Total line count: " + str(count))
input_line_limit = count // 2
print("Estimated split: " + str(count // 2))
print("Estimated split: " + str(input_line_limit))
# Initialize two dicts
# 1. dict_out  : key -> list of lines (entire data)
# 2. dict_count: key -> number of lines (used for picking)
dict_out = collections.defaultdict(list)
dict_count = collections.defaultdict(int)
print("Reading input file and populating dict...")
with open(file_in, 'r') as f:
    for line in f:
        file_count += 1  # used here as a line counter
        line_fixed = line.rstrip("\n")
        key = line_fixed[start_in:end_in]
        dict_out[key].append(line_fixed)
print("Reading populated dict, creating counter dict")
for key_in in dict_out.keys():
    dict_count[key_in] = len(dict_out[key_in])
    counter2 += dict_count[key_in]
print("\n" + "Reporting:")
print("read: " + str(file_count))
print("dict: " + str(counter2))
pretty_print(dict_count)
# Initialize output lists, and processed key list.
# process() appends to outlist1 / outlist2, so they must exist before it runs.
outlist1 = []
outlist2 = []
processed_keys = []
out_counter = 0
# ##################################################################
# Main Job
# ##################################################################
# Initialize output filenames
file_ot = str(file_in) + ".out.0.txt"
file_ot2 = str(file_in) + ".out.1.txt"
# Calculating split
# Call on main function -> process
print("Calculate split....")
output = process(dict_count, input_line_limit)
print("Output is: " + str(output))
# Both outputs are opened in one `with` so they are closed even if a write
# fails part-way.
with open(file_ot, 'w') as outfile1, open(file_ot2, 'w') as outfile2:
    print("Processing 1st list: " + str(output[0]))
    for j in output[0]:
        print("Processing key and writing : " + str(j))
        processed_keys.append(j)
        for line in dict_out[j]:
            outfile1.write(str(line) + "\n")
            out_counter += 1
    print("Processing 2nd list: " + str(output[1]))
    for j in output[1]:
        print("Processing key and writing: " + str(j))
        processed_keys.append(j)
        for line in dict_out[j]:
            outfile2.write(str(line) + "\n")
            out_counter += 1
# ###################################################################
# Final Controls and reporting
# ###################################################################
print("Final control...")
print("Lines read: " + str(count))
# Report the number actually written, not the read count again.
print("Lines written: " + str(out_counter))
print("Processed keys: " + str(processed_keys))
difference = int(count) - int(out_counter)
if difference != 0:
    print("PROBLEM")
    sys.exit(1)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 16:56:00 2017
@author: P.Doulgeridis
"""
import os
import sys
import time
import collections
def split_by_key_count(file_in):
    '''
    Function: split_by_key_count
    Description: Groups the lines of file_in by key and writes every group
                 to one of three files, chosen by the group's size:
                     1 line   -> <file_in>.proc
                     2 lines  -> <file_in>.proc2
                     3+ lines -> <file_in>.proc3
                 The key is the first 12 characters of the first
                 ";"-separated field of the stripped line.
    Input: file_in - path of the text file to read
    Output: number of lines read
    Notes: Groups are written in order of first appearance of their key.
           Every stored line is terminated with "\n", so a last input line
           without a newline cannot be glued to the next written line.
    '''
    file_ot = str(file_in) + ".proc"
    file_ot2 = str(file_in) + ".proc2"
    file_ot3 = str(file_in) + ".proc3"
    counter = 0
    dict_in = collections.defaultdict(list)
    with open(file_in, "r") as f:
        for line in f:
            counter += 1
            key = line.strip().split(";")[0][:12]
            print(":Key: " + str(key))
            if not line.endswith("\n"):
                line += "\n"
            dict_in[key].append(line)
    with open(file_ot, "w") as f1, open(file_ot2, "w") as f2, open(file_ot3, "w") as f3:
        # Group size -> target file; any other size goes to f3.
        selector = {1: f1, 2: f2}
        for values in dict_in.values():
            selector.get(len(values), f3).writelines(values)
    print("Read: " + str(counter) + " lines")
    return counter


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Call as: " + str(os.path.basename(sys.argv[0])) + " <FILE_IN>")
        sys.exit(1)
    split_by_key_count(sys.argv[1])
# -*- coding: utf-8 -*-
"""
Created on : 2019-07-25 01:56:25.767359
Name : script_main.py
Project : [[project]]
Package : [[PackageName]]
Function : [[Description]]
Type : [[Debug | Util | Flow ]]
Generated by :
@author: P.Doulgeridis
Documentation: [Enter a brief description]
Script IO : [List all relevant File IO]
System Calls : [Yes/No]
Parameters : [List parameters w/ type]
Run as : [List usage example]
"""
#######################################################################
# Imports
#######################################################################
# Standard Modules
import sys
import os
import time
#######################################################################
# Adv Modules
#
try:
import argparse
except:
autolog("Could not import argparser module. Check requirements file")
#
try:
import logging
except:
autolog("Could not import logging module. Check requirements file")
#
try:
import string
except:
autolog("Could not import string module. Check requirements file")
#
try:
import operator
except:
autolog("Could not import operator module. Check requirements file")
#
try:
import collections
except:
autolog("Could not import collections module. Check requirements file")
#
try:
from datetime import datetime
except:
autolog("Failed to load module: datetime")
#
try:
import subprocess
except:
autolog("Failed to load module: Subprocess")
#######################################################################
# Variables
# Number of command-line arguments the script requires (script name not
# counted); passed to chck_args_num() together with len(sys.argv).
ARGS_NUM = 2
# Incremented by main_work() for every directory entry it handles.
t_counter = 0
# Not referenced anywhere in the visible part of this script.
s_counter = 0
#######################################################################
# Functions
# timer
def timing_val(func):
    '''
    Author: p.doulgeridis
    Name: timing_val
    Function: timing_val(func)
    Input: func - the function to wrap (used as a decorator)
    Output: wrapper that logs the elapsed wall-clock time through autolog
            and returns the tuple (elapsed_seconds, result_of_func)
    Usage: @timing_val over function declaration
    Notes: The logged value is t2 - t1, the same elapsed time that is
           returned (it used to be logged with the sign inverted).
           functools.wraps keeps the wrapped function's name and docstring.
    '''
    import functools

    @functools.wraps(func)
    def wrapper(*arg, **kw):
        t1 = time.time()
        res = func(*arg, **kw)
        t2 = time.time()
        elapsed = t2 - t1
        autolog ('%r (%r, %r) %0.9f sec' % \
            (func.__name__, arg, kw, elapsed))
        return elapsed, res
    return wrapper
def chck_args_num(var, arg_num):
    '''
    Function : chck_args_num(var, arg_num)
    Description: Checks the number of command-line arguments
    Input: var     - len(sys.argv), i.e. argument count including the
                     script name
           arg_num - required number of arguments (script name excluded)
    Output: Boolean - True when the counts match
    Called as : chck_args_num(len(sys.argv), ARGS_NUM)
    '''
    expected = int(arg_num)
    provided = var - 1  # drop the script name
    autolog ("Checking provided arguments: ")
    if provided == expected:
        autolog ('Correct number of arguments provided: ' + str(provided) + "\n" )
        return True
    autolog ('Wrong number of arguments : ' + str(provided))
    autolog ('Must be : ' + str(expected))
    return False
def usage():
    '''
    Function: usage
    Description: Logs the command-line synopsis of this script through
                 autolog
    Input: None
    Output: STDOUT autolog (returns None)
    Notes: The previous text was copied from FQ.py and described
           <start>/<end>/<pattern> arguments this script does not take.
           The script takes two arguments: an input directory and an
           output file.
    '''
    script_name = os.path.basename(sys.argv[0])
    autolog("\n".join([
        "",
        "Name: " + script_name,
        "Called as: " + script_name + " <dir_in> <file_ot>",
        "Input: <dir_in> : full path to the directory whose files are merged",
        "<file_ot> : full path to the output file (must not exist yet)",
        "",
    ]))
# script time param
def script_time_param():
    '''
    Name: time parameter
    Function: script_time_param
    Input: None
    Output: string with the current local time, formatted with "%c"
    Usage: autolog(script_time_param()) or a = script_time_param()
    '''
    formatted = time.strftime("%c")
    return formatted
def script_params(string1):
    '''
    Function: script_params(string)
    Description: autologs the basic script parameters (name, directory,
                 full path, start time)
    Input: sys.argv[0]
    Output: STDOUT autolog; returns the tuple (name, directory, full path)
    Called as: script_params(sys.argv[0])
    Notes: The directory is sys.path[0]. The full path is built with
           os.path.join instead of a hard-coded "\\" separator, so it is
           also valid outside Windows.
    '''
    import os
    import sys
    import time
    sname = str(string1)
    sdir = str(sys.path[0])
    sfpath = os.path.join(sdir, sname)
    autolog ('Script name : ' + sname)
    autolog ('Script directory : ' + sdir )
    autolog ('Script full path: ' + sfpath)
    autolog ("\n")
    autolog ('Script started at: ' + time.strftime("%c") + "\n")
    return(sname, sdir, sfpath)
def script_path_param(string1, vocal = 'yes'):
    '''
    Name: script_path_param
    Function: script_path_param(string1, vocal = 'yes')
    Input: string1 - path of the script, normally sys.argv[0]
           vocal   - 'yes' autologs the parameters, anything else returns
                     them
    Output: True when vocal == 'yes', otherwise the tuple
            (script_name, script_dir, script_full)
    Usage: script_path_param(sys.argv[0])
    Notes: script_full is built with os.path.join so the directory and the
           file name are separated correctly.
           Relies on the module-level imports of os and time.
    '''
    try:
        script_name = os.path.basename(string1)
        script_dir = os.path.dirname(os.path.realpath(string1))
        script_full = os.path.join(script_dir, script_name)
    except (TypeError, ValueError, OSError):
        # Re-raise: continuing would only fail later on unbound names.
        autolog("Error loading script parameters. Possible problem with os module")
        raise
    if vocal != 'yes':
        return ( script_name, script_dir, script_full )
    autolog("Script name: " + str(script_name))
    autolog("Script directory: " + str(script_dir))
    autolog("Script full: " + str(script_full))
    autolog ('Script started at: ' + time.strftime("%c"))
    return True
# chk_args_type
def chck_args_type(string):
    '''
    Function : chck_args_type(string)
    Description: autologs every element of the given argument list
    Input: string - list of arguments, normally sys.argv
    Output: STDOUT autolog; always returns 0
    Called as chck_args_type(sys.argv)
    Notes: The values are read from the parameter itself; the previous
           version indexed sys.argv regardless of what was passed in.
    '''
    for i, value in enumerate(string):
        if i == 0:
            autolog ("Arguments for script: %s" % (value,))
        else:
            autolog ("%d. argument: %s" % (i, value))
    autolog("\n")
    return 0
def script_end():
    '''
    Function: script_end
    Description: autologs the end-of-script timestamp ("%c" format),
                 preceded by a newline
    Output: STDOUT autolog (returns None)
    '''
    from time import strftime
    autolog("\n" + "Script ended at: " + strftime("%c"))
def pretty_autolog(b):
    '''
    Function: pretty_autolog
    Description : autologs a dictionary, one "key : value" entry per
                  message, between a "{ " and a "}" message
    Input : Dictionary
    Output: STDOUT autolog (the function returns None)
    Usage : pretty_autolog(b)
    '''
    autolog ("{ ")
    for key in b:
        autolog ( "\t" + str(key) + " : " + str(b[key]) )
    autolog ("}\n")
def autolog(message):
    """
    Function: autolog()
    Description: Prints a message to stdout, prefixed with the caller's
                 file name, function name and the line number of the call
    Input: Message <string>
    Output: STDOUT (returns None)
    Usage: autolog("This is a test message from autolog")
    Notes: The information comes from the previous frame in the stack;
           the current frame would describe autolog itself.
           The line number is the caller frame's current line (f_lineno).
           co_firstlineno, used before, is the line of the caller's "def"
           and was the same for every message from one function.
    """
    import inspect
    caller = inspect.currentframe().f_back
    func = caller.f_code
    print("[[Script: %s]]:[Method: %s]: Line:%i -> : %s" % (
        func.co_filename,
        func.co_name,
        caller.f_lineno,
        message
    ))
def autologfile(message, filename):
    """
    Function: autologfile()
    Description: Appends a message to a log file, prefixed with the
                 caller's file name, function name and the line number of
                 the call
    Input: Message <string>, filename <path of the log file>
    Output: one line appended to filename (returns None)
    Usage: autologfile("This is a test message", "run.log")
    Notes: The information comes from the previous frame in the stack.
           Every entry ends with a newline; without it all entries ran
           together on one line.
           The line number is the caller frame's current line (f_lineno).
    """
    import inspect
    caller = inspect.currentframe().f_back
    func = caller.f_code
    with open(filename, 'a') as f:
        f.write("[[Script: %s]]:[Method: %s]: Line:%i -> : %s\n" % (
            func.co_filename,
            func.co_name,
            caller.f_lineno,
            message
        ))
def fileexists(filepath):
    '''
    Function: fileexists
    Description: Checks whether a path exists
    Input: filepath (string path)
    Output: Boolean
    Usage: if fileexists(file_in): ...
    Notes: Uses os.path.exists, so an existing directory also returns True.
    '''
    import os.path
    return bool(os.path.exists(filepath))
def direxists(filepath):
    '''
    Function: direxists
    Description: Checks that the path exists and is a directory
    Input: filepath (string path)
    Output: Boolean
    Notes: Uses os.path.isdir. With os.path.exists a regular file was
           accepted as a directory.
    '''
    import os.path
    return os.path.isdir(filepath)
@timing_val
def main_work(dir_in, file_ot):
    '''
    Function: main_work(dir_in, file_ot)
    Description: Concatenates every regular file found directly in dir_in
                 into file_ot; a separator line is written after each
                 file's contents.
    Input: dir_in  - directory whose files are merged (not recursive)
           file_ot - output file, created/truncated, written as UTF-8
    Output: None (the timing_val decorator turns the call's result into
            the tuple (elapsed_seconds, None))
    Notes: Increments the module-level t_counter once per merged file.
           Subdirectories are skipped; open() cannot read them.
           The output file itself is skipped when it lives inside dir_in;
           otherwise it would be read while being written.
           Input files are read as UTF-8; a file in another encoding
           raises UnicodeDecodeError.
    '''
    global t_counter
    separator_in ="""
#################################################################
"""
    out_path = os.path.abspath(file_ot)
    with open(file_ot, 'w', encoding='utf-8') as f:
        for entry in os.listdir(dir_in):
            filepath = os.path.abspath(os.path.join(dir_in, entry))
            if filepath == out_path or not os.path.isfile(filepath):
                autolog("Skipping: " + str(filepath))
                continue
            t_counter += 1
            autolog("Processing: " + str(entry))
            autolog("Processing: " + str(filepath))
            with open(filepath, 'r', encoding='utf-8') as g:
                for line in g:
                    f.write(line)
            f.write(separator_in)
    autolog("Processed: " + str(t_counter) + " files.")
#######################################################################
# Initial Checks
# Start.
# Start.
autolog (str(sys.argv[0] + " Started.\n") )
# Checking n. of arguments
if not chck_args_num(len(sys.argv), ARGS_NUM):
    autolog ('Terminating Script....')
    # Non-zero status so that calling shells can detect the failure.
    sys.exit(1)
# Report on script parameters
script_params(sys.argv[0])
########################################################################
# I/O
# file_in is the input DIRECTORY, file_ot the merged output file.
file_in = sys.argv[1]
file_ot = sys.argv[2]
# Check that the input directory exists
if not direxists(file_in):
    autolog("File: " + str(file_in) + " failed to be located. Terminating.")
    sys.exit(2)
else:
    autolog("File: " + str(file_in) + " located. Proceeding.")
# Refuse to overwrite an existing output file
if fileexists(file_ot):
    autolog("File: " + str(file_ot) + " already exists. Terminating.")
    sys.exit(3)
else:
    autolog("File: " + str(file_ot) + " ready to be written")
########################################################################
# Main Work
if __name__ == '__main__':
    main_work(file_in, file_ot)
########################################################################
# Script end and reporting
autolog (str(sys.argv[0]) + " Ended.")
#
##########################################################################
# DIRECTORY COMPARE V.1 #
# #
# Usage : #
# #
# python DirComp.py <dir1> <dir2> #
# #
# Function : #
# #
# Sizes and compares directories based on filenames, prints out the #
# filenames that only exist in one directory of the two, for both #
# directories. #
# #
# Notes : #
# #
# Main function is "build_files_set" which takes a directory as input #
# and parses each file name in full path, relative path, and assigns #
# them to a set so we can do set operations. #
# #
# The compare_directories function simply calculates the differences #
# between the given sets. #
# #
# Modules: #
# #
# import os #
# import sys #
# import re #
# import subprocess #
# import time #
##########################################################################
import os
import sys
import re
import subprocess
import time
import collections
# Get the script path
def get_script_path():
    """Return the directory of the running script (sys.argv[0]), with
    symbolic links resolved."""
    script_file = os.path.realpath(sys.argv[0])
    script_dir = os.path.dirname(script_file)
    return script_dir
# Parse files of directory
def build_files_set(rootdir):
    """Return the set of paths, relative to rootdir, of every file and
    directory found below rootdir (recursive).

    The relative path is computed with os.path.relpath. The previous
    version interpolated rootdir into a regular expression unescaped, so
    directory names containing characters such as "+", "(", "." or a
    Windows backslash were not stripped correctly.
    """
    files_set = set()
    for (dirpath, dirnames, filenames) in os.walk(rootdir):
        for filename in filenames + dirnames:
            full_path = os.path.join(dirpath, filename)
            files_set.add(os.path.relpath(full_path, rootdir))
    return files_set
# Compare sets
def compare_directories(dir1, dir2):
    """Return (only_in_dir1, only_in_dir2): the relative paths that exist
    below one directory but not below the other."""
    left = build_files_set(dir1)
    right = build_files_set(dir2)
    only_left = left.difference(right)
    only_right = right.difference(left)
    return (only_left, only_right)
def compare_bool(dir1, dir2):
    """Return True when both directories contain exactly the same set of
    relative paths, False otherwise.

    The previous version returned the comparison lambda itself, which is
    always truthy, instead of its result.
    """
    files_set1 = build_files_set(dir1)
    files_set2 = build_files_set(dir2)
    return files_set1 == files_set2
def are_eq(a, b):
    """Return True when directories a and b contain the same set of
    relative paths.

    The file sets are compared. The previous version built them but then
    compared the characters of the two path strings.
    """
    files_set1 = build_files_set(a)
    files_set2 = build_files_set(b)
    return files_set1 == files_set2
# Get size - Not working
def get_size(start_path = '.'):
    """Return the total size in bytes of all files below start_path
    (recursive).

    Entries whose size cannot be read (broken symbolic link, file removed
    during the walk, permission problem) are skipped instead of aborting
    the whole computation.
    """
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                total_size += os.path.getsize(fp)
            except OSError:
                continue
    return total_size
# call to system du - working
def du(path):
    """disk usage in human readable format (e.g. '2,1GB')

    Runs the system command "du -sh <path>" and returns the first
    whitespace-separated field of its output, decoded as UTF-8.
    """
    raw_output = subprocess.check_output(['du','-sh', path])
    first_field = raw_output.split()[0]
    return first_field.decode('utf-8')
def main():
    """Compare the two directories given on the command line.

    Prints the script parameters, both directory names, their sizes as
    reported by du(), and the relative paths that exist in only one of the
    two directories.

    Returns the result of are_eq(dir1, dir2).

    All output uses the single-argument print(...) form, which works on
    Python 2 and Python 3; the bare print statements used before are
    syntax errors on Python 3.
    """
    if len(sys.argv) != 3:
        print("Usage: python DirComp.py <dir1> <dir2>")
        sys.exit(1)
    # ##
    # Script parameters
    print(' ')
    print("Script run at: " + str(time.strftime("%H:%M:%S")))
    print("Script run from: " + get_script_path())
    # ##
    # Process arguments
    print(' ')
    dir1 = str(sys.argv[1])
    dir2 = str(sys.argv[2])
    # ##
    # Compare Directories
    in_dir1, in_dir2 = compare_directories(dir1, dir2)
    # ##
    # Output
    print(' ')
    print('Comparing Files: ')
    print(dir1)
    print(dir2)
    print(' ')
    print('Comparing sizes: ')
    size1 = du(dir1)
    size2 = du(dir2)
    print("Directory " + dir1 + " size: " + str(size1))
    print("Directory " + dir2 + " size: " + str(size2))
    print('\nFiles only in {}:'.format(dir1))
    for relative_path in in_dir1:
        print('* {0}'.format(relative_path))
    print('\nFiles only in {}:'.format(dir2))
    for relative_path in in_dir2:
        print('* {0}'.format(relative_path))
    # Boolean
    return are_eq(dir1, dir2)


# Run the comparison once and print its boolean result.
if __name__ == '__main__':
    print(main())
# Two primary data structures are created:
# (1) A list of tuples. Each tuple contains a pair of items: a file size and a
# file path. The file size is the size of the file pointed to by the file path.
# The list is sorted on the file sizes. The set of file paths consists of all
# the paths to the files in directory_l (recursively including subdirectories
# of directory_l and excluding hidden files and folders by default).
# For example:
# [(file_size_1, file_path_1), (file_size_2, file_path_2), ...,
# (file_size_n, file_path_n)]
# file_size_1 = size of the file pointed to by file_path_1,
# file_size_2 = size of the file pointed to by file_path_2, ...,
# file_size_n = size of the file pointed to by file_path_n
# file_size_1 <= file_size_2 <= ... <= file_size_n
# file_path_1, file_path_2, ..., file_path_n = all the paths to the files in
# directory_l (recursively including subdirectories of directory_l and
# excluding hidden files and folders by default)
# (2) A dictionary mapping each unique file size in directory_r to a list of
# all the paths to files of that size in directory_r (recursively including
# subdirectories of directory_r and excluding hidden files and folders by
# default).
# For each file pointed to in (1), its size is checked for existence in (2).
# If its size does not exist in (2), the file path to it is stored as
# unmatched. If its size does exist in (2), a byte by byte comparison is done
# between it and each file matching its size in (2) until a match is found, if
# any. If a match is not found, the file path to it is stored as unmatched. The
# stored list of unmatched file paths, if any, is then printed.
# Uses suggestions by msvalkon and Janne Karila in Stack Exchange Code Review:
# http://codereview.stackexchange.com/questions/41853/byte-by-byte-directory-comparison-ignoring-folder-structures-and-file-name-diffe
# Requires the progress bar library (2.2)
# https://pypi.python.org/pypi/progressbar/2.2
# http://code.google.com/p/python-progressbar/
import argparse
import collections
import filecmp
import os
import sys
from operator import itemgetter
#from progressbar import Bar, Percentage, ProgressBar
def main():
    """Command-line entry point for the directory comparison.

    Parses the arguments (-a/--all, directory_l, directory_r), validates that
    both positional arguments are existing directories, runs find_unmatched()
    and prints the resulting list of unmatched paths.

    Exits with status 2 (via sys.exit) when either directory path is invalid.
    """
    help_description = (
        'Prints a list of the paths to the files that exist in the directory pointed '
        'to by directory_l, but that do not exist in the directory pointed to by '
        'directory_r. File name differences are ignored. Recursively scans '
        'subdirectories of directory_l and directory_r. Skips hidden files and folders '
        'by default. Files of the same size are compared byte by byte. Differences in '
        'directory structures are ignored. For example, if '
        'directory_l/subdirectory_1/file_name_1 and '
        'directory_r/subdirectory_2/subdirectory_3/file_name_2 match byte for byte, '
        'then directory_l/subdirectory_1/file_name_1 exists in directory_r.'
    )
    parser = argparse.ArgumentParser(description=help_description)
    parser.add_argument('-a', '--all', action='store_true',
                        help='include hidden files and folders')
    parser.add_argument('directory_l',
                        help='path to a directory of files to search for')
    parser.add_argument('directory_r',
                        help='path to a directory of files to search in')
    args = parser.parse_args()
    include_hidden = args.all
    directory_l = args.directory_l
    directory_r = args.directory_r

    # Check the left directory first, then the right one; stop at the first
    # invalid path. print() with a single argument behaves the same on
    # Python 2 and Python 3.
    for directory in (directory_l, directory_r):
        if not os.path.isdir(directory):
            print("Invalid directory path: " + directory)
            sys.exit(2)

    unmatched = find_unmatched(directory_l, directory_r, include_hidden)

    # Prints the paths to any unmatched files.
    if not unmatched:
        print("No unmatched files.")
    else:
        print("Unmatched files:")
        for file_path in unmatched:
            print(file_path)
def find_unmatched(directory_l, directory_r, include_hidden):
    """Return the paths of files in directory_l that have no match in directory_r.

    A file from directory_l counts as matched when file_match() reports that
    at least one file of the same size in directory_r matches it.

    Args:
        directory_l: path to the directory whose files are searched for.
        directory_r: path to the directory that is searched in.
        include_hidden: passed through to sizes_paths() for both directories.

    Returns:
        A list of paths from directory_l, ordered by ascending file size.
    """
    print("Preprocessing...")
    # (1) (size, path) pairs for directory_l, sorted by the first item of each
    # pair (the size). The sort is stable, so files of equal size keep the
    # order in which they were discovered.
    sized_paths_l = sorted(sizes_paths(directory_l, include_hidden),
                           key=itemgetter(0))
    # (2) Maps each file size found in directory_r to the list of paths of
    # that size.
    paths_by_size_r = dict_of_lists(sizes_paths(directory_r, include_hidden))

    print("Comparing files...")
    unmatched = []
    for size_l, file_path_l in sized_paths_l:
        # .get() rather than indexing: indexing a defaultdict would insert an
        # empty list for every size that is missing from directory_r.
        same_size_paths_r = paths_by_size_r.get(size_l, ())
        if not file_match(file_path_l, same_size_paths_r):
            # Either no file in directory_r has the same size as the file
            # pointed to by file_path_l, or none of those that do matched.
            unmatched.append(file_path_l)
    return unmatched
# Returns as tuple pairs the size of and path to each of the files in the
# directory pointed to by 'top', recursively including subdirectories of 'top'.
# Hidden files and folders are not returned unless 'include_hidden' is True.
def sizes_paths(top, include_hidden):
    """Yield a (size, path) pair for every path from get_directory_file_paths().

    'top' and 'include_hidden' are forwarded unchanged; the size is whatever
    os.path.getsize() reports for the path.
    """
    paths = get_directory_file_paths(top, include_hidden)
    for path in paths:
        yield os.path.getsize(path), path
# Returns each of the paths to the files in the directory pointed to by 'top',
# recursively including subdirectories of 'top'. Hidden files and folders are
# not returned unless 'include_hidden' is True.
def get_directory_file_paths(top, include_hidden):
    """Yield the path of every file found by walking 'top' with os.walk().

    When 'include_hidden' is false, file names starting with '.' are skipped
    and folders starting with '.' are not descended into.
    """
    for current_dir, subfolders, files in os.walk(top):
        if include_hidden:
            wanted_files = files
        else:
            # Drop the file names that begin with '.'.
            wanted_files = [name for name in files if name[0] != '.']
            # Slice assignment edits the list os.walk() holds, which is what
            # stops it from descending into the removed (hidden) folders.
            subfolders[:] = [name for name in subfolders if name[0] != '.']
        for name in wanted_files:
            yield os.path.join(current_dir, name)
# Creates and returns a dictionary of lists from a list of tuple pairs.
# The keys in the dictionary are the set of the unique first items from the
# tuple pairs. Each of these keys is mapped to a list of all the second items
# from the tuple pairs whose first item matches that key.
# Example:
# {'a': [1, 1], 'c': [1], 'b': [2, 3]} =
# dict_of_lists([('a', 1), ('a', 1), ('b', 2), ('b', 3), ('c', 1)])
def dict_of_lists(item_list):
    """Group the second items of (first, second) pairs by their first item.

    Returns a collections.defaultdict(list) in which each distinct first item
    maps to the list of second items paired with it, in input order.
    """
    grouped = collections.defaultdict(list)
    for pair in item_list:
        first, second = pair
        # A missing key is created as an empty list on first access.
        grouped[first].append(second)
    return grouped
# Returns True if and only if any of the files pointed to by the file paths in
# file_path_list_r are a byte by byte match for the file pointed to by
# file_path_l.
# Note that file_path_list_r may be an empty list.
def file_match(file_path_l, file_path_list_r):
    """Return True if any path in file_path_list_r matches file_path_l.

    Each candidate is compared with filecmp.cmp() using shallow=False; the
    search stops at the first match. An empty candidate list gives False.
    """
    for candidate in file_path_list_r:
        if filecmp.cmp(file_path_l, candidate, False):
            return True
    return False
# Run the command-line interface only when executed as a script, so that
# importing this module does not parse sys.argv or exit the interpreter.
if __name__ == "__main__":
    main()