onlyforbopi
3/27/2017 - 9:58 AM

FILE_FORMATTING_READY_APPS

FILE_FORMATTING_READY_APPS

# -*- coding: utf-8 -*-
"""
Created on Wed Mar 22 08:48:37 2017

@author: P.Doulgeridis
"""


#####################################################################################
#
#   FILE QUERY V1
#
#   Function : Copies to the output file every line of the input file whose
#              substring at a fixed position equals a hard-coded value
#
#   Usage : python FILEQUERY.PY <input> <output> <substart> <sublength> 
#
#   Notes:
#
#   When we have to match a specific string, we should copy/paste it from the
#   original file into this script, on the given line (the string comparison
#   in the main loop). Always check the encoding of the files to be processed.
#
#	python FILEQUERY.py ENDXMV_20170309.TXT TESTFORPYOUT2.TXT 445 2 > OEOEOEO2.TXT
#                                                                                   #
######################################################################################



#######################################
# LIBRARIES
import os
import sys
import time


#######################################
# FUNCTIONS


def script_path_param(string1):
    '''
    Function: script_path_param
    Description: Breaks a script path into name, directory and original form
    Input: string1 - path of the script (the caller passes sys.argv[0])
    Output: Tuple (script_name, script_dir, script_full)
            script_name - basename of string1
            script_dir  - directory part of the real (symlink-resolved) path
            script_full - string1, returned unchanged
    Usage: name, directory, full = script_path_param(sys.argv[0])
    '''
    resolved = os.path.realpath(string1)
    return (os.path.basename(string1), os.path.dirname(resolved), string1)


def script_time_param():
    '''
    Function: script_time_param
    Description: Returns the current time formatted with the "%c" directive
    Input: None
    Output: String with the formatted time
    Usage: print (script_time_param()) or a = script_time_param()
    '''
    stamp = time.strftime("%c")
    return stamp

def usage(string1):
    '''
    Function: usage
    Description: Prints the script name and its calling convention
    Input: string1 - path of the script; only its basename is displayed
    Output: Always True
    Usage: usage(sys.argv[0])
    '''
    prog = str(os.path.basename(string1))
    print ("\nName: " + prog)
    print ("Call as: " + prog + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n")
    return True
    

def file_abs_path(file):
    '''
    Function : file_abs_path
    Description : Converts the argument to a string and returns its
                  absolute path
    Input : file - path (or any object whose str() is a path)
    Output : String with the absolute path
    '''
    path_text = str(file)
    return os.path.abspath(path_text)

def filetolist(file):
    '''
    Function: filetolist
    Description: Reads a whole text file and stores its lines in a list
    Input: file - path of the file (converted with str())
    Output: List with one string per line, line terminators included
    Usage: print (filetolist("C:/Users/p.doulgeridis/Desktop/testpy.txt"))
    Notes: On Windows the path needs doubled backslashes or forward slashes.
           The file is opened in a with-block so the handle is closed as
           soon as the lines have been read.
    '''
    file_in = str(file)
    with open(file_in, 'r') as handle:
        return handle.readlines()



def file_size_mb(filePath):
    '''
    Function: file_size_mb
    Description: Returns the size of a file expressed in megabytes
    Input: filePath - path of the file to measure
    Output: Float, size in bytes divided by 1024 * 1024
    '''
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(filePath)
    return float(size_in_bytes) / bytes_per_mb


def count_file(file):
    '''
    Function: count_file
    Description: Counts the lines, whitespace-separated words and characters
                 of a text file in a single pass
    Input: file - path of the file (converted with str())
    Output: Tuple (lines, words, chars)
    '''
    # Running totals, in the order they are returned: lines, words, chars.
    totals = [0, 0, 0]
    with open(str(file), 'r') as handle:
        for text in handle:
            totals[0] += 1
            totals[1] += len(text.split())
            totals[2] += len(text)
    return tuple(totals)



#######################################
# CONSTANTS

# Expected n. of Arguments: <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH>
cor_args_nbr = 4
#script_usage = usage()


#######################################
# SCRIPT PATHS

# Manual way to get paths - requires modules (sys, os)
script_name = os.path.basename(sys.argv[0])
script_dir  = os.path.dirname(sys.argv[0])
script_full = str(sys.argv[0])


# With Function use
a = (script_path_param(sys.argv[0])[1])
b = (script_time_param())
c = usage(sys.argv[0])      # prints the usage banner on every run

# Stop with a readable message instead of an IndexError when arguments are
# missing. Extra arguments are still tolerated, as before.
if len(sys.argv) - 1 < cor_args_nbr:
    sys.exit("Error: expected " + str(cor_args_nbr) + " arguments, got "
             + str(len(sys.argv) - 1))


#######################################
# GLOBAL VARIABLES

log_file = r"C:\Users\p.doulgeridis\Desktop\CODES\CENTRAL_LOG.txt"
file_in = sys.argv[1]
file_ot = sys.argv[2]
# sub_start stays a string; the code that slices with it converts it to int.
sub_start = sys.argv[3]
# End index of the slice = start + length. Both values must be integers, so
# report a bad value here rather than with a bare ValueError traceback.
try:
    sub_lend = int(sys.argv[3]) + int(sys.argv[4])
except ValueError:
    sys.exit("Error: <SUB_START> and <SUB_LENGTH> must be integers")


#print(file_in)
#print(file_abs_path(file_in))
#print(file_ot)

#print(sub_start)
#print(sub_lend)


#######################################
# SCRIPT PARAMETERS
# Echo the run parameters, one per line; the last entry carries its own
# trailing newline so a blank line follows the list.
param_lines = [
    "Input File: " + file_in,
    "Input File - Absolute path " + file_abs_path(file_in),
    "Output File: " + file_ot,
    "Start Digit: " + str(sub_start),
    "Length: " + str(sys.argv[4]),
    "Logfile: " + log_file + "\n",
]
print ("\n".join(param_lines))



#######################################
# SCRIPT START
print ("Script started at " + script_time_param() + "\n" )

# DO WORK
print("Reading input file .....\n")
b = filetolist(file_in)
count = 0

# Convert the slice bounds once instead of on every iteration.
slice_start = int(sub_start)
slice_end = int(sub_lend)

# The with-block guarantees the output file is flushed and closed before the
# reporting step reads it back (the original "out.close" lacked parentheses
# and therefore never closed the file).
with open(file_ot, 'w') as out:
    for j in b:
        count += 1
        check = j[slice_start:slice_end]

        # IMPORTANT - N01 : HERE WE ENTER THE STRING MATCH
        # TAKE THE STRING WITH C/P FROM THE ORIGINAL FILE AND COPY PASTE IT HERE
        # IF MORE STRING MATCHES ARE REQUIRED, CHANGE THE IF STATEMENT
        if check == "Ι7":
            out.write(j)

# DO WORK 2
# DO WORK 2
# Each call returns a (lines, words, chars) tuple.
count_in = count_file(file_in)
count_ot = count_file(file_ot)

# REPORTING
# The input line count comes from the main loop counter; every other figure
# comes from count_file / file_size_mb.
print ("@@@@")
print ("File input : {0}".format(file_in))
print ("Size: {0} MB".format(file_size_mb(file_in)))
print ("Linecount: {0}".format(count))
print ("Word count: {0}".format(count_in[1]))
print ("Character count: {0}".format(count_in[2]))
print ("@")
print ("File output: {0}".format(file_ot))
print ("Size: {0} MB".format(file_size_mb(file_ot)))
print ("Line count: {0}".format(count_ot[0]))
print ("Word count: {0}".format(count_ot[1]))
print ("Character count: {0}".format(count_ot[2]))

# LOGGING
# Append one line per run to the central log. The with-block closes the
# handle (the original "log.close" lacked parentheses and never closed it).
with open(log_file, 'a') as log:
    log.write("Script: " + script_name + " run at: " + script_time_param() + "\n" )



##########################################
# SCRIPT END
print ("\n\nScript ended at " + script_time_param() + "\n" )
#!/bin/bash



#
#
#	to do :
#
#		1. constants variables functions script start script end
#		2. add knowledge to gistbox
#		3. gistbox / files and gistbox / bash arrays
#		4. folder ready apps
#
#

################################################################
# Constants
# Name of this script with any leading directory stripped.
THIS_JOB="${0##*/}"
readonly THIS_JOB




# Example of handling multiple input files: the inputs are collected in the
# inputFiles array, and the flags -i / -o tell input from output.
declare inputFiles outputFile

#[[ $_ == $0 ]] && getFriendlyFileSize $1 $2

# NEEDS BETTER DOCUMENTATION
# ITERATE OVER ARGUMENT LIST - $#
# Consume the positional parameters one by one until none remain.
#   -o <file> : sets the output file (the last -o wins)
#   -i <file> : appends <file> to the inputFiles array
#   <other>   : any other argument is appended to inputFiles as well
while (($#)); do
	case $1 in
		-o|-i)
			# Both flags take the next argument as their value; refuse a
			# trailing flag instead of storing an empty file name.
			if (($# < 2)); then
				echo "$THIS_JOB: option $1 requires a file argument" >&2
				exit 1
			fi
			if [[ $1 = -o ]]; then
				outputFile=$2
			else
				inputFiles+=( "$2" )
			fi
			# Drop the flag here; the shift below drops its value.
			shift
			;;
		*)
			inputFiles+=( "$1" )
			;;
	esac
	shift
done

# Print the calling convention of this script. The text matches the argument
# parser: -i marks an input file, -o marks the output file, and any other
# argument is also taken as an input file.
function usage(){

	echo " Script : $THIS_JOB called as :"
	echo " $THIS_JOB [-i] <FILE_INPUT> [[-i] <FILE_INPUT> ...] -o <FILE_OUTPUT>"
	echo " Every argument that is not the value of -o is treated as an input file."
	echo " Script will collect a blueprint (counts, size, checksum, encoding, type)"
	echo " of each input file and append it to FILE_OUTPUT."
	echo

}


# Check an exit status silently.
#   $1 : status to check
# Status 0 prints nothing and returns 0. Any other status is reported on
# stdout and the whole script exits with that status.
function chk_abnd_silent()
{
  local abnd=$1

  # Nothing to report unless the status is non-zero.
  [ $abnd -ne 0 ] || return 0

  echo "Failed with status " $abnd
  echo
  exit $abnd
}
#



# Main Function that produces the results
# Main function that produces the results.
#   $1 : path of the file to blueprint
# Prints to stdout the `stat` output of the file followed by its line, word,
# character and byte counts, disk usage, MD5 sum, absolute path, encoding,
# file type and longest line length. Every expansion of the path is quoted so
# that file names containing whitespace work.
function process(){

	# FILE_IN is left global, as in the original, in case code outside this
	# function reads it; all working variables are local.
	FILE_IN=$1

	local linecount wordcount charcount sizecount2 md5sumrep abspathfi
	local typeoffil fiencodin fdiskusag flineleng

	linecount=$(wc -l "$FILE_IN" | awk '{print $1}')
	wordcount=$(wc -w "$FILE_IN" | awk '{print $1}')
	charcount=$(wc -m "$FILE_IN" | awk '{print $1}')
	sizecount2=$(wc -c "$FILE_IN" | awk '{print $1}')
	md5sumrep=$(md5sum "$FILE_IN" | awk '{print $1}')
	# Physical directory of the file (symlinks resolved by pwd -P) + its name.
	abspathfi="$(cd "$(dirname "$1")" && pwd -P)/$(basename "$1")"
	typeoffil=$(file "$FILE_IN")
	fiencodin=$(file -bi "$FILE_IN")
	# With -c, du adds a grand-total line; for a single file that is line 2.
	fdiskusag=$(du -hca "$FILE_IN" | awk 'NR==2 {print $1}')
	flineleng=$(wc -L "$FILE_IN" | awk '{print $1}')

	echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
	echo "@ Blueprint of $FILE_IN"

	stat "$FILE_IN"
	#
	# ADD THEM TO GISTBOX - FILE DIR
	#

	echo "Linecount      = $linecount"
	echo "Wordcount      = $wordcount"
	echo "Charcount      = $charcount"
	echo "Sizecount      = $sizecount2"
	echo "Disk Usage     = $fdiskusag"
	echo "MD5sum         = $md5sumrep"
	echo "Absolute Path  = $abspathfi"
	echo "Encoding       = $fiencodin"
	echo "File Type      = $typeoffil"
	echo "Longest Line   = $flineleng"
	# NOTE(review): fzerosize is not set anywhere in the visible script, so
	# this prints an empty line - confirm whether it was meant to be computed.
	echo "$fzerosize"
	echo ""

}


#################################################################
# Script start.
echo "$THIS_JOB" " start time at $(date) "; echo
echo "-"

echo "Processing ...."
echo "The following files will be processed: "

# ADD TO GISTBOX - HOW TO PRINT ENTIRE ARRAY
echo "${inputFiles[@]}"; echo

# ADD TO GISTBOX - HOW TO ITERATE OVER ARRAY
for inputFile in "${inputFiles[@]}"; do
	echo "Blueprinting File : $inputFile"

	# A missing file cannot be blueprinted: report it and move on instead of
	# announcing it as regular and running every tool against it.
	if [[ ! -e $inputFile ]]; then
		echo "Input File : $inputFile does not exist"
		continue
	fi
	echo "Input File : $inputFile exists"

	# Directories are skipped.
	if [[ -d $inputFile ]]; then
		echo "Input File : $inputFile is a directory"
		continue
	fi
	echo "Input File : $inputFile is regular, proceeding.."

	echo ""
	# Append the blueprint to the output file. Quoting keeps paths with
	# spaces intact; when no -o was given the blueprint goes to stdout
	# instead of failing with "ambiguous redirect".
	process "$inputFile" >> "${outputFile:-/dev/stdout}"
done



#################################################################
# Script end.
echo "$THIS_JOB" " end time at $(date) "; echo
echo "-"
1. blueprint.bsh           : Prints a blueprint (counts, size, checksum, encoding, type) of each given file
2. filequery.py            : Filters the lines of a text file by the value at a fixed substring position