FILE_FORMATTING_READY_APPS
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 22 08:48:37 2017
@author: P.Doulgeridis
"""
#####################################################################################
#
# FILE QUERY V1
#
# Function : Queries a file in a specific substring for a value
#
# Usage : python FILEQUERY.PY <input> <output> <substart> <sublength>
#
# Notes:
#
# When we have to match a specific string, we should copy/paste it from the
# original file into this script, on the given line (the string comparison in
# the main loop). Always check the encoding of the files to be processed.
#
# python FILEQUERY.py ENDXMV_20170309.TXT TESTFORPYOUT2.TXT 445 2 > OEOEOEO2.TXT
# #
######################################################################################
#######################################
# LIBRARIES
import os
import sys
import time
#######################################
# FUNCTIONS
def script_path_param(string1):
    '''
    Function: script_path_param
    Description: Splits a script path into its name, its directory and the
                 path exactly as it was given.
    Input: string1 - path of the script (the caller passes sys.argv[0])
    Output: Tuple (script_name, script_dir, script_full) where script_name is
            the basename of string1, script_dir is the directory of the
            realpath-resolved location, and script_full is string1 unchanged.
    '''
    resolved_location = os.path.realpath(string1)
    return (
        os.path.basename(string1),
        os.path.dirname(resolved_location),
        string1,
    )
def script_time_param():
    '''
    Function: script_time_param
    Description: Returns the current local time as text, formatted with the
                 "%c" directive of time.strftime.
    Input: None
    Output: String with the formatted time
    Usage: print(script_time_param()) or a = script_time_param()
    '''
    time_format = "%c"
    formatted_now = time.strftime(time_format)
    return formatted_now
def usage(string1):
    '''
    Function: usage
    Description: Prints the script name and the expected call syntax to
                 standard output.
    Input: string1 - path of the script; only its basename is shown
    Output: Always True
    '''
    shown_name = str(os.path.basename(string1))
    banner_lines = (
        "\n" + "Name: " + shown_name,
        "Call as: " + shown_name + " <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH> \n",
    )
    for banner_line in banner_lines:
        print(banner_line)
    return True
def file_abs_path(file):
    '''
    Function: file_abs_path
    Description: Converts the given path to text with str() and returns its
                 absolute form as computed by os.path.abspath.
    Input: file - path of the file
    Output: String with the absolute path
    '''
    import os
    path_text = str(file)
    absolute = os.path.abspath(path_text)
    return absolute
def filetolist(file):
    '''
    Function: filetolist
    Description: Reads a text file and returns its lines as a list.
    Input: file - path of the file to read (converted with str())
    Output: List with one string per line; each line keeps its line ending,
            exactly as produced by iterating over the open file. An empty
            file gives an empty list.
    Usage: lines = filetolist("testpy.txt")
    Notes: The file is opened in text mode ('r') with the default encoding.
           On Windows, write paths with doubled backslashes or with
           forward slashes.
    '''
    # The context manager guarantees the handle is closed even if reading
    # fails; the previous list(open(...)) form never closed it.
    with open(str(file), 'r') as handle:
        return list(handle)
def file_size_mb(filePath):
    '''
    Function: file_size_mb
    Description: Reports the size of a file in megabytes, where one megabyte
                 is taken as 1024 * 1024 bytes.
    Input: filePath - path of the file to measure
    Output: Float with the size in MB
    '''
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(filePath)
    return float(size_in_bytes) / bytes_per_mb
def count_file(file):
    '''
    Function: count_file
    Description: Counts the lines, the whitespace-separated words and the
                 characters of a text file in a single pass.
    Input: file - path of the file (converted with str())
    Output: Tuple (lines, words, chars); characters include line endings
            as read in text mode.
    '''
    line_total = 0
    word_total = 0
    char_total = 0
    with open(str(file), 'r') as handle:
        for text in handle:
            line_total = line_total + 1
            word_total = word_total + len(text.split())
            char_total = char_total + len(text)
    return (line_total, word_total, char_total)
#######################################
# CONSTANTS
# Expected number of command-line arguments:
# <FILE_IN> <FILE_OT> <SUB_START> <SUB_LENGTH>
cor_args_nbr = 4
#######################################
# SCRIPT PATHS
# Manual way to get paths - requires modules (sys, os)
script_name = os.path.basename(sys.argv[0])
script_dir = os.path.dirname(sys.argv[0])
script_full = str(sys.argv[0])
# Same information through the helper functions
a = (script_path_param(sys.argv[0])[1])
b = (script_time_param())
# usage() prints the call syntax as a side effect; it is shown on every run
c = usage(sys.argv[0])
# Stop with a readable message instead of an IndexError when arguments are
# missing. Extra arguments are still ignored.
if len(sys.argv) - 1 < cor_args_nbr:
    sys.exit("Expected " + str(cor_args_nbr) + " arguments, got " + str(len(sys.argv) - 1))
#######################################
# GLOBAL VARIABLES
log_file = r"C:\Users\p.doulgeridis\Desktop\CODES\CENTRAL_LOG.txt"
file_in = sys.argv[1]
file_ot = sys.argv[2]
# sub_start keeps the argument exactly as typed (it is echoed below);
# slice_from / sub_lend are the integer bounds of the substring to inspect.
sub_start = sys.argv[3]
slice_from = int(sub_start)
sub_lend = slice_from + int(sys.argv[4])
#######################################
# SCRIPT PARAMETERS
print ("Input File: " + file_in )
print ("Input File - Absolute path " + file_abs_path(file_in))
print ("Output File: " + file_ot )
print ("Start Digit: " + str(sub_start) )
print ("Length: " + str(sys.argv[4]) )
print ("Logfile: " + log_file + "\n" )
#######################################
# SCRIPT START
print ("Script started at " + script_time_param() + "\n" )
# DO WORK
print("Reading input file .....\n")
b = filetolist(file_in)
count = 0
# The with-block closes (and therefore flushes) the output file before it is
# measured below; the previous bare "out.close" never called close().
with open(file_ot, 'w') as out:
    for j in b:
        count += 1
        check = j[slice_from:sub_lend]
        # IMPORTANT - N01 : HERE WE ENTER THE STRING MATCH
        # Copy/paste the string from the original data file into the literal
        # below (it may contain non-ASCII look-alike characters, so do not
        # retype it). If more matches are required, change the if statement.
        if check == "Ι7":
            out.write(j)
# DO WORK 2
count_in = count_file(file_in)
count_ot = count_file(file_ot)
# REPORTING
print ("@@@@")
print ("File input : " + file_in )
print ("Size: " + str(file_size_mb(file_in)) + " MB")
print ("Linecount: " + str(count) )
print ("Word count: " + str(count_in[1]))
print ("Character count: " + str(count_in[2]))
print ("@")
print ("File output: " + file_ot )
print ("Size: " + str(file_size_mb(file_ot)) + " MB")
print ("Line count: " + str(count_ot[0]))
print ("Word count: " + str(count_ot[1]))
print ("Character count: " + str(count_ot[2]))
# LOGGING
# Append one line per run to the central log; the with-block closes the file.
with open(log_file, 'a') as log:
    log.write("Script: " + script_name + " run at: " + script_time_param() + "\n" )
##########################################
# SCRIPT END
print ("\n\nScript ended at " + script_time_param() + "\n" )
#!/bin/bash
#
#
# to do :
#
# 1. constants variables functions script start script end
# 2. add knowledge to gistbox
# 3. gistbox / files and gistbox / bash arrays
# 4. folder ready apps
#
#
################################################################
# Constants
# Name of this script: $0 with everything up to the last "/" removed.
readonly THIS_JOB=${0##*/}
# Example of handling multiple input files: the inputs are collected in the
# inputFiles array. Two flags tell input from output:
#   -i <file>   add <file> to the inputs
#   -o <file>   use <file> as the output file
# Any other argument is also added to the inputs.
declare inputFiles
declare outputFile
#[[ $_ == $0 ]] && getFriendlyFileSize $1 $2
# Consume the positional parameters one at a time until none are left.
while [[ $# -gt 0 ]]; do
    case $1 in
        -o)
            # The value follows the flag; the extra shift skips over it.
            outputFile=$2
            shift
            ;;
        -i)
            # The value follows the flag; the extra shift skips over it.
            inputFiles+=( "$2" )
            shift
            ;;
        *)
            # Anything else is itself an input file.
            inputFiles+=( "$1" )
            ;;
    esac
    shift
done
# Print a short description of how to call this script.
# Takes no arguments; reads the global THIS_JOB for the script name.
# The text follows the argument parsing of this script: -i marks an input
# file, -o marks the output file, any other argument is an input file.
function usage(){
    echo " Script : $THIS_JOB called as :"
    echo " $THIS_JOB [-i] <FILE_INPUT> [[-i] <FILE_INPUT> ...] -o <FILE_OUTPUT>"
    echo " Every argument that is not the value of -o is taken as an input file. "
    echo " A blueprint (counts, size, checksum, type, encoding) of each input "
    echo " file is appended to FILE_OUTPUT."
    echo
}
# Abort the script when a previous step failed; stay silent on success.
# Arguments:
#   $1 - exit status to check (typically "$?" of the preceding command);
#        treated as 0 when missing or empty.
# Behaviour:
#   status 0      -> returns without printing anything
#   status not 0  -> prints "Failed with status  <n>" and a blank line, then
#                    exits the current shell with that status
function chk_abnd_silent()
{
    # Defaulting to 0 and quoting keeps "[" from failing with
    # "unary operator expected" when no status is passed.
    local abnd=${1:-0}
    if [ "$abnd" -ne 0 ]; then
        echo "Failed with status " "$abnd"; echo
        exit "$abnd"
    fi
}
#
# Main function that produces the results: prints a "blueprint" report of
# one file to standard output.
# Arguments:
#   $1 - path of the file to describe
# Globals:
#   FILE_IN   - set to $1 (left global, as before)
#   fzerosize - echoed as-is; it is not assigned anywhere in this function
# External commands used: wc, md5sum, file, du, stat, awk, dirname, basename.
function process(){
    FILE_IN=$1
    # Scratch values stay local so repeated calls do not leak state.
    local linecount wordcount charcount sizecount2 md5sumrep
    local abspathfi typeoffil fiencodin fdiskusag flineleng

    # Every expansion of the path is quoted so names containing spaces or
    # glob characters are passed to the tools as a single argument.
    linecount=$(wc -l "$FILE_IN" | awk '{print $1}')
    wordcount=$(wc -w "$FILE_IN" | awk '{print $1}')
    charcount=$(wc -m "$FILE_IN" | awk '{print $1}')
    sizecount2=$(wc -c "$FILE_IN" | awk '{print $1}')
    md5sumrep=$(md5sum "$FILE_IN" | awk '{print $1}')
    # Physical directory of the file (pwd -P) joined with its basename.
    abspathfi="$(cd "$(dirname "$1")" && pwd -P)/$(basename "$1")"
    typeoffil=$(file "$FILE_IN")
    fiencodin=$(file -bi "$FILE_IN")
    # With -c du prints a grand-total line after the file's own line;
    # NR==2 picks the first field of that second line.
    fdiskusag=$(du -hca "$FILE_IN" | awk 'NR==2 {print $1}')
    flineleng=$(wc -L "$FILE_IN" | awk '{print $1}')

    echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
    echo "@ Blueprint of $FILE_IN"
    stat "$FILE_IN"
    echo "Linecount = $linecount"
    echo "Wordcount = $wordcount"
    echo "Charcount = $charcount"
    echo "Sizecount = $sizecount2"
    echo "Disk Usage = $fdiskusag"
    echo "MD5sum = $md5sumrep"
    echo "Absolute Path = $abspathfi"
    echo "Encoding = $fiencodin"
    echo "File Type = $typeoffil"
    echo "Longest Line = $flineleng"
    # Kept for output compatibility: prints an empty line unless a global
    # fzerosize has been set by the caller.
    echo "$fzerosize"
    echo ""
}
#################################################################
# Script start.
echo "$THIS_JOB" " start time at $(date) "; echo
echo "-"
echo "Processing ...."
echo "The following files will be processed: "
# Print the entire array on one line
echo "${inputFiles[@]}"; echo
# Iterate over the array, one input file at a time
for inputFile in "${inputFiles[@]}"; do
    echo "Blueprinting File : $inputFile"
    # A missing file cannot be blueprinted: report it and move on instead
    # of running the tools against a path that does not exist.
    if [[ ! -e $inputFile ]]; then
        echo "Input File : $inputFile does not exist"
        continue
    fi
    echo "Input File : $inputFile exists"
    if [[ -d $inputFile ]]; then
        echo "Input File : $inputFile is a directory"
        continue
    fi
    echo "Input File : $inputFile is regular, proceeding.."
    echo ""
    # Quoting keeps paths with spaces intact. Without -o the report goes to
    # standard output; an empty redirect target would be an
    # "ambiguous redirect" error.
    if [[ -n $outputFile ]]; then
        process "$inputFile" >> "$outputFile"
    else
        process "$inputFile"
    fi
done
#################################################################
# Script end.
echo "$THIS_JOB" " end time at $(date) "; echo
echo "-"
1. blueprint.bsh : Reports all relevant info (counts, size, checksum, type, encoding) for any number of files
2. filequery.py : Query on specific substring of textfile