Parse csv files for KnowBe4 email upload
#!/usr/bin/env python
"""Program to parse and update email upload list"""
import sys
import os
import re
import csv
import datetime
__author__ = "James R. Aylesworth"
__contributor__ = "Tim Wesline"
__copyright__ = "Copyright 2017"
__license__ = "GPL"
__version__ = "1.3.0"
__maintainer__ = "James R. Aylesworth"
__email__ = "jamesaylesworth@monroecounty.gov"
__status__ = "PRODUCTION"
'''
NOTES:
1.3.0 - Email validation added
Removal of emails in provided exemptions list working
Cleaned up output
1.2.3 - Tim Wesline found correct RegEx to split out OU's
1.2.2 - Added section for parsing CSV files, completing import and indexing of headers
1.2.1 - Added beginning break in print out as well as script run date
Logically re-ordered functions
1.2.0 - Added method to remove known bad emails from provided list
All output is now in complete lowercase
Provided option to order list
1.1.3 - General cleanup, additional commenting, and better worded output
1.1.2 - Broke out file open, read and close into its own function
1.1.1 - Updates to validateFile function to utilize passed variable instead of array referance
'''
def validateFile(fileLocation):
print('Validating ' + fileLocation)
# verify the path and file names before setting any variables
if os.path.exists(os.path.dirname(fileLocation)) & os.path.isdir(os.path.dirname(fileLocation)):
if not os.path.isfile(fileLocation):
print('*** File does not appear at given path. ***')
return False
else:
print('--> VALIDATED\n')
return True
else:
print('Given path does is incorrect, or is not a directory.')
return False
def csvParse(csvFile):
# read in csv
csvRows = []
csvFileObject = open(csvFile)
csvReaderObject = csv.reader(csvFileObject)
# required headers for upload file
csvRequiredHeaders = ['Email', 'First Name', 'Last Name', 'Phone Number', 'Extension', 'Group', 'Location',
'Division', 'Manager Name', 'Manager Email', 'Employee Number', 'Job Title', 'Password', 'Mobile', 'AD Managed']
# required headers as called by lotus
lotusHeaders = ['Internet Address', 'First Name', 'Last Name', 'Business Phone', 'Extension', 'Full Name', 'Company',
'Department', 'Manager', 'Manager Email', 'Employee Number', 'Job Title', 'Password', 'Mobile Phone', 'AD Managed']
# actual listing of lotus export headers
lotusHeadersFull = ['Title', 'First Name', 'Middle Name', 'Last Name', 'Full Name', 'Short Name', 'Phonetic Name', 'Suffix', 'Company', 'Department',
'Messaging ID', 'Job Title', 'Business Street', 'Business City', 'Business State', 'Business Postal Code',
'Business Country/Region', 'Home Street', 'Home City', 'Home State', 'Home Postal Code', 'Home Country/Region',
'Other Street', 'Other City', 'Other State', 'Other Postal Code', 'Other Country/Region', 'Assistant\'s Phone', 'Fax',
'Business Phone', 'Business Phone 2', 'Fax 2', 'Home Phone', 'Home Phone 2', 'Mobile Phone', 'Mobile Phone 2', 'Pager',
'Anniversary', 'Manager', 'Assistant\'s Name', 'Birthday', 'Category', 'Children', 'Directory Server', 'Business Mail',
'Internet Address', 'Personal Mail', 'Assistant\'s Mail', 'Business Mail 2', 'Personal Mail 2', 'Location', 'Comments',
'Spouse', 'Web Site', 'Blog Site']
headerIndices = []
for headerItem in lotusHeaders:
try:
headerIndices.append(lotusHeadersFull.index(headerItem))
except Exception as e:
print('Error trying to add ' + headerItem)
# add a blank so we don't mess up header order
headerIndices.append('')
continue
else:
continue
print('\nHeader Indices are: ' + str(headerIndices) + '\n')
# Create storage, set headers, and build rows for new output
dataToOutput = []
dataToOutput.append(csvRequiredHeaders)
for csvItem in csvReaderObject:
rowToAdd = []
if csvReaderObject.line_num == 1: # skip first row; it's an incorrect header
continue
for rowLoc in headerIndices:
skipRow = False
if(rowLoc == ''):
rowToAdd.append('')
else:
# check if this is OU listing
reOUMatch = re.findall(r'OU([+=]\w+)', csvItem[rowLoc])
if(reOUMatch):
# pull out and append only the *actual* OU name
rowToAdd.append(reOUMatch[0].split("=")[1])
continue
else:
if(isEmailAddress(str(csvItem[rowLoc]))):
if(isGoodEmail(csvItem[rowLoc].lower(), readInFile(sys.argv[2]))):
rowToAdd.append(csvItem[rowLoc].lower())
else:
skipRow = True # email is invalid; skip it
break
elif(rowLoc > 0):
rowToAdd.append(csvItem[rowLoc])
else:
continue
if(not skipRow):
dataToOutput.append(rowToAdd)
csvFileObject.close()
return dataToOutput
def writeCSV(fileToWriteOut, locationToWriteTo, fileNameToUse):
filenameAndPathToUse = (os.path.join(locationToWriteTo, fileNameToUse))
with open(filenameAndPathToUse, 'w', newline='') as csvfile:
csvWriter = csv.writer(csvfile, delimiter=',',
quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in fileToWriteOut:
csvWriter.writerow(row)
print("\nUpload file written to: " + filenameAndPathToUse + '\n\n')
def readInFile(strFileAndLocation):
# open, read, store data, and close
fileObject = open(strFileAndLocation)
strFileData = fileObject.readlines()
fileObject.close()
return strFileData
def isEmailAddress(stringToTest):
reEmailMatches = re.findall(r'[\w\.-]+@[\w\.-]+', str(stringToTest))
if (reEmailMatches):
return True
else:
return False
def isGoodEmail(email, badEmailList):
result = ''
for x in badEmailList:
if(email.lower().rstrip() == x.lower().rstrip()):
result = False
print("--> removing " + email.lower() + " ... ")
break
else:
result = True
return result
def getEmails(searchString, sorted):
print('\nSearching for emails...')
reEmailMatches = re.findall(r'[\w\.-]+@[\w\.-]+', str(searchString))
# put unique emails into list, sort and then output to file
outputEmailLIST = []
for x in range(len(reEmailMatches)):
if outputEmailLIST.count(reEmailMatches[x]) == 0:
outputEmailLIST.append((reEmailMatches[x]))
if sorted:
outputEmailLIST.sort()
print('Emails found: ' + str(len(outputEmailLIST)))
return outputEmailLIST
def removeEmails(goodEmailList, badEmailList):
outputToSendBack = []
print('\nRemoving bad emails...')
for x in badEmailList:
x = x.lower()
print('Removing ' + x)
for y in outputToSendBack:
y = y.lower()
if(y.find(x)):
continue
else:
outputToSendBack.append(y)
return outputToSendBack
def printToFile(itemList, pathToWriteTo, filenameToUse):
strOutputPath = os.path.join(pathToWriteTo, filenameToUse)
outputFile = open(strOutputPath, 'w', newline='')
for item in itemList:
if item != '':
outputFile.write(item)
outputFile.close()
print('\nData saved to: ' + os.path.join(pathToWriteTo, filenameToUse))
def main():
print('------------------------------------------------------------')
print('Script run ' + str(datetime.datetime.now()) + '\n')
# validate system args using above function
if len(sys.argv) == 3:
if validateFile(sys.argv[1]):
strFileDirectory = os.path.dirname(sys.argv[1])
strFileBaseName = os.path.basename(sys.argv[1])
strFileAndLocation = os.path.join(
strFileDirectory, strFileBaseName)
if validateFile(sys.argv[2]):
strFileDirectory2 = os.path.dirname(sys.argv[2])
strFileBaseName2 = os.path.basename(sys.argv[2])
strFileAndLocation2 = os.path.join(
strFileDirectory2, strFileBaseName2)
else:
exit() # args were invalid; exit program!
else:
print(
'Usage: python.exe email_upload_cleaner.py [FILE_LOCATION\EMAIL_LIST] [FILE_LOCATION\REMOVE_LIST]')
exit()
# clean up the input file (match formatting and seperate out OU)
cleanedData = csvParse(sys.argv[1])
tempFileAndLocation = (os.path.join(strFileDirectory, '_temp_output.csv'))
writeCSV(cleanedData, strFileDirectory, 'KnowBe4_Email_Upload.csv')
print('------------------------------------------------------------')
main()