This script analyzes and measures the disk space used by digits jobs.
from os import listdir
from os.path import isfile, join
import math
import matplotlib.pyplot as plt
def getAllFileInFolder(folderPath, fileExtension):
    """Return the names of the files in folderPath that end with fileExtension.

    fileExtension may be given with or without its leading dot; a dot is
    prepended when it is missing. Only entries directly inside folderPath
    are inspected, and bare file names (not full paths) are returned in the
    order produced by listdir.
    """
    suffix = fileExtension if fileExtension.startswith('.') else '.' + fileExtension
    matches = []
    for entry in listdir(folderPath):
        if not entry.endswith(suffix):
            continue
        # Directories whose name happens to carry the suffix are skipped.
        if isfile(join(folderPath, entry)):
            matches.append(entry)
    return matches
def getAllFoldersInFolder(folderPath):
    """Return the names of the entries in folderPath that are not regular files.

    Only the direct children of folderPath are inspected; bare names (not
    full paths) are returned in the order produced by listdir.
    """
    folders = []
    for entry in listdir(folderPath):
        if isfile(join(folderPath, entry)):
            continue
        folders.append(entry)
    return folders
def generatePieChart(data, labels):
    """Draw a pie chart of data with one labelled wedge per entry and show it.

    data holds the wedge sizes and labels the matching wedge captions.
    Each wedge is annotated with its percentage share (one decimal place).
    """
    _, axes = plt.subplots()
    pie_options = dict(
        labels=labels,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
    )
    axes.pie(data, **pie_options)
    # An equal aspect ratio ensures that the pie is drawn as a circle.
    axes.axis('equal')
    plt.show()
def getFolderSize(folderPath, recursive=False):
    """Return the total size in bytes of the files stored in folderPath.

    Parameters:
        folderPath: path of the folder to measure.
        recursive: when False (the default) only the files located directly
            in folderPath are counted, so the contents of sub-folders are
            ignored. When True, sub-folders are descended into and their
            files are added to the total.

    Returns:
        The summed os.path.getsize of every counted file (0 for an empty
        folder).

    Raises:
        OSError: if folderPath (or, when recursing, a sub-folder) cannot be
            listed.
    """
    size = 0
    for name in os.listdir(folderPath):
        path = os.path.join(folderPath, name)
        if os.path.isfile(path):
            size += os.path.getsize(path)
        elif recursive and os.path.isdir(path) and not os.path.islink(path):
            # Symlinked directories are not followed: this avoids infinite
            # loops on cyclic links and double-counting shared folders.
            size += getFolderSize(path, recursive=True)
    return size
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Parameters:
        size_bytes: the size in bytes (int or float). Negative values keep
            their sign in the result.

    Returns:
        "0B" for zero; otherwise "<value> <unit>" where value is rounded to
        two decimals and unit is one of B, KB, MB, GB, TB, PB, EB, ZB, YB.
        Sizes beyond the largest unit are expressed in YB, and sizes below
        one byte are expressed in B.
    """
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    magnitude = abs(size_bytes)
    largest = len(size_name) - 1
    # Pick the unit by exact comparison instead of a floating-point
    # logarithm: this cannot mis-round at unit boundaries, accepts negative
    # or fractional input, and is clamped to the available units.
    i = 0
    while i < largest and magnitude >= 1024 ** (i + 1):
        i += 1
    s = round(size_bytes / math.pow(1024, i), 2)
    return "%s %s" % (s, size_name[i])
def getUserFromJob(job_folder):
    """Given a job folder, return the username of the owner.

    The file "status.pickle" inside job_folder is read as plain text (it is
    not unpickled). The first line containing the marker 'username' is
    located, and the line two positions after it, with its first and last
    characters removed, is returned.
    NOTE(review): this presumably matches the text layout of a protocol-0
    pickle -- confirm against the files actually produced.

    Parameters:
        job_folder: path of the job folder.

    Returns:
        The extracted username string, or "Unknown" when the status file
        does not exist, contains no username marker, or ends before the
        line the username is expected on.
    """
    statusFile = job_folder + "/" + "status.pickle"
    if not os.path.isfile(statusFile):
        print("File '" + statusFile + "' does not exist!")
        return "Unknown"

    # The context manager closes the file even if reading fails.
    with open(statusFile, "r") as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        # Guard the look-ahead so a truncated file cannot raise IndexError.
        if "'username'" in line and i + 2 < len(lines):
            # Drop the leading type character and the trailing newline.
            return lines[i + 2][1:-1]
    return "Unknown"
import sys
import os.path
if __name__ == '__main__':
    # Script entry point: measure every job folder, attribute it to its
    # owner, write two log files in the current directory and show a pie
    # chart of the space used per user.

    # The jobs folder defaults to the current directory; the first
    # command-line argument overrides it.
    jobs_folder = "./"
    if len(sys.argv) > 1:
        jobs_folder = sys.argv[1]
    if not os.path.isdir(jobs_folder):
        print("Folder '" + jobs_folder + "' does not exist!\n")
        # sys.exit is used because the bare exit() helper is only injected
        # by the site module and is not guaranteed to exist.
        sys.exit(-1)

    # Every non-file entry of the jobs folder is treated as one job.
    jobs = getAllFoldersInFolder(jobs_folder)

    # Owner and size of each job, index-aligned with `jobs`.
    job_belongs_to = [getUserFromJob(os.path.join(jobs_folder, job)) for job in jobs]
    job_sizes = [getFolderSize(os.path.join(jobs_folder, job)) for job in jobs]

    # Per-job log: one "job:user:size_in_bytes" line per job.
    with open("job_list.log", 'w') as outFile:
        for job, usr, size in zip(jobs, job_belongs_to, job_sizes):
            outFile.write(job + ":" + usr + ":" + str(size) + "\n")

    # Accumulate the total size per user.
    size_dict = {}
    for usr, size in zip(job_belongs_to, job_sizes):
        size_dict[usr] = size_dict.get(usr, 0) + size

    # Sorted so the log and the chart have a reproducible user order.
    user_list = sorted(size_dict)

    # Per-user log: one "user:size_in_bytes:human_readable_size" line per user.
    with open("size_by_user.log", "w") as usr_ref_file:
        for usr in user_list:
            total = size_dict[usr]
            usr_ref_file.write(usr + ":" + str(total) + ":" + str(convert_size(total)) + "\n")

    # Chart of the space used by each user.
    size_list = [size_dict[usr] for usr in user_list]
    generatePieChart(size_list, user_list)