jgoenetxea
5/2/2018 - 9:56 AM

How to list jobs from digits

This script analyzes and measures the disk space used by DIGITS jobs.

from os import listdir
from os.path import isfile, join
import math
import matplotlib.pyplot as plt

def getAllFileInFolder(folderPath, fileExtension):
    """Return the names of the files in folderPath that end with fileExtension.

    fileExtension may be given with or without its leading dot ('txt' and
    '.txt' are equivalent). Only direct children of folderPath that are
    regular files are considered; bare names (not full paths) are returned,
    in the order produced by listdir.
    """
    # Normalise the extension so that it always carries the leading dot.
    suffix = fileExtension if fileExtension.startswith('.') else '.' + fileExtension

    matches = []
    for entry in listdir(folderPath):
        if not entry.endswith(suffix):
            continue
        if isfile(join(folderPath, entry)):
            matches.append(entry)
    return matches


def getAllFoldersInFolder(folderPath):
    """Return the names of the entries in folderPath that are not regular files.

    Every direct child for which isfile() is false is reported, so besides
    sub-folders this also includes any other non-file entry. Bare names
    (not full paths) are returned, in the order produced by listdir.
    """
    folders = []
    for entry in listdir(folderPath):
        if isfile(join(folderPath, entry)):
            continue
        folders.append(entry)
    return folders


def generatePieChart(data, labels):
    """Show a pie chart with one slice per value in data.

    data holds the slice sizes and labels the matching slice captions.
    Each slice is annotated with its percentage (one decimal place), the
    pie is drawn with a shadow and the first slice starts at 90 degrees.
    The call blocks in plt.show() until the figure is dismissed.
    """
    pie_options = {
        'labels': labels,
        'autopct': '%1.1f%%',
        'shadow': True,
        'startangle': 90,
    }

    _, axes = plt.subplots()
    axes.pie(data, **pie_options)
    # An equal aspect ratio keeps the pie circular instead of elliptical.
    axes.axis('equal')
    plt.show()


def getFolderSize(folderPath):
    """Return the total size in bytes of the files directly inside folderPath.

    Only regular files that are direct children of the folder are counted;
    sub-folders are not descended into.
    """
    children = (os.path.join(folderPath, name) for name in os.listdir(folderPath))
    return sum(os.path.getsize(child) for child in children if os.path.isfile(child))


def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: non-negative number of bytes (int or float).

    Returns:
        "0B" for zero, otherwise "<value> <unit>" where value is rounded to
        two decimals and unit is one of B, KB, MB, ... YB. Values below one
        byte are reported in "B" and values beyond the largest unit are
        reported in "YB".

    Raises:
        ValueError: if size_bytes is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative, got %r" % (size_bytes,))

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    # Pick the unit by repeated division instead of a floating-point log:
    # the index then can never go negative (sizes below 1) nor run past the
    # last unit (sizes of 1024**9 and above).
    value = float(size_bytes)
    i = 0
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1

    return "%s %s" % (round(value, 2), size_name[i])


# Given a jobs folder the function returns the username of the owner
def getUserFromJob(job_folder):
    """Return the owner's username recorded in a job folder's status.pickle.

    The file is not unpickled: it is read as text, the first line containing
    the marker 'username' (with the quotes) is located, and the line two
    positions below it is returned with its first and last characters
    removed.
    NOTE(review): this presumably relies on the file being a text
    (protocol 0) pickle -- confirm against the files actually produced.

    Args:
        job_folder: path of the job folder.

    Returns:
        The extracted username, or "Unknown" when the status file is
        missing (a message is printed), the marker is not found, or the
        file ends before the expected value line.
    """
    statusFile = job_folder + "/" + "status.pickle"
    if not os.path.isfile(statusFile):
        print("File '" + statusFile + "' does not exist!")
        return "Unknown"

    # The context manager closes the file even if reading fails.
    with open(statusFile, "r") as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        if "'username'" in line:
            if i + 2 >= len(lines):
                # Truncated file: the value line is missing.
                break
            rawUserName = lines[i + 2]
            # Drop the leading type character and the trailing newline.
            return rawUserName[1:-1]
    return "Unknown"



import sys
import os.path

if __name__ == '__main__':
    # Scan the current directory unless a jobs folder is passed as the first
    # command-line argument.
    jobs_folder = "./"
    if len(sys.argv) > 1:
        jobs_folder = sys.argv[1]
        if not os.path.isdir(jobs_folder):
            print("Folder '" + jobs_folder + "' does not exist!\n")
            sys.exit(-1)

    # Every non-file entry of the jobs folder is treated as one job.
    jobs = getAllFoldersInFolder(jobs_folder)

    # Owner and size of each job, index-aligned with `jobs`.
    job_belongs_to = [getUserFromJob(os.path.join(jobs_folder, job)) for job in jobs]
    job_sizes = [getFolderSize(os.path.join(jobs_folder, job)) for job in jobs]

    # Per-job log: one "job:user:size_in_bytes" line per job.
    with open("job_list.log", 'w') as outFile:
        for job, usr, size in zip(jobs, job_belongs_to, job_sizes):
            outFile.write("%s:%s:%s\n" % (job, usr, size))

    # Accumulate the total size per user.
    size_dict = {}
    for usr, size in zip(job_belongs_to, job_sizes):
        size_dict[usr] = size_dict.get(usr, 0) + size

    # Sorted so that the log and the chart come out in a reproducible order.
    user_list = sorted(size_dict)

    # Per-user log: one "user:size_in_bytes:human_readable_size" line per user.
    with open("size_by_user.log", "w") as usr_ref_file:
        for usr in user_list:
            total = size_dict[usr]
            usr_ref_file.write("%s:%s:%s\n" % (usr, total, convert_size(total)))

    # Chart the share of disk space per user; a pie of all-zero (or no)
    # values carries no information, so it is skipped in that case.
    size_list = [size_dict[usr] for usr in user_list]
    if any(size_list):
        generatePieChart(size_list, user_list)
    else:
        print("No disk usage found; skipping the chart.")