jmquintana79
8/18/2016 - 7:14 AM

Look for temporal holes into pandas dataframe with datetime column

Look for temporal holes into pandas dataframe with datetime column

"""
DF:               pandas df with datetime column to be checked
name_datetime:    name of datetime column in DF
step_datetime:    temporal resolution of datetime trend (minutes)

"""

def dfholes(DF,name_datetime,step_datetime):
    import pandas as pd
    import numpy as np

    # avoid warning
    pd.options.mode.chained_assignment = None  # default='warn'
    
    # sort df by datetime
    DF.sort([name_datetime], ascending=[1], inplace=True)

    # get list of dates
    ldates = DF[name_datetime].tolist()

    # calculate the real number of steps to check
    nsteps = int( (ldates[-1] - ldates[0]).total_seconds()//60. ) + 1

    # list of dates to check + index
    ldt = [ ldates[0] + timedelta(minutes=int(step_datetime)*i) for i in range(nsteps)]
    lindex = list(range(len(ldt)))

    # store in dataframe
    DFDT = pd.DataFrame(np.array(list(zip(ldt,lindex))),columns=['datetime','index'])
    DFDT = DFDT.set_index(['datetime'])

    # set index of df to be checked
    try:
        DF = DF.set_index([name_datetime])
    except:
        pass

    # check 
    DFCHECK = pd.isnull(pd.concat([DFDT, DF], axis=1))
    # filter by availability
    DFCHECK = DFCHECK[DFCHECK[DFCHECK.columns[-1]]==True]
    
    # return of np array with dates of temporal holes
    return np.array(DFCHECK.index.tolist())