04pallav
9/11/2017 - 10:29 PM

groupby aggregation

groupby aggregation

# Small demo frame: two string key columns ('k1', 'k2') plus two columns of
# random normal values that the examples below aggregate.
dframe = DataFrame({
    'k1': ['X', 'X', 'Y', 'Y', 'Z'],
    'k2': ['alpha', 'beta', 'alpha', 'beta', 'alpha'],
    'dataset1': np.random.randn(5),
    'dataset2': np.random.randn(5),
})

# Series-level groupby: the 'dataset1' values grouped by the keys in 'k1'.
group1 = dframe['dataset1'].groupby(dframe['k1'])
# as_index=False: the group key ('Customer_id') comes back as a regular
# column of the aggregated result instead of becoming its index.
gb = df.groupby(['Customer_id'], as_index=False)

# Grouping by year and month of a date column: pass the two derived Series
# as the grouping keys. The .dt accessor requires a datetime-like column.
created_on = df39485_300.Created_on_date
df39485_300.groupby([created_on.dt.year, created_on.dt.month])

# Pattern used to create a "groupby object": df1['col1'].groupby(df1['col2']),
# where col2 holds the categorical keys. describe() then reports summary
# statistics for each group.
group1.describe()

# Once the groupby object exists, any aggregation function can be passed to
# agg(), here by name.
group1.agg('mean')

# A dict maps each column to its own aggregation function.
# NOTE(review): gb is built from `df`, while 'dataset1'/'dataset2' are the
# column names of `dframe` -- confirm `df` actually has these columns.
column_aggs = {'dataset1': 'sum', 'dataset2': 'mean'}
gb.agg(column_aggs)

# apply() takes the function itself as its argument.
group1.apply(np.mean)

# Shortcut method: mean of the grouped 'dataset1' values for each 'k1' key.
group1.mean()

# Group the whole frame by 'k1' (rather than one specific column) and take
# the mean of every remaining numeric column.
# numeric_only=True skips the string column 'k2'; pandas >= 2.0 no longer
# drops non-numeric columns silently and raises TypeError without it.
dframe.groupby('k1').mean(numeric_only=True)

# Several columns can be used as group keys at once. Here both string
# columns are keys, so only numeric columns are left to average.
dframe.groupby(['k1', 'k2']).mean()

# size() gives the number of rows in each group.
dframe.groupby(['k1']).size()

# Iterating a groupby yields (key, sub-frame) pairs, so the pieces of the
# data can be collected into a dictionary keyed by group.
group_dict = {key: piece for key, piece in dframe.groupby('k1')}

# The rows of dframe whose 'k1' value is 'X'.
group_dict['X']

# reset_index() turns the current index into a regular column.
df = df.reset_index()