slzdevsnp
8/14/2019 - 3:19 PM

[panda_basic] #loc #iloc #drop #filter #sort_values #csv

[panda_basic] #loc #iloc #drop #filter #sort_values #csv

panda_basic.py

import pandas as pd
import numpy as np
from pprint import pprint as pp
# Create an example dataframe
data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy', 'Jason'], 
        'year': [2012, 2012, 2013, 2014, 2014, 2015], 
        'reports': [4, 24, 31, 2, 3, 0]}
df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma', 'Moscow'])

##############################
##   droprows, columns  #
##############################
df
#drop a row based on index value
df.drop('Cochice', axis=0, inplace=True)

#drop a column 'reports'
df.drop('reports',axis=1,inplace=True)

#drop first two rows basedon num index 0:2
df.drop(df.index[0:2],axis=0,inplace=True)

#drop last row of the df
df.drop(df.index[-1],axis=0,inplace=True)

#drop last two rows 
df.drop(df.index[-2:],axis=0,inplace=True)

#-------------------
# drop rows based on  conditifon(s)
#--------------------
## !!NB this expects a continious monotonic index !!
df.drop(df.loc[df['year']==2012].index, axis=0,inplace=True)

#######################
##   sort values values #
#######################

df.sort_values(by='name',ascending=False)
#by multiple criterias
df.sort_values(by=['reports','name'],ascending=False)

#######################
##   filtering values #
#######################

#filter on a single value
msk=df['year']==2012
df.loc[msk]
#filter return sublist of columns
df.loc[msk,['name','year']]

#filter on more than 1 value
msk=df['year'].isin([2012,2014])
df.loc[msk]

#filter on more than 1 criteria
name_msk=df['name'].isin(['Jason','Molly'])
year_msk=df['year'].isin([2012,2013,2015])
gmask=(name_msk)&(year_msk)
df.loc[gmask,:]

#filter on true and false criteria
df.loc[(df['name']=='Jason')& ~(df['reports'].isnull()),:]


#######################
##   create new columns #
#######################

data = {'name': ['Jason', 'Molly', 'Tina', np.nan, 'Amy', 'Jason'], 
        'year': [2012, 2012, 2013, 2014, 2014, 2015],
        'born': [pd.NaT, pd.NaT, pd.Timestamp('1940-04-25'), pd.NaT, pd.Timestamp('1973-03-25'), pd.NaT],
        'reports': [4, 24, 31, 2, 3, np.nan]}
df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma', 'Moscow'])

#conditional value in new column
df['reports_in_2012'] = np.where(df['year']==2012, df['reports'], 0)
#conditional creatipon on null
df['is_born'] = np.where(df['born'].isnull(), 'N', 'Y')


#######################
##   read write csv #
#######################
## reading
df=pd.read_csv(csvfile,header='infer')
#writing
df.to_csv(csvfile, header=True, index=False)