[TOC]
Y = data['TV'] # column
Y = data.TV
df.ix['Arizona', 2] # Select the third cell in the row named Arizona
df.ix[2, 'deaths'] # Select the third cell down in the column named deaths
df[:2] # first two rows
df.ix['Maricopa'] # view a row
df.ix[:, 'coverage'] # view a column
df.ix['Yuma', 'coverage'] # view the value based on a row and column
df.iloc[:2] # rows by row number
df.iloc[1:2] # Select the second and third row
df.iloc[2:] # Select every row after the third row
df.iloc[3:6,0:3] #
df.loc[:'Arizona'] # all rows by index label
df.ix[['Arizona', 'Texas']] # .ix is the combination of both .loc and .iloc. Integers are first considered labels,if not found, falls back on pos indexing
df.query('A > C')
df.query('A > 0')
df.query('A > 0 & A < 1')
df.query('A > B | A > C')
df[df['coverage'] > 50] # all rows where coverage is more than 50
df[(df['deaths'] > 500) | (df['deaths'] < 50)]
df[(df['score'] > 1) & (df['score'] < 5)]
df[~(df['regiment'] == 'Dragoons')] # Select all the regiments not named "Dragoons"
df[df['age'].notnull() & df['sex'].notnull()] # ignore the missing data points
df[df.name.isin(value_list)] # value_list = ['Tina', 'Molly', 'Jason']
df[~df.name.isin(value_list)]
df2[df2.E.str.contains("tw|ou")]
df['raw'].str.contains('....-..-..', regex=True) # regex
df[df['country'].map(lambda country: 'Syria' in country)]
df.take(np.random.permutation(len(df))[:2])
## Cell ranges
```python
df['x3'][:1] # take column 'x3',
df.iloc[:,:2] # Select the first 2 columns
feature_cols = ['TV','Radio','Newspaper']
x = data[feature_cols]
data[['TV','Radio','Newspaper']]
df.loc[:,['A','B']] # syntax is: df.loc[rows_index, cols_index]
df.filter(like='data')
df['preTestScore'].where(df['postTestScore'] > 50) # Find where a value exists in a column
df.sort_values(by='reports', ascending=0) # Sort the dataframe's rows by reports, in descending order
df.sort_values(by=['coverage', 'reports']) # Sort the dataframe's rows by coverage and then by reports, in ascending order
data.head()
data.tail()
data.tail().transpose()
data.shape()
df.name.unique()
data['preTestScore'].groupby(df['company']).describe()
data['preTestScore'].describe()
data['preTestScore'].count()
data['preTestScore'].min()
data.group.value_counts()
df.corr()
pd.rolling_mean(df, 2) # Calculate the moving average. That is, take
# the first two values, average them,
# then drop the first and add the third, etc.
#regiment company
#Dragoons 1st 3.5
# 2nd 27.5
#Nighthawks 1st 14.0
# 2nd 16.5
#Scouts 1st 2.5
# 2nd 2.5
#dtype: float64
data['preTestScore'].groupby([df['regiment'], data['company']]).mean()
#company 1st 2nd
#regiment
#Dragoons 3.5 27.5
#Nighthawks 14.0 16.5
#Scouts 2.5 2.5
data['preTestScore'].groupby([data['regiment'], data['company']]).mean().unstack()
#preTestScore postTestScore
#regiment company
#Dragoons 1st 3.5 47.5
#2nd 27.5 75.5
#Nighthawks 1st 14.0 59.5
#2nd 16.5 59.5
#Scouts 1st 2.5 66.0
#2nd 2.5 66.0
df.groupby(['regiment', 'company']).mean()
Input
guardCorps corps1 corps2 corps3 corps4 corps5 corps6 corps7 corps8 corps9 corps10 corps11 corps14 corps15
1875 0 0 0 0 0 0 0 1 1 0 0 0 1 0
1876 2 0 0 0 1 0 0 0 0 0 0 0 1 1
1877 2 0 0 0 0 0 1 1 0 0 1 0 2 0
result = horsekick.apply(pd.value_counts).fillna(0); result
regiment company experience name preTestScore postTestScore
0 Nighthawks infantry veteran Miller 4 25
1 Nighthawks infantry rookie Jacobson 24 94
2 Nighthawks cavalry veteran Ali 31 57
-->
company cavalry infantry All
regiment
Dragoons 2 2 4
Nighthawks 2 2 4
pd.crosstab(df.regiment, df.company, margins=True)
df['preTestScore'].idxmax() # row with max value