JGuizard
6/15/2016 - 10:19 AM

Pandas' DataFrame basics

Pandas' DataFrame basics

import pandas as pd
import numpy as np

# Build a labelled Series: one random standard-normal draw per label 'a'..'e'.
labels = list('abcde')
s = pd.Series(data=np.random.randn(len(labels)), index=labels)

# Build a 6-row DataFrame from a dict mapping column name -> column values:
# 'a' and 'c' are random standard-normal floats, 'b' alternates 'foo'/'bar'.
df = pd.DataFrame(
    data={
        'a': np.random.randn(6),
        'b': ['foo', 'bar'] * 3,
        'c': np.random.randn(6),
    },
)

#load dataframe from csv
# read_csv is a module-level pandas function, not a DataFrame method; it
# returns a new DataFrame, so the result has to be bound to a name.
df = pd.read_csv("data/mydata.csv")
# index_col=0 uses the first CSV column as the row index; parse_dates=True
# asks pandas to parse that index as dates.
close_px = pd.read_csv('stock_data.csv', index_col=0, parse_dates=True)
# First value of column 'AA'. Use .iloc for positional access: a plain [0] on
# a Series with a non-integer index relies on a deprecated positional fallback.
close_px['AA'].iloc[0]

# Peek at the top of the frame; n is the number of rows (5 is the default).
df.head(n=5)

# Peek at the bottom of the frame; n works the same way.
df.tail(n=5)

#view rows from index
# An integer slice inside [] selects ROWS by position: here rows 10 up to,
# but not including, 20.
df[10:20] #this operation is called slicing

#slice by column
# A label (or list of labels) inside [] selects COLUMNS instead.
# 'col1'/'col2' are presumably placeholder names — substitute real columns.
df['col1'] # single column
df[['col1','col2']] #multiple cols

#slice by row ranges
# .ix was deprecated in pandas 0.20 and removed in 1.0; .loc is the
# label-based replacement. Label slices include BOTH endpoints. Date strings
# like these assume the frame is indexed by dates (e.g. a DatetimeIndex).
df.loc['2001-01-01':'2001-12-31']

#slice by row and column
# First indexer selects rows by label range, second selects columns by name.
df.loc['2001-01-01':'2001-12-31',['col1','col2']]

#print max/min for a column
# Selects column 'col' and reduces it to its largest / smallest value.
# 'col' is presumably a placeholder column name — substitute a real one.
# NOTE: as bare expressions these only display a result in an interactive
# session; wrap them in print() when running as a script.
df['col'].max()
df['col'].min()

#show dataframe index and columns
df.index  # row labels
df.columns  # column labels

#show transpose matrix
# transpose is a method: without the parentheses the expression is only the
# bound method object, not the transposed frame. df.T is an equivalent shorthand.
df.transpose()

#fill missing value
#http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html
# The first two calls return a NEW DataFrame and leave df untouched; assign
# the result if you want to keep it.
# Fill with the number 0 — the string "0" would put text into numeric columns.
df.fillna(value=0) #fill with 0
# fillna(method=...) is deprecated in recent pandas; ffill()/bfill() are the
# direct equivalents.
df.ffill() #forward fill: propagate the last valid value forward
df.bfill(inplace=True) #backward fill: use the next valid value; inplace=True modifies the existing dataframe

#plot a graph
import matplotlib.pyplot as plt
# Selecting several columns needs a LIST of labels (double brackets).
# df["col1","col2"] is looked up as the single tuple key ("col1", "col2")
# and raises KeyError on a frame with plain string column names.
df[["col1","col2"]].plot()
# Display the figure created by the plot() call above.
plt.show()