import pandas as pd
# Display all columns at once in PyCharm
pd.set_option('display.width',500) # Width of each cell (in characters)
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)
df = pd.read_csv('poke_data.csv') # df = data frame
df_xl = pd.read_excel('poke_data.xlsx') # For excel files
df_txt = pd.read_csv('poke_data.txt',delimeter = '\t')
df.to_csv('modified.csv',index = False) # Saves df to csv. to_excel also works
df.to_csv('modified.txt', index = False, sep = '\t')
# Prints detailed info of csv file
df.info()
# Give statistical info about the Dataset
df.describe().transpose()
print(df.head(3)) # Gives first 3 elements
print(df.tail(3)) # Gives last 3 elements
print(df.columns) # Gives column titles
len(df) # Gives number of rows
len(df.columns) # No. of Columns
# First bracket takes only 1 parameter i.e List.
df[ ['Name','Type'] ][0:5] # Gives Name and Type columns (first 5 entries)
df.iloc[0:4] # Gives first 4 rows
# Get data by indexing :
df.iloc[2,1] # Gives data in second index first column
# For loop iteration
for index, row in df.iterrows():
print(index,row)
# Gives only Name Column
for index, row in df.iterrows():
print(index,row['Name'])
# Add column to dataframe
df["New Column Name"] = df["col_1"] * df["col_2"]
df.sort_values('Name', ascending = True) # Sort by Name
df.sort_values(['Name','HP'], ascending = [1,0]) # Sort by Name, HP
df['Total'] = df['HP'] + df['Attack'] # Creates new column
# Create column 'Total' of all rows and columns 4 to 9 and consisting of horizontal sum.
df['Total'] = df.iloc[:,4:9].sum(axis=1)
df = df.drop(columns=['Total']) # Remove Column
df = df[['Total','Defence','HP']] # Rearranges Columns
cols = list(df.columns.values) # Returns column names as list
new_df.reset_index(drop = True, inplace = True) # Resets Indexes
df.loc[df['Type 1'] == "Fire"] # Return all fire type pokemon
print(df.loc[(df['Type 1'] == "Fire") & (df['Type 2'] == "Water")])
df.loc[~df['Name'].str.contains('Mega')] # Search Keyword in data. ~ is negation.
# Search using Regular Expressions :
newdf = df.loc[df['Type 1'].str.contains('fire|grass', flags = re.I, regex = True)]
df.loc[df['Type 1'] == 'Fire', ['Type 1'] ] = 'Volcano' # Find and Replace
df.loc[df['Total'] > 500 , ['Legendary', 'Generation' ]] = [True , 4]
# Aggregation Functions. 3 options = Mean, Sum, Count
# Grouping by Type to show average.
grp = df.groupby(['Type 1']).mean().sort_values('Attack',ascending=False)
df['count'] = 1 # Adds count column with value 1
df.groupby(['Type 1']).count()['count'] # Shows count in the column.
df["Film"] = df["Film"].astype('category') # Makes datatypes as category
# Get unique categories
t = df["genre"].unique()