YogenGhodke
5/4/2020 - 2:51 PM

Pandas Library

import pandas as pd

# Display all columns at once in PyCharm
pd.set_option('display.width',500)          # Width of each cell (in characters)
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows', None)

df = pd.read_csv('poke_data.csv')           # df = data frame
df_xl = pd.read_excel('poke_data.xlsx')        # For excel files
df_txt = pd.read_csv('poke_data.txt',delimeter = '\t')

df.to_csv('modified.csv',index = False)           # Saves df to csv. to_excel also works
df.to_csv('modified.txt', index = False, sep = '\t')

# Prints detailed info of csv file
df.info()

# Give statistical info about the Dataset
df.describe().transpose()                          


print(df.head(3))           # Gives first 3 elements
print(df.tail(3))           # Gives last 3 elements

print(df.columns)           # Gives column titles
len(df)                     # Gives number of rows
len(df.columns)             # No. of Columns

# First bracket takes only 1 parameter i.e List.
df[ ['Name','Type'] ][0:5]              # Gives Name and Type columns (first 5 entries)
df.iloc[0:4]                            # Gives first 4 rows

# Get data by indexing :
df.iloc[2,1]                            # Gives data in second index first column

# For loop iteration
for index, row in df.iterrows():
    print(index,row)
    
# Gives only Name Column    
for index, row in df.iterrows():
    print(index,row['Name'])
    
# Add column to dataframe
df["New Column Name"] = df["col_1"] * df["col_2"]
    

df.sort_values('Name', ascending = True)                    # Sort by Name
df.sort_values(['Name','HP'], ascending = [1,0])            # Sort by Name, HP

df['Total'] = df['HP'] + df['Attack']                       # Creates new column

# Create column 'Total' of all rows and columns 4 to 9 and consisting of horizontal sum.
df['Total'] = df.iloc[:,4:9].sum(axis=1)                    

df = df.drop(columns=['Total'])                 # Remove Column

df = df[['Total','Defence','HP']]               # Rearranges Columns

cols = list(df.columns.values)                  # Returns column names as list

new_df.reset_index(drop = True, inplace = True)      # Resets Indexes

df.loc[df['Type 1'] == "Fire"]         # Return all fire type pokemon
print(df.loc[(df['Type 1'] == "Fire") & (df['Type 2'] == "Water")])

df.loc[~df['Name'].str.contains('Mega')]         # Search Keyword in data. ~ is negation.

# Search using Regular Expressions :
newdf = df.loc[df['Type 1'].str.contains('fire|grass', flags = re.I, regex = True)]

df.loc[df['Type 1'] == 'Fire', ['Type 1'] ] = 'Volcano'       # Find and Replace

df.loc[df['Total'] > 500 , ['Legendary', 'Generation' ]] = [True , 4]

# Aggregation Functions. 3 options = Mean, Sum, Count
# Grouping by Type to show average.
grp = df.groupby(['Type 1']).mean().sort_values('Attack',ascending=False)        

df['count'] = 1                             # Adds count column with value 1
df.groupby(['Type 1']).count()['count']         # Shows count in the column.


df["Film"] =  df["Film"].astype('category')     # Makes datatypes as category


# Get unique categories

t = df["genre"].unique()






# Filtering by condition

filter_object = df["abc"] < 2
print(df[filter_object])

# For Logical Conditions use Bitwise Operators

&       # AND
||      # OR


# Element by Element Filtering (Easy Method)

def Filterer(dataframe):
    dummy = dataframe["Country Name"] == "j"    # Change to relevant type
    for x in range(len(dataframe)):
        if dataframe["Country Name"][x].startswith("I"):    # Condition Here
           dummy[x] = True
    return dummy
    
df[Filterer(df)]


df["Country Name"].unique()     # Returns unique values
# Filter and get only 3rd column value
# Income Group is 3rd column


j = df[df["Country Name"] == "United States"]
list(j["Income Group"])[0]


# Directly get by index

j = df.iat[4,4]         # index at 4,4


# Get value by using Labels

j = df.at[2,"Country Name"]         # 2nd position country name