# MtnFranke
# 11/23/2017 - 11:21 AM
#
# Pandas Column Manipulation

import pandas as pd
import numpy as np
    
# Load the sample sales data set (fetched over HTTP, so network access is
# required).
df = pd.read_csv('http://samplecsvs.s3.amazonaws.com/SalesJan2009.csv')

# Cast column(s): run pd.to_datetime over both source columns and store the
# results in new columns, leaving the original columns untouched.
df[['Transaction_date_parsed', 'Account_Created_parsed']] = \
    df[['Transaction_date', 'Account_Created']].apply(pd.to_datetime)

# Round numeric column to full ten (a negative `decimals` rounds to the left
# of the decimal point). Series.round is vectorized, so no per-element
# lambda is needed.
df['Latitude_rounded'] = df['Latitude'].round(-1)

# New column based on map: str.lower is applied to each value;
# na_action='ignore' passes missing values (NaN) through unchanged instead
# of failing on a float that has no .lower() method.
df['product_lower'] = df['Product'].map(str.lower, na_action='ignore')

# New column based on rows
def f(row):
    """Build a region label from one row.

    The 'Country', 'State' and 'City' entries of ``row`` are each converted
    with ``str`` and joined with underscores, in that order.
    """
    parts = (row[key] for key in ('Country', 'State', 'City'))
    return '_'.join(str(part) for part in parts)

# New column based on rows: with axis=1, f is called once per row. It can be
# passed directly; a wrapping lambda adds nothing.
df['region'] = df.apply(f, axis=1)

# New column based on a column statement: element-wise product of the two
# columns after casting each to float.
df['lat_lon'] = df['Latitude'].astype(float) * df['Longitude'].astype(float)

# Split one column into multiple columns on runs of whitespace.
# The pattern is a raw string so that "\s" reaches the regex engine as-is
# instead of being an invalid Python string escape.
df[['date', 'time']] = df['Transaction_date'].str.split(r'\s+', expand=True)

# Print sample: a bare expression is only echoed in an interactive session,
# so print explicitly to get output when this runs as a script.
print(df.sample(10))