import pandas as pd
import numpy as np
# Load the sample sales dataset (fetched over HTTP each time this runs).
df = pd.read_csv('http://samplecsvs.s3.amazonaws.com/SalesJan2009.csv')

# Cast column(s): parse both timestamp columns with pd.to_datetime and store
# the results in new *_parsed columns, leaving the source columns untouched.
df[['Transaction_date_parsed', 'Account_Created_parsed']] = (
    df[['Transaction_date', 'Account_Created']].apply(pd.to_datetime)
)

# Round numeric column to full ten: decimals=-1 rounds to one position left
# of the decimal point. Series.round is vectorized, so no per-element lambda
# is needed.
df['Latitude_rounded'] = df['Latitude'].round(decimals=-1)

# New column based on map: lower-cased product name. The .str accessor
# propagates missing values instead of raising AttributeError on NaN, which
# a plain `x.lower()` lambda would do.
df['product_lower'] = df['Product'].str.lower()
# New column computed row by row from several columns
def f(row):
    """Build a region label of the form ``Country_State_City`` for one row.

    Args:
        row: Any object indexable by the keys 'Country', 'State' and 'City'
            (for example a DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        str: The three values, each converted with ``str()``, joined by
        underscores. Because of the ``str()`` conversion a missing value
        (float NaN) appears as the literal text ``'nan'``.
    """
    return '_'.join(str(row[key]) for key in ('Country', 'State', 'City'))
# Apply f to every row (axis=1) to build the combined region label; f is
# passed directly, no wrapping lambda is needed.
df['region'] = df.apply(f, axis=1)

# New column based on a column statement: element-wise product of latitude
# and longitude after casting both columns to float.
df['lat_lon'] = df['Latitude'].astype(float) * df['Longitude'].astype(float)

# Split one column into multiple columns: break the raw transaction timestamp
# text at the first run of whitespace. The pattern is a raw string so that
# `\s` reaches the regex engine unchanged, and n=1 caps the result at the two
# columns being assigned even if a value contains additional whitespace.
df[['date', 'time']] = df['Transaction_date'].str.split(r'\s+', n=1, expand=True)

# Show a random sample of 10 rows. As a bare expression this is only displayed
# by an interactive session / notebook cell; wrap it in print() when running
# the file as a plain script.
df.sample(10)