import pandas as pd
import numpy as np
df = pd.read_excel("../data/sales-funnel.xlsx")
table = pd.pivot_table(df,index=["Manager","Rep","Product"],
values=["Price","Quantity"],
aggfunc=[np.sum,np.mean],fill_value=0)
import pandas as pd
import seaborn as sns
# Define the headers since the data does not have any
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
"num_doors", "body_style", "drive_wheels", "engine_location",
"wheel_base", "length", "width", "height", "curb_weight",
"engine_type", "num_cylinders", "engine_size", "fuel_system",
"bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
"city_mpg", "highway_mpg", "price"]
# Read in the CSV file and convert "?" to NaN
df_raw = pd.read_csv("http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data",
header=None, names=headers, na_values="?" )
# Define a list of models that we want to review
models = ["toyota","nissan","mazda", "honda", "mitsubishi", "subaru", "volkswagen", "volvo"]
# Create a copy of the data with only the top 8 manufacturers
df = df_raw[df_raw.make.isin(models)].copy()
pd.crosstab(df.make, df.body_style)
# df.groupby(['make', 'body_style'])['body_style'].count().unstack().fillna(0)
# df.pivot_table(index='make', columns='body_style', aggfunc={'body_style':len}, fill_value=0)
pd.crosstab(df.make, df.num_doors, margins=True, margins_name="Total")
pd.crosstab(df.make, df.body_style, values=df.curb_weight, aggfunc='mean').round(0)
pd.crosstab(df.make, df.body_style, normalize=True)
pd.crosstab(df.make, df.body_style, normalize='columns')
pd.crosstab(df.make, df.body_style, normalize='index')
import pandas as pd
import seaborn as sns
# Define the headers since the data does not have any
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
"num_doors", "body_style", "drive_wheels", "engine_location",
"wheel_base", "length", "width", "height", "curb_weight",
"engine_type", "num_cylinders", "engine_size", "fuel_system",
"bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
"city_mpg", "highway_mpg", "price"]
# Read in the CSV file and convert "?" to NaN
df_raw = pd.read_csv("http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data",
header=None, names=headers, na_values="?" )
# Define a list of models that we want to review
models = ["toyota","nissan","mazda", "honda", "mitsubishi", "subaru", "volkswagen", "volvo"]
# Create a copy of the data with only the top 8 manufacturers
df = df_raw[df_raw.make.isin(models)].copy()
pd.crosstab(df.make, df.body_style)
# df.groupby(['make', 'body_style'])['body_style'].count().unstack().fillna(0)
# df.pivot_table(index='make', columns='body_style', aggfunc={'body_style':len}, fill_value=0)
pd.crosstab(df.make, [df.body_style, df.drive_wheels])
pd.crosstab([df.make, df.num_doors], [df.body_style, df.drive_wheels],
rownames=['Auto Manufacturer', "Doors"],
colnames=['Body Style', "Drive Type"],
dropna=False)
import pandas as pd
import seaborn as sns
# Define the headers since the data does not have any
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
"num_doors", "body_style", "drive_wheels", "engine_location",
"wheel_base", "length", "width", "height", "curb_weight",
"engine_type", "num_cylinders", "engine_size", "fuel_system",
"bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
"city_mpg", "highway_mpg", "price"]
# Read in the CSV file and convert "?" to NaN
df_raw = pd.read_csv("http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data",
header=None, names=headers, na_values="?" )
# Define a list of models that we want to review
models = ["toyota","nissan","mazda", "honda", "mitsubishi", "subaru", "volkswagen", "volvo"]
# Create a copy of the data with only the top 8 manufacturers
df = df_raw[df_raw.make.isin(models)].copy()
# Visualizing
sns.heatmap(pd.crosstab([df.make, df.num_doors], [df.body_style, df.drive_wheels]),
cmap="YlGnBu", annot=True, cbar=False)
import pandas as pd
import numpy as np
SALES=pd.read_csv("../data/sample-sales.csv")
report = SALES.pivot_table(values=['quantity'],index=['Account Name'],columns=['category'], aggfunc=np.sum)
report = SALES.pivot_table(values=['quantity'],index=['Account Name'],columns=['category'], fill_value=0, aggfunc=np.sum)
report = SALES.pivot_table(values=['ext price','quantity'],index=['Account Name'],columns=['category'], fill_value=0,aggfunc=np.sum)
import pandas as pd
import numpy as np
df = pd.read_excel("../data/sales-funnel.xlsx")
df["Status"] = df["Status"].astype("category")
df["Status"].cat.set_categories(["won","pending","presented","declined"],inplace=True)
df.head()
pd.pivot_table(df,index=["Name"])
pd.pivot_table(df,index=["Name","Rep","Manager"])
pd.pivot_table(df,index=["Manager","Rep"])
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"])
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],aggfunc=np.sum)
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],aggfunc=[np.mean,len])
import pandas as pd
import numpy as np
df = pd.read_excel("../data/sales-funnel.xlsx")
df["Status"] = df["Status"].astype("category")
df["Status"].cat.set_categories(["won","pending","presented","declined"],inplace=True)
df.head()
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],
columns=["Product"],aggfunc=[np.sum])
pd.pivot_table(df,index=["Manager","Rep"],values=["Price","Quantity"],
columns=["Product"],aggfunc=[np.sum],fill_value=0)
pd.pivot_table(df,index=["Manager","Rep","Product"],
values=["Price","Quantity"],aggfunc=[np.sum],fill_value=0)
pd.pivot_table(df,index=["Manager","Rep","Product"],
values=["Price","Quantity"],
aggfunc=[np.sum,np.mean],fill_value=0,margins=True)
pd.pivot_table(df,index=["Manager","Status"],values=["Price"],
aggfunc=[np.sum],fill_value=0,margins=True)
import pandas as pd
import numpy as np
df = pd.read_excel("../data/sales-funnel.xlsx")
table = pd.pivot_table(df,index=["Manager","Status"],columns=["Product"],values=["Quantity","Price"],
aggfunc={"Quantity":len,"Price":[np.sum,np.mean]},fill_value=0)
table
table.query('Manager == ["Debra Henley"]')
table.query('Status == ["pending","won"]')