"""
B-006 - Simple Graphing with IPython and Pandas
https://pbpython.com/simple-graphing-pandas.html
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
sales=pd.read_csv("https://pbpython.com/extras/sample-salesv2.csv", parse_dates=['date'])
customers = sales[['name','ext price','date']]
customer_group = customers.groupby('name')
sales_totals = customer_group.sum()
my_plot = sales_totals.plot(kind='bar')
•sorting the data in descending order
•removing the legend
•adding a title
•labeling the axes
my_plot = sales_totals.sort_values(by='ext price', ascending=False).plot(kind='bar',legend=None,title="Total Sales by Customer")
my_plot.set_xlabel("Customers")
my_plot.set_ylabel("Sales ($)")
"""
B-006 - Simple Graphing with IPython and Pandas
https://pbpython.com/simple-graphing-pandas.html
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
sales=pd.read_csv("https://pbpython.com/extras/sample-salesv2.csv", parse_dates=['date'])
customers = sales[['name','ext price','date']]
customer_group = customers.groupby('name')
sales_totals = customer_group.sum()
customers = sales[['name','category','ext price','date']]
category_group=pd.crosstab(customers.name, customers.category, values=customers['ext price'], aggfunc='sum')
my_plot = category_group.plot(kind='bar',stacked=True,title="Total Sales by Customer", figsize=(9, 7))
my_plot.set_xlabel("Customers")
my_plot.set_ylabel("Sales")
my_plot.legend(["Total","Belts","Shirts","Shoes"], loc=9, ncol=4)
"""
B-008 Creating a Waterfall Chart in Python
https://pbpython.com/simple-graphing-pandas.html
"""
import xlwings as xw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
# Connects to a saved book
wb1 = xw.Book('C:/Users/Joseph Yu/Desktop/Book1.xlsx')
sheet = wb1.sheets[0]
"""
!!! Make sure you would follow the exact pattern for index =, data = {'amount': ...} !!!
"""
#Data to plot. Do not include a total, it will be calculated
index = sheet.range('A1').expand('down').value
data = {'amount': sheet.range('B1').expand('down').value}
# Alternative: manually input data from list/dict
# index = ['sales','returns','credit fees','rebates','late charges','shipping']
# data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
# trans = pd.DataFrame(data=data,index=index)
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(10, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
"""
B-006 - Simple Graphing with IPython and Pandas
https://pbpython.com/simple-graphing-pandas.html
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
sales=pd.read_csv("https://pbpython.com/extras/sample-salesv2.csv", parse_dates=['date'])
purchase_patterns = sales[['ext price','date']]
purchase_plot = purchase_patterns['ext price'].hist(bins=20)
purchase_plot.set_title("Purchase Patterns")
purchase_plot.set_xlabel("Order Amount($)")
purchase_plot.set_ylabel("Number of orders")
"""
B-006 - Simple Graphing with IPython and Pandas
https://pbpython.com/simple-graphing-pandas.html
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
sales=pd.read_csv("https://pbpython.com/extras/sample-salesv2.csv", parse_dates=['date'])
purchase_patterns = sales[['ext price','date']]
purchase_patterns = purchase_patterns.set_index('date')
purchase_plot = purchase_patterns.resample('M').sum().plot(title="Total Sales by Month",legend=None)
fig = purchase_plot.get_figure()
# fig.savefig("total-sales.png")
"""
B-055 - Building Bullet Graphs and Waterfall Charts with Bokeh
https://pbpython.com/bokeh-bullet-waterfall.html
"""
# enable Bokeh’s output to display
import pandas as pd
import numpy as np
from bokeh.io import show, output_notebook
from bokeh.palettes import PuBu4
from bokeh.plotting import figure
from bokeh.models import Label
output_notebook()
# BokehJS 0.13.0 successfully loaded.
# Connects to a saved book (looks in all Excel instances)
wb1 = xw.Book('C:/Users/Joseph Yu/Desktop/Bullet-Graphs-and-Waterfall-Charts-with-Bokeh.xlsx')
sheet = wb1.sheets[0]
sheet.range('A1').expand('table').value
data = sheet.range('A1').expand('table').value
# Load in the data
data= [['John Smith', 105.0, 120.0], ['Jane Jones', 99.0, 110.0], ['Fred Flintstone', 109.0, 125.0], ['Barney Rubble', 135.0, 123.0], ['Mr T', 45.0, 105.0]]
limits = [0, 20, 60, 100, 160]
labels = ["Poor", "OK", "Good", "Excellent"]
cats = [x[0] for x in data]
# Load in the data
data= [("John Smith", 105, 120),
("Jane Jones", 99, 110),
("Fred Flintstone", 109, 125),
("Barney Rubble", 135, 123),
("Mr T", 45, 105)]
limits = [0, 20, 60, 100, 160]
labels = ["Poor", "OK", "Good", "Excellent"]
cats = [x[0] for x in data]
create the Bokeh figure and set a couple of options
# Create the base figure
p=figure(title="Sales Rep Performance", plot_height=350, plot_width=800, y_range=cats)
p.x_range.range_padding = 0
p.grid.grid_line_color = None
p.xaxis[0].ticker.num_minor_ticks = 0
# Here's the format of the data we need
print(list(zip(limits[:-1], limits[1:], PuBu4[::-1])))
[(0, 20, '#f1eef6'), (20, 60, '#bdc9e1'), (60, 100, '#74a9cf'), (100, 160, '#0570b0')]
for left, right, color in zip(limits[:-1], limits[1:], PuBu4[::-1]):
p.hbar(y=cats, left=left, right=right, height=0.8, color=color)
show(p)
# Now add the black bars for the actual performance
perf = [x[1] for x in data]
p.hbar(y=cats, left=0, right=perf, height=0.3, color="black")
show(p)
# Add the segment for the target
comp = [x[2]for x in data]
p.segment(x0=comp, y0=[(x, -0.5) for x in cats], x1=comp,
y1=[(x, 0.5) for x in cats], color="white", line_width=2)
show(p)
# Add the labels
for start, label in zip(limits[:-1], labels):
p.add_layout(Label(x=start, y=0, text=label, text_font_size="10pt",
text_color='black', y_offset=5, x_offset=15))
show(p)
import xlwings as xw
import numpy as np
import pandas as pd
import datetime as dt
import time
import sys
# Matplotlib
%matplotlib inline
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import matplotlib
# Swap rate example
years = [1, 2, 3, 4, 5, 7, 10, 30]
swap_rate = [0.0079, 0.0094, 0.0107, 0.0119,
0.013, 0.0151, 0.0174, 0.022]
years_new = np.linspace(1, 30, num=150)
interpolate = interp1d(years, swap_rate, kind='quadratic')
"""
Example: update it by changing mannually
years = [1, 2, 3, 4, 5, 7, 10]
swap_rate = [0.0079, 0.0094, 0.0107, 0.0119,
0.013, 0.0151, 0.0174]
years_new = np.linspace(1, 10, num=10)
interpolate = interp1d(years, swap_rate, kind='quadratic')
"""
fig = plt.figure(figsize=(6, 4))
swaprate_plot = plt.plot(years, swap_rate, 'o',
years_new, interpolate(years_new), '-')
wb = xw.Book()
sheet = wb.sheets[0]
plot = sheet.pictures.add(fig, name='SwapRate', update=True)
# Fine Tuning - manipulate properties after adding the picture
plot.height = plot.height / 2
plot.width = plot.width / 2
# Fine Tuning - create new chart underneath
width, height = fig.get_size_inches()
dpi = fig.get_dpi()
sheet.pictures.add(fig, name='SwapRate2', update=True,
left=sheet.range('A25').left, top=sheet.range('A25').top,
width=width * dpi / 2, height=height * dpi / 2)