trailing_month_returns
# to install dependencies with anaconda installed
$ conda install pandas numpy pandas-datareader seaborn matplotlib
# else
$ pip install pandas numpy pandas-datareader seaborn matplotlib
# Comparing all approaches
# ========================
>>> all_approaches = pd.concat([approach1, approach2, approach3], axis=1,
>>> keys=['approach1', 'approach2', 'approach3'])
>>> all_approaches
approach1 approach2 approach3
year month
2014 1 -0.129583 -0.129583 NaN
2 0.028827 -0.009095 -0.009095
3 0.101449 0.101449 0.101449
4 0.026778 0.025877 0.025877
5 -0.021377 -0.021377 -0.021377
6 0.087527 0.085627 0.085627
7 0.111467 0.115493 0.115493
8 0.291743 0.269841 0.269841
9 -0.285916 -0.281250 -0.281250
10 0.078988 0.096047 0.096047
11 0.085130 0.071042 0.071042
12 -0.136578 -0.199663 -0.199663
...
2016 10 0.254927 0.284649 0.284649
11 0.020043 -0.026972 -0.026972
12 0.055222 -0.014386 -0.014386
2017 1 0.129630 0.107512 0.107512
2 0.058861 0.063966 0.063966
3 0.005060 0.020242 0.020242
4 -0.030115 -0.027243 -0.027243
5 -0.137291 -0.137291 -0.137291
6 -0.044223 -0.054340 -0.054340
7 0.056312 0.070896 0.070896
8 0.047782 0.069686 0.069686
9 0.102540 0.145277 NaN
# Approach 3: Now yes, the definitive approach
# ============================================
>>> approach3 = results_storage.asfreq('BM')\
>>> .set_index(['year', 'month'])\
>>> .close\
>>> .pct_change()
>>> approach3.tail(12)
year month
2016 9 -0.017665
10 0.284649
11 -0.026972
12 -0.014386
2017 1 0.107512
2 0.063966
3 0.020242
4 -0.027243
5 -0.137291
6 -0.054340
7 0.070896
8 0.069686
Name: close, dtype: float64
# Approach 2: starting from daily returns
# =======================================
>>> r = prices.pct_change()
>>> approach2 = r.groupby((r.index.year, r.index.month))\
>>> .apply(total_return_from_returns)
>>> approach2.tail(12)
Date Date
2016 10 0.284649
11 -0.026972
12 -0.014386
2017 1 0.107512
2 0.063966
3 0.020242
4 -0.027243
5 -0.137291
6 -0.054340
7 0.070896
8 0.069686
9 0.145277
Name: close, dtype: float64
>>> select_idx = (2017, 8)
>>> idx_approach1 = results_storage.groupby(['year', 'month'])['close'].groups[select_idx]
>>> last_group = results_storage.loc[idx_approach1]
close year month day week_day week_day_name
Date
20170801 29.299999 2017 8 1 1 Tuesday
20170802 30.480000 2017 8 2 2 Wednesday
20170803 30.680000 2017 8 3 3 Thursday
20170804 30.870001 2017 8 4 4 Friday
20170807 31.350000 2017 8 7 0 Monday
...
20170825 32.000000 2017 8 25 4 Friday
20170828 31.700001 2017 8 28 0 Monday
20170829 31.490000 2017 8 29 1 Tuesday
20170830 31.170000 2017 8 30 2 Wednesday
20170831 30.700001 2017 8 31 3 Thursday
# Approach 1: starting from prices
# ================================
>>> approach1 = results_storage.groupby(['year', 'month'], )['close'].apply(total_return)
>>> approach1.tail(12)
year month
2016 10 0.254927
11 0.020043
12 0.055222
2017 1 0.129630
2 0.058861
3 0.005060
4 -0.030115
5 -0.137291
6 -0.044223
7 0.056312
8 0.047782
9 0.102540
Name: close, dtype: float64
>>> data
open high low close adj close volume
Date
20140102 24.450001 24.480000 23.950001 24.000000 18.998478 4077900.0
20140103 24.020000 24.250000 23.850000 24.250000 19.196381 5094600.0
20140106 24.020000 24.049999 23.610001 23.889999 18.911404 3983200.0
20140107 23.900000 24.100000 23.420000 23.480000 18.586851 4596200.0
20140108 23.500000 23.570000 22.969999 23.010000 18.214798 6737300.0
....
20170920 34.660000 35.130001 34.310001 34.759998 34.759998 12143700.0
20170921 35.400002 35.970001 35.220001 35.520000 35.520000 16532900.0
20170922 35.369999 35.419998 35.020000 35.299999 35.299999 5420100.0
20170925 35.619999 35.740002 34.759998 34.799999 34.799999 5305700.0
20170926 35.160000 35.279999 34.950001 35.160000 35.160000 1947000.0
>>> today = '20170926' # to make static this script.
>>> tckr = 'BBAS3.SA' # Banco do Brasil SA
# download data
>>> data = pdr.get_data_yahoo(tckr, 2014, today)
>>> data = data.asfreq('B') # add frequency needed for some pandas functionalities releated with offsets
>>> data.columns = data.columns.map(lambda col: col.lower())
# load packages
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
# %matplotlib inline
# ploting setup
plt.style.use(['seaborn-white', 'seaborn-paper'])
matplotlib.rc('font', family='Times New Roman', size=15)
set_matplotlib_formats('png', 'png', quality=80)
plt.rcParams['savefig.dpi'] = 100
plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = 10, 6
plt.rcParams['axes.labelsize'] = 10
plt.rcParams['axes.titlesize'] = 15
plt.rcParams['font.size'] = 12
plt.rcParams['lines.linewidth'] = 1.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['ytick.labelsize'] = 11
plt.rcParams['xtick.labelsize'] = 11
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.serif'] = 'cm'
plt.rcParams['axes.grid'] = True
kw_save = dict(bbox_iches='tight', transparent=True)
# asset information
asset_info = '''
Banco do Brasil S.A. (BBAS3.SA)
Sao Paolo - Sao Paolo Delayed Price. Currency in BRL.
Source: https://finance.yahoo.com/quote/BBAS3.SA
'''
# useful functions
# ================
def total_return(prices):
"""Retuns the return between the first and last value of the DataFrame.
Parameters
----------
prices : pandas.Series or pandas.DataFrame
Returns
-------
total_return : float or pandas.Series
Depending on the input passed returns a float or a pandas.Series.
"""
return prices.iloc[-1] / prices.iloc[0] - 1
def total_return_from_returns(returns):
"""Retuns the return between the first and last value of the DataFrame.
Parameters
----------
returns : pandas.Series or pandas.DataFrame
Returns
-------
total_return : float or pandas.Series
Depending on the input passed returns a float or a pandas.Series.
"""
return (returns + 1).prod() - 1
def plot_this(df, title, figsize=None, ylabel='',
output_file='imgs/fig_rets_approach1.png', bottom_adj=0.25,
txt_ymin=-0.4, bar=False):
if bar:
ax = df.plot.bar(title=title, figsize=figsize)
else:
ax = df.plot(title=title, figsize=figsize)
sns.despine()
plt.ylabel(ylabel)
plt.tight_layout()
plt.text(0, txt_ymin, asset_info, transform=ax.transAxes, fontsize=9)
plt.gcf().subplots_adjust(bottom=bottom_adj)
plt.savefig(output_file, **kw_save)
# Geting data
# ===========
today = '20170926' # to make static this script.
tckr = 'BBAS3.SA' # Banco do Brasil SA
# download data
data = pdr.get_data_yahoo(tckr, 2014, today)
data = data.asfreq('B') # add frequency needed for some pandas functionalities releated with offsets
data.columns = data.columns.map(lambda col: col.lower())
data.head() # first values
data.tail() # last values
# what about NaNs
data.isnull().sum()
data.ffill(inplace=True) # to avoid problems with NaNs.
# using close prices
prices = data.close.copy()
# we convert to DataFrame to make easy store more series.
results_storage = prices.to_frame().copy()
# plotting
plot_this(prices, title='Prices of %s' % tckr, ylabel='Prices in BRL',
txt_ymin=-0.2, bottom_adj=0.15, output_file='imgs/fig_prices.png',)
# extract some date information
results_storage['year'] = prices.index.year
results_storage['month'] = prices.index.month
results_storage['day'] = prices.index.day
results_storage['week_day'] = prices.index.dayofweek
results_storage['week_day_name'] = prices.index.strftime('%A')
results_storage.tail(10)
# ================
# Trailing Returns
# ================
# Approach 1: starting from prices
# ================================
approach1 = results_storage.groupby(['year', 'month'], )['close']\
.apply(total_return)
approach1.tail(10)
# ploting
# -------
plot_this(approach1, bar=True, title='Trailing returns: Approach 1',
ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
output_file='imgs/fig_rets_approach1.png')
# Nota bene: What means approach 1:
# means that we are selecting all available prices INSIDE a month and then we
# calculate the total return with that prices.
select_idx = (2017, 8)
idx_approach1 = results_storage.groupby(['year', 'month'])['close']\
.groups[select_idx]
last_group = results_storage.loc[idx_approach1]
last_group.head()
last_group.tail()
# example of the calculation
total_return(last_group.close)
approach1.loc[select_idx]
# Approach 2: starting from daily returns
# =======================================
r = prices.pct_change()
approach2 = r.groupby((r.index.year, r.index.month))\
.apply(total_return_from_returns)
approach2.tail(10)
plot_this(approach2, bar=True, title='Trailing returns: Approach 2',
ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
output_file='imgs/fig_rets_approach2.png')
# However, this approximation is almost correct since we have started from 2014
# prices, therefore it is not possible to calculate the return of the first
# month of that first available year.
# This same situation occurs in the last month available (current), as no data
# are available for the last day of the month, the return is also not
# comparable with the rest.
# Approach 3: Now yes, the definitive approach
# ============================================
# ... what mean approach3, first trimm prices according to end of month this
# then with that prices calculate returns. So we are calculating returns between
# different months and mostly in differents days whichs always are the last
# bussiness day of the month.
# with "asfreq" we decimate the prices, then group by year and month, so we
# have all the prices at the end of the working month available in the DataFrame.
# Finally we calculate the return of this new series with "pct_change".
approach3 = results_storage.asfreq('BM')\
.set_index(['year', 'month'])\
.close\
.pct_change()
approach3.tail(10)
plot_this(approach3, bar=True, title='Trailing returns: Approach 3',
ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
output_file='imgs/fig_rets_approach3.png')
# Comparing all approaches
# ========================
all_approaches = pd.concat([approach1, approach2, approach3], axis=1,
keys=['approach1', 'approach2', 'approach3'])
plot_this(all_approaches, title='Comparing all approaches',
output_file='imgs/all_approaches.png', bar=True,
ylabel='Returns (parts per unit)', figsize=(15,8), bottom_adj=0.2,
txt_ymin=-0.3)
#
# useful functions
# ================
def total_return(prices):
"""Retuns the return between the first and last value of the DataFrame.
Parameters
----------
prices : pandas.Series or pandas.DataFrame
Returns
-------
total_return : float or pandas.Series
Depending on the input passed returns a float or a pandas.Series.
"""
return prices.iloc[-1] / prices.iloc[0] - 1
def total_return_from_returns(returns):
"""Retuns the return between the first and last value of the DataFrame.
Parameters
----------
returns : pandas.Series or pandas.DataFrame
Returns
-------
total_return : float or pandas.Series
Depending on the input passed returns a float or a pandas.Series.
"""
return (returns + 1).prod() - 1
def plot_this(df, title, figsize=None, ylabel='',
output_file='imgs/fig_rets_approach1.png', bottom_adj=0.25,
txt_ymin=-0.4, bar=False):
if bar:
ax = df.plot.bar(title=title, figsize=figsize)
else:
ax = df.plot(title=title, figsize=figsize)
sns.despine()
plt.ylabel(ylabel)
plt.tight_layout()
plt.text(0, txt_ymin, asset_info, transform=ax.transAxes, fontsize=9)
plt.gcf().subplots_adjust(bottom=bottom_adj)
plt.savefig(output_file, **kw_save)