mmngreco
9/26/2017 - 2:26 PM

trailing_month_returns

trailing_month_returns

# to install dependencies with anaconda installed
$ conda install pandas numpy pandas-datareader seaborn matplotlib
# else
$ pip install pandas numpy pandas-datareader seaborn matplotlib
# Comparing all approaches
# ========================
>>> all_approaches = pd.concat([approach1, approach2, approach3], axis=1,
>>>                            keys=['approach1', 'approach2', 'approach3'])

>>> all_approaches

                approach1	approach2	approach3
year	month			
2014	1	-0.129583	-0.129583	NaN
        2	0.028827	-0.009095	-0.009095
        3	0.101449	0.101449	0.101449
        4	0.026778	0.025877	0.025877
        5	-0.021377	-0.021377	-0.021377
        6	0.087527	0.085627	0.085627
        7	0.111467	0.115493	0.115493
        8	0.291743	0.269841	0.269841
        9	-0.285916	-0.281250	-0.281250
        10	0.078988	0.096047	0.096047
        11	0.085130	0.071042	0.071042
        12	-0.136578	-0.199663	-0.199663
...
2016	10	0.254927	0.284649	0.284649
        11	0.020043	-0.026972	-0.026972
        12	0.055222	-0.014386	-0.014386
2017	1	0.129630	0.107512	0.107512
        2	0.058861	0.063966	0.063966
        3	0.005060	0.020242	0.020242
        4	-0.030115	-0.027243	-0.027243
        5	-0.137291	-0.137291	-0.137291
        6	-0.044223	-0.054340	-0.054340
        7	0.056312	0.070896	0.070896
        8	0.047782	0.069686	0.069686
        9	0.102540	0.145277	NaN
# Approach 3: Now yes, the definitive approach
# ============================================
>>> approach3 = results_storage.asfreq('BM')\
>>>                            .set_index(['year', 'month'])\
>>>                            .close\
>>>                            .pct_change()
>>> approach3.tail(12)

year  month
2016  9       -0.017665
      10       0.284649
      11      -0.026972
      12      -0.014386
2017  1        0.107512
      2        0.063966
      3        0.020242
      4       -0.027243
      5       -0.137291
      6       -0.054340
      7        0.070896
      8        0.069686
Name: close, dtype: float64
# Approach 2: starting from daily returns
# =======================================
>>> r = prices.pct_change()
>>> approach2 = r.groupby((r.index.year, r.index.month))\
>>>              .apply(total_return_from_returns)
>>> approach2.tail(12)

Date  Date
2016  10      0.284649
      11     -0.026972
      12     -0.014386
2017  1       0.107512
      2       0.063966
      3       0.020242
      4      -0.027243
      5      -0.137291
      6      -0.054340
      7       0.070896
      8       0.069686
      9       0.145277
Name: close, dtype: float64
>>> select_idx = (2017, 8)
>>> idx_approach1 = results_storage.groupby(['year', 'month'])['close'].groups[select_idx]
>>> last_group = results_storage.loc[idx_approach1]

                   close	year	month	day	week_day week_day_name
Date						
20170801	29.299999	2017	8	1	1	Tuesday
20170802	30.480000	2017	8	2	2	Wednesday
20170803	30.680000	2017	8	3	3	Thursday
20170804	30.870001	2017	8	4	4	Friday
20170807	31.350000	2017	8	7	0	Monday
...
20170825	32.000000	2017	8	25	4	Friday
20170828	31.700001	2017	8	28	0	Monday
20170829	31.490000	2017	8	29	1	Tuesday
20170830	31.170000	2017	8	30	2	Wednesday
20170831	30.700001	2017	8	31	3	Thursday

# Approach 1: starting from prices
# ================================
>>> approach1 = results_storage.groupby(['year', 'month'], )['close'].apply(total_return)
>>> approach1.tail(12)

year  month
2016  10       0.254927
      11       0.020043
      12       0.055222
2017  1        0.129630
      2        0.058861
      3        0.005060
      4       -0.030115
      5       -0.137291
      6       -0.044223
      7        0.056312
      8        0.047782
      9        0.102540
Name: close, dtype: float64
>>> data
                open	     high	     low	    close	adj close	  volume
Date						
20140102	24.450001	24.480000	23.950001	24.000000	18.998478	4077900.0
20140103	24.020000	24.250000	23.850000	24.250000	19.196381	5094600.0
20140106	24.020000	24.049999	23.610001	23.889999	18.911404	3983200.0
20140107	23.900000	24.100000	23.420000	23.480000	18.586851	4596200.0
20140108	23.500000	23.570000	22.969999	23.010000	18.214798	6737300.0
....
20170920	34.660000	35.130001	34.310001	34.759998	34.759998	12143700.0
20170921	35.400002	35.970001	35.220001	35.520000	35.520000	16532900.0
20170922	35.369999	35.419998	35.020000	35.299999	35.299999	5420100.0
20170925	35.619999	35.740002	34.759998	34.799999	34.799999	5305700.0
20170926	35.160000	35.279999	34.950001	35.160000	35.160000	1947000.0
>>> today = '20170926'  # to make static this script.
>>> tckr = 'BBAS3.SA'  # Banco do Brasil SA
# download data
>>> data = pdr.get_data_yahoo(tckr, 2014, today)
>>> data = data.asfreq('B')  # add frequency needed for some pandas functionalities releated with offsets
>>> data.columns = data.columns.map(lambda col: col.lower())
# load packages
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
# %matplotlib inline

# ploting setup
plt.style.use(['seaborn-white', 'seaborn-paper'])
matplotlib.rc('font', family='Times New Roman', size=15)
set_matplotlib_formats('png', 'png', quality=80)
plt.rcParams['savefig.dpi'] = 100
plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = 10, 6
plt.rcParams['axes.labelsize'] = 10
plt.rcParams['axes.titlesize'] = 15
plt.rcParams['font.size'] = 12
plt.rcParams['lines.linewidth'] = 1.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['ytick.labelsize'] = 11
plt.rcParams['xtick.labelsize'] = 11
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.serif'] = 'cm'
plt.rcParams['axes.grid'] = True
kw_save = dict(bbox_iches='tight', transparent=True)

# asset information
asset_info = '''
Banco do Brasil S.A. (BBAS3.SA)
Sao Paolo - Sao Paolo Delayed Price. Currency in BRL.
Source: https://finance.yahoo.com/quote/BBAS3.SA
'''

# useful functions
# ================
def total_return(prices):
    """Retuns the return between the first and last value of the DataFrame.

    Parameters
    ----------
    prices : pandas.Series or pandas.DataFrame

    Returns
    -------
    total_return : float or pandas.Series
        Depending on the input passed returns a float or a pandas.Series.
    """
    return prices.iloc[-1] / prices.iloc[0] - 1


def total_return_from_returns(returns):
    """Retuns the return between the first and last value of the DataFrame.

    Parameters
    ----------
    returns : pandas.Series or pandas.DataFrame

    Returns
    -------
    total_return : float or pandas.Series
        Depending on the input passed returns a float or a pandas.Series.
    """
    return (returns + 1).prod() - 1


def plot_this(df, title, figsize=None, ylabel='',
             output_file='imgs/fig_rets_approach1.png', bottom_adj=0.25,
             txt_ymin=-0.4, bar=False):
    if bar:
        ax = df.plot.bar(title=title, figsize=figsize)
    else:
        ax = df.plot(title=title, figsize=figsize)
    sns.despine()
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.text(0, txt_ymin, asset_info, transform=ax.transAxes, fontsize=9)
    plt.gcf().subplots_adjust(bottom=bottom_adj)
    plt.savefig(output_file, **kw_save)

# Geting data
# ===========
today = '20170926'  # to make static this script.
tckr = 'BBAS3.SA'  # Banco do Brasil SA
# download data
data = pdr.get_data_yahoo(tckr, 2014, today)
data = data.asfreq('B')  # add frequency needed for some pandas functionalities releated with offsets
data.columns = data.columns.map(lambda col: col.lower())
data.head()  # first values
data.tail()  # last values

# what about NaNs
data.isnull().sum()
data.ffill(inplace=True)  # to avoid problems with NaNs.

# using close prices
prices = data.close.copy()
# we convert to DataFrame to make easy store more series.
results_storage = prices.to_frame().copy()

# plotting
plot_this(prices, title='Prices of %s' % tckr, ylabel='Prices in BRL',
          txt_ymin=-0.2, bottom_adj=0.15, output_file='imgs/fig_prices.png',)

# extract some date information
results_storage['year'] = prices.index.year
results_storage['month'] = prices.index.month
results_storage['day'] = prices.index.day
results_storage['week_day'] = prices.index.dayofweek
results_storage['week_day_name'] = prices.index.strftime('%A')
results_storage.tail(10)

# ================
# Trailing Returns
# ================

# Approach 1: starting from prices
# ================================
approach1 = results_storage.groupby(['year', 'month'], )['close']\
                           .apply(total_return)
approach1.tail(10)

# ploting
# -------
plot_this(approach1, bar=True, title='Trailing returns: Approach 1',
          ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
          output_file='imgs/fig_rets_approach1.png')

# Nota bene: What means approach 1:
# means that we are selecting all available prices INSIDE a month and then we
# calculate the total return with that prices.
select_idx = (2017, 8)
idx_approach1 = results_storage.groupby(['year', 'month'])['close']\
                               .groups[select_idx]
last_group = results_storage.loc[idx_approach1]
last_group.head()
last_group.tail()

# example of the calculation
total_return(last_group.close)
approach1.loc[select_idx]

# Approach 2: starting from daily returns
# =======================================
r = prices.pct_change()
approach2 = r.groupby((r.index.year, r.index.month))\
             .apply(total_return_from_returns)

approach2.tail(10)

plot_this(approach2, bar=True, title='Trailing returns: Approach 2',
          ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
          output_file='imgs/fig_rets_approach2.png')

# However, this approximation is almost correct since we have started from 2014
# prices, therefore it is not possible to calculate the return of the first
# month of that first available year.
# This same situation occurs in the last month available (current), as no data
# are available for the last day of the month, the return is also not
# comparable with the rest.



# Approach 3: Now yes, the definitive approach
# ============================================
# ... what mean approach3, first trimm prices according to end of month this
# then with that prices calculate returns. So we are calculating returns between
# different months and mostly in differents days whichs always are the last
# bussiness day of the month.

# with "asfreq" we decimate the prices, then group by year and month, so we
# have all the prices at the end of the working month available in the DataFrame.
# Finally we calculate the return of this new series with "pct_change".
approach3 = results_storage.asfreq('BM')\
                           .set_index(['year', 'month'])\
                           .close\
                           .pct_change()
approach3.tail(10)
plot_this(approach3, bar=True, title='Trailing returns: Approach 3',
          ylabel='Returns (parts per unit)', txt_ymin=-0.4, bottom_adj=0.25,
          output_file='imgs/fig_rets_approach3.png')


# Comparing all approaches
# ========================
all_approaches = pd.concat([approach1, approach2, approach3], axis=1,
                           keys=['approach1', 'approach2', 'approach3'])

plot_this(all_approaches, title='Comparing all approaches',
          output_file='imgs/all_approaches.png', bar=True,
          ylabel='Returns (parts per unit)', figsize=(15,8), bottom_adj=0.2,
          txt_ymin=-0.3)



# 
# useful functions
# ================
def total_return(prices):
    """Retuns the return between the first and last value of the DataFrame.
    Parameters
    ----------
    prices : pandas.Series or pandas.DataFrame
    Returns
    -------
    total_return : float or pandas.Series
        Depending on the input passed returns a float or a pandas.Series.
    """
    return prices.iloc[-1] / prices.iloc[0] - 1


def total_return_from_returns(returns):
    """Retuns the return between the first and last value of the DataFrame.
    Parameters
    ----------
    returns : pandas.Series or pandas.DataFrame
    Returns
    -------
    total_return : float or pandas.Series
        Depending on the input passed returns a float or a pandas.Series.
    """
    return (returns + 1).prod() - 1


def plot_this(df, title, figsize=None, ylabel='',
             output_file='imgs/fig_rets_approach1.png', bottom_adj=0.25,
             txt_ymin=-0.4, bar=False):
    if bar:
        ax = df.plot.bar(title=title, figsize=figsize)
    else:
        ax = df.plot(title=title, figsize=figsize)
    sns.despine()
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.text(0, txt_ymin, asset_info, transform=ax.transAxes, fontsize=9)
    plt.gcf().subplots_adjust(bottom=bottom_adj)
    plt.savefig(output_file, **kw_save)