alexbernal0
4/20/2017 - 2:25 AM

## Python Linear Regression

Python Linear Regression

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 2,
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import datetime\n",
" \n",
"# We will look at stock prices over the past year, starting at January 1, 2016\n",
"start = datetime.datetime(2016,1,1)\n",
"end = datetime.date.today()\n",
" \n",
"# Let's get Apple stock data; Apple's ticker symbol is AAPL\n",
"# First argument is the series we want, second is the source (\"yahoo\" for Yahoo! Finance), third is the start date, fourth is the end date\n",
"apple = web.DataReader(\"AAPL\", \"yahoo\", start, end)\n",
" \n",
"type(apple)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"109.89705023076922"
]
},
"execution_count": 8,
"output_type": "execute_result"
}
],
"source": [
"#Avg Stock Value\n",
]
},
{
"cell_type": "code",
"execution_count": 9,
"collapsed": true
},
"outputs": [],
"source": [
"#Stock Volatility"
]
},
{
"cell_type": "code",
"execution_count": 13,
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.text.Text at 0xb6bc710>"
]
},
"execution_count": 13,
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"<matplotlib.figure.Figure at 0xb5557f0>"
]
},
"output_type": "display_data"
}
],
"source": [
"#Daily Stock Return\n",
"ret = apple[\"Adj Close\"].pct_change()  # calculating returns is also easy with pandas. Just call .pct_change() \n",
"(100*ret).plot() # plot % return\n",
"ylabel('close-close % change')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.217321742942\n"
]
}
],
"source": [
" # Compute daily volatility  \n",
"historical_vol_daily = np.std(ret,axis=0)  \n",
"\n",
"# Convert daily volatility to annual volatility, assuming 252 trading days  \n",
"historical_vol_annually = historical_vol_daily*math.sqrt(252)  \n",
"\n",
"# Return estimate of annual volatility  \n",
"print(historical_vol_annually)"
]
},
{
"cell_type": "markdown",
"source": [
"Linear regression"
]
},
{
"cell_type": "code",
"execution_count": 29,
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"from pylab import *\n",
"import pandas as pd\n",
"import datetime\n",
"import sys\n",
"import numpy as np\n",
"import scipy as sp\n",
"import pylab\n",
"from scipy.optimize import leastsq"
]
},
{
"cell_type": "code",
"execution_count": 31,
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import datetime\n",
" \n",
"# We will look at stock prices over the past year, starting at January 1, 2016\n",
"start = datetime.datetime(2015,4,1)\n",
"end = datetime.datetime(2015,6,1)\n",
" \n",
"# Let's get Apple stock data; Apple's ticker symbol is AAPL\n",
"# First argument is the series we want, second is the source (\"yahoo\" for Yahoo! Finance), third is the start date, fourth is the end date\n",
"jpm = web.DataReader(\"JPM\", \"yahoo\", start, end)\n",
"spy = web.DataReader(\"SPY\", \"yahoo\", start, end)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"# we're using yahoo finance data, pandas datareader will import the data we need\n",
"\n",
]
},
{
"cell_type": "code",
"execution_count": 59,
"collapsed": false
},
"outputs": [],
"source": [
"symbols = [\"JPM\", \"SPY\"]\n",
"# df is the main dataframe that'll hold the Adjusted closing prices\n",
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"                  JPM         SPY\n",
"Date                             \n",
"2015-04-01  56.797905  197.326422\n",
"2015-04-02  57.337935  198.036303\n",
"2015-04-06  57.290564  199.369719\n",
"2015-04-07  57.650582  198.842105\n",
"2015-04-08  57.840067  199.513607\n"
]
}
],
"source": [
"for symbol in symbols:\n",
"    \n",
"    # we only need the adjusted close price.\n",
"\n",
"# lets take a look,\n",
]
},
{
"cell_type": "code",
"execution_count": 64,
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"formula: y = [ 1.21354543]x + [ 125.97901614]\n"
]
}
],
"source": [
"# Regress SPY (response) on JPM (single predictor) using the adjusted closes held in df.\n",
"# DataFrame/Series inputs replace np.matrix, which NumPy no longer recommends and\n",
"# recent scikit-learn releases refuse to accept.\n",
"X = df[[\"JPM\"]]  # 2-D selection: fit() expects a (n_samples, n_features) predictor\n",
"Y = df[\"SPY\"]  # 1-D target, so the slope and intercept come back as plain scalars\n",
"mdl = LinearRegression().fit(X, Y)\n",
"m = mdl.coef_[0]\n",
"b = mdl.intercept_\n",
"# print() with a single parenthesised argument behaves the same on Python 2 and Python 3\n",
"print(\"formula: y = {0}x + {1}\".format(m, b))  # following slope intercept form"
]
},
{
"cell_type": "code",
"execution_count": 66,
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.figure.Figure at 0xe489390>"
]
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(X,Y, color='blue')\n",
"plt.plot([0,100],[b,m*100+b],'r')\n",
"plt.title('Linear Regression JPM / SPY', fontsize = 20)\n",
"plt.xlabel('JPM', fontsize = 15)\n",
"plt.ylabel('SPY', fontsize = 15)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"collapsed": true
},
"outputs": [],
"source": []
}
],
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}