# In this demonstration, I estimate a discrete-time NGARCH model.
# I first simulate the model with known parameters, then try to
# recover those parameters from the simulated data. I then apply the
# model to weekly returns for Google.
# The model seems to recover most of the parameters fairly well,
# though inference for alpha and beta is fairly weak, and gamma
# does not seem to be well identified.
# jim savage, james@lendable.io
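# For reference, the generative model as implemented below:
#   R[t] ~ normal(r + lambda*sqrt(h[t]) - h[t]/2, sqrt(h[t]))
#   h[t] = (omega + h[t-1]) / (1 - (beta + alpha*(1 + gamma^2)
#                                   - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2]))
# where the columns of dW are iid standard normal shocks and h[0] = h0.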
library(ggplot2); library(Quandl); library(dplyr); library(rstan)
# Get some data to play with
Goog <- Quandl("YAHOO/GOOG", collapse = "weekly") %>%
  arrange(Date)
# Take log difference
R <- diff(log(Goog$`Adjusted Close`))
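# Quick look at the return series (an optional sanity check)
qplot(Goog$Date[-1], R, geom = "line", xlab = "Date", ylab = "Weekly log return")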
# Generate some fake data
# Sample parameters from priors
N <- 500
r <- rnorm(1, 0, 0.01)
h0 <- sqrt(rnorm(1, .005, .005)^2) # no idea!
dW <- matrix(rnorm(N*2, 0, 1), N, 2) # if W is a Wiener process, one-step increments ~ normal(0, 1)
lambda <- rnorm(1, 0.1, .1)
alpha <- sqrt(rnorm(1, 0, .1)^2) # constrain positive
beta <- -1*sqrt(rnorm(1, 0, .2)^2) # constrain negative
gamma <- rnorm(1, 0, .2)
omega <- rnorm(1, 0, .01)
h <- rep(NA, N)
R3 <- rep(NA, N)
for(t in 1:N) {
  if(t==1) {
    h[t] <- (omega + h0)*(1/(1 - (beta + alpha*(1 + gamma^2) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])))
  } else {
    h[t] <- (omega + h[t-1])*(1/(1 - (beta + alpha*(1 + gamma^2) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])))
  }
  R3[t] <- rnorm(1, r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]))
}
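# Eyeball the simulated volatility path before trying to recover it
plot(sqrt(h), type = "l", xlab = "t", ylab = "Simulated volatility (sqrt(h))")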
# Declare a model: you should really do this in
# a stan file
model <- "data{
int N;
vector[N] R;
}
parameters {
real r;
real lambda;
matrix[N, 2] dW;
real gamma;
// I don't think alpha is sign identified
real<lower = 0> alpha;
real omega;
real<upper = 0> beta;
real<lower = 0> h0;
}
transformed parameters {
vector[N] h;
{
for(t in 1:N) {
if(t==1){
h[t] <- (omega + h0)*(1/(1 - (beta + alpha*(1 + pow(gamma, 2)) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
} else {
h[t] <- (omega + h[t-1])*(1/(1 - (beta + alpha*(1 + pow(gamma, 2)) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
}
}
}
}
model {
// priors
r ~ normal(0, 0.01);
h0 ~ normal(0.05, 0.05); // no idea!
to_vector(dW) ~ normal(0, 1); // if W is Weiner, one-step increments ~ normal(0, 1)
lambda ~ normal(0.1, .1);
alpha ~ normal(0, 1);
beta ~ normal(0, 1);
gamma ~ normal(0, .2);
omega ~ normal(0, .01);
// measurement model
for(t in 1:N){
R[t] ~ normal(r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]));
}
}"
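# If you'd rather keep the model in its own file, as the comment above
# suggests, something like this works ("ngarch.stan" is just an
# illustrative filename):
# writeLines(model, "ngarch.stan")
# compiled_model <- stan_model(file = "ngarch.stan")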
# Compile your model
compiled_model <- stan_model(model_code = model)
# Sample from the model (takes 5 mins or so on my computer)
test_hmc <- sampling(compiled_model, data = list(R = R3, N = N), cores = 4, iter = 2000)
print(test_hmc, pars = c("alpha", "beta", "omega", "r", "lambda", "gamma"))
print(c(alpha, beta, omega, r, lambda, gamma))
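# It's also worth checking that the chains mix for the weakly identified
# parameters; a quick look with rstan's traceplot:
traceplot(test_hmc, pars = c("alpha", "beta", "gamma"))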
# Grab the volatility (h) draws
pars <- extract(test_hmc, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
# Get the posterior median of the volatility path
par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x))))) %>%
  mutate(real_h = sqrt(h)) # compare on the volatility (standard deviation) scale
# Plot the estimated and known volatility measures
plot(par_interval$median, par_interval$real_h)
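# A 45-degree line makes the comparison easier to read
abline(0, 1, col = "red")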
# Now run the model on Google
google_model <- sampling(compiled_model, data = list(R = R, N = length(R)), cores = 4, iter = 2000)
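# Parameter summaries for the Google fit, mirroring the check above
print(google_model, pars = c("alpha", "beta", "omega", "r", "lambda", "gamma"))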
# Grab the parameters you want
pars <- extract(google_model, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
# Get 95% credibility interval
par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x)),
                                                                         lower = quantile(sqrt(x), 0.025),
                                                                         upper = quantile(sqrt(x), 0.975)))) %>%
  mutate(time = Goog$Date[-1])
# Plot the volatility
par_interval %>%
  filter(time > "2005-06-01") %>%
  ggplot(aes(x = time)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "orange", alpha = 0.3) +
  geom_line(aes(y = median)) +
  ylab("Volatility") +
  xlab("Date")