In this example, we consider the stochastic volatility model SV0 for application e.g. in finance.
Reference: S. Chib, F. Nardari, N. Shepard. Markov chain Monte Carlo methods for stochastic volatility models. Journal of econometrics, vol. 108, pp. 281-316, 2002.
The stochastic volatility model is defined as follows
\[ \alpha\sim \mathcal N (0, .0001),~~~\] \[ logit(\beta) \sim \mathcal N (0, 10),~~~\] \[ log(\sigma) \sim \mathcal N (0, 1)\]
and for \(t\leq t_{max}\)
\[x_t|(x_{t-1},\alpha,\beta,\sigma) \sim \mathcal N (\alpha + \beta(x_{t-1} -\alpha), \sigma^2)\]
\[ y_t|x_t \sim \mathcal N (0, exp(x_t)) \]
where \(y_t\) is the response variable and \(x_t\) is the unobserved log-volatility of \(y_t\). \(\mathcal N(m,\sigma^2)\) denotes the normal distribution of mean \(m\) and variance \(\sigma^2\).
\(\alpha\), \(\beta\) and \(\sigma\) are unknown parameters that need to be estimated.
Content of the file 'stoch_volatility.bug'
:
model_file = 'stoch_volatility.bug' # BUGS model filename
cat(readLines(model_file), sep = "\n")
# Stochastic volatility model SV_0
# Reference: S. Chib, F. Nardari, N. Shepard. Markov chain Monte Carlo methods
# for stochastic volatility models. Journal of econometrics, vol. 108, pp. 281-316, 2002.
var y[t_max], x[t_max], prec_y[t_max]
data
{
x_true[1] ~ dnorm(0, 1/sigma_true^2)
prec_y_true[1] <- exp(-x_true[1])
y[1] ~ dnorm(0, prec_y_true[1])
for (t in 2:t_max)
{
x_true[t] ~ dnorm(alpha_true + beta_true*(x_true[t-1]-alpha_true), 1/sigma_true^2)
prec_y_true[t] <- exp(-x_true[t])
y[t] ~ dnorm(0, prec_y_true[t])
}
}
model
{
alpha ~ dnorm(0,10000)
logit_beta ~ dnorm(0,.1)
beta <- ilogit(logit_beta)
log_sigma ~ dnorm(0, 1)
sigma <- exp(log_sigma)
x[1] ~ dnorm(0, 1/sigma^2)
prec_y[1] <- exp(-x[1])
y[1] ~ dnorm(0, prec_y[1])
for (t in 2:t_max)
{
x[t] ~ dnorm(alpha + beta*(x[t-1]-alpha), 1/sigma^2)
prec_y[t] <- exp(-x[t])
y[t] ~ dnorm(0, prec_y[t])
}
}
require(rbiips)
par(bty='l')
light_blue = rgb(.7, .7, 1)
light_red = rgb(1, .7, .7)
Set the random numbers generator seed for reproducibility
set.seed(0)
sample_data = TRUE # Simulated data or SP500 data
t_max = 100
if (!sample_data) {
# Load the data
tab = read.csv('SP500.csv', sep = ";")
y = diff(log(rev(tab$Close)))
SP500_date_str = rev(tab$Date[-1])
ind = 1:t_max
y = y[ind]
SP500_date_str = SP500_date_str[ind]
SP500_date_num = as.Date(SP500_date_str)
}
if (!sample_data) {
data = list(t_max=t_max, y=y)
} else {
sigma_true = .4; alpha_true = 0; beta_true=.99;
data = list(t_max=t_max, sigma_true=sigma_true,
alpha_true=alpha_true, beta_true=beta_true)
}
model = biips_model(model_file, data, sample_data=sample_data)
* Parsing model in: stoch_volatility.bug
* Compiling data graph
Declaring variables
Resolving undeclared variables
Allocating nodes
Graph size: 706
Sampling data
Reading data back into data table
* Compiling model graph
Declaring variables
Resolving undeclared variables
Allocating nodes
Graph size: 715
data = model$data()
Plot the data
if (sample_data) {
plot(1:t_max, data$y, type='l', col='blue', lwd=2,
main='Observed data',
xlab='Time', ylab='Log-return', xaxt = 'n')
} else {
plot(SP500_date_num, data$y, type='l', col='blue', lwd=2,
main='Observed data: S&P 500',
xlab='Date', ylab='Log-return', xaxt = 'n')
axis.Date(1, SP500_date_num, format="%Y-%m-%d")
}
We now use Biips to run a Particle Marginal Metropolis-Hastings in order to obtain posterior MCMC samples of the parameters \(\alpha\), \(\beta\) and \(\sigma\), and of the variables \(x\).
n_burn = 5000 # nb of burn-in/adaptation iterations
n_iter = 10000 # nb of iterations after burn-in
thin = 5 # thinning of MCMC outputs
n_part = 50 # nb of particles for the SMC
param_names = c('alpha', 'logit_beta', 'log_sigma') # names of the variables updated with MCMC (others are updated with SMC)
latent_names = c('x') # names of the variables updated with SMC and that need to be monitored
inits = list(0, 5, -2)
obj_pmmh = biips_pmmh_init(model, param_names, inits=inits,
latent_names=latent_names) # creates a pmmh object
* Initializing PMMH
biips_pmmh_update(obj_pmmh, n_burn, n_part) # adaptation and burn-in iterations
* Adapting PMMH with 50 particles
|--------------------------------------------------| 100%
|++++++++++++++++++++++++++++++++++++++++++++++++++| 5000 iterations in 161.60 s
out_pmmh = biips_pmmh_samples(obj_pmmh, n_iter, n_part, thin=thin) # samples
* Generating 2000 PMMH samples with 50 particles
|--------------------------------------------------| 100%
|**************************************************| 10000 iterations in 325.78 s
summ_pmmh = biips_summary(out_pmmh, probs=c(.025, .975))
kde_pmmh = biips_density(out_pmmh)
for (k in 1:length(param_names)) {
summ_param = summ_pmmh[[param_names[k]]]
cat('Posterior mean of ', param_names[k], ': ', summ_param$mean, '\n', sep='');
cat('95% credible interval of ', param_names[k], ': [', summ_param$quant[[1]], ', ', summ_param$quant[[2]],']\n', sep='')
}
Posterior mean of alpha: 1.601023e-05
95% credible interval of alpha: [-0.01844366, 0.01830881]
Posterior mean of logit_beta: 5.722639
95% credible interval of logit_beta: [3.510561, 9.061323]
Posterior mean of log_sigma: -0.8749074
95% credible interval of log_sigma: [-1.352958, -0.3836928]
if (sample_data)
param_true = c(alpha_true, log(data$beta_true/(1-data$beta_true)), log(sigma_true))
param_lab = expression(alpha, logit(beta), log(sigma))
for (k in 1:length(param_names)) {
samples_param = out_pmmh[[param_names[k]]]
plot(samples_param[1,], type='l', col='blue', lwd=1,
xlab='Iteration', ylab=param_lab[k],
main=param_lab[k])
if (sample_data)
points(0, param_true[k], col='green', pch=8, lwd=2)
}
for (k in 1:length(param_names)) {
samples_param = out_pmmh[[param_names[k]]]
hist(samples_param, breaks=15, col='blue', border='white',
xlab=param_lab[k], ylab='Number of samples',
main=param_lab[k])
if (sample_data)
points(param_true[k], 0, col='green', pch=8, lwd=2)
}
for (k in 1:length(param_names)) {
kde_param = kde_pmmh[[param_names[k]]]
plot(kde_param, col='blue', lwd=2,
xlab=param_lab[k], ylab='Posterior density',
main=param_lab[k])
if (sample_data)
points(param_true[k], 0, col='green', pch=8, lwd=2)
}
x_pmmh_mean = summ_pmmh$x$mean
x_pmmh_quant = summ_pmmh$x$quant
xx = c(1:t_max, t_max:1)
yy = c(x_pmmh_quant[[1]], rev(x_pmmh_quant[[2]]))
plot(xx, yy, type='n', xlab='Time', ylab='Log-volatility')
polygon(xx, yy, col=light_blue, border=NA)
lines(1:t_max, x_pmmh_mean, col='blue', lwd=3)
if (sample_data) {
lines(1:t_max, data$x_true, col='green', lwd=2)
legend('topright', leg=c('95% credible interval', 'PMMH mean estimate', 'True value'),
col=c(light_blue,'blue','green'), lwd=c(NA,3,2), pch=c(15,NA,NA), pt.cex=c(2,1,1),
bty='n')
} else
legend('topright', leg=c('95% credible interval', 'PMMH mean estimate'),
col=c(light_blue,'blue'), lwd=c(NA,3), pch=c(15,NA), pt.cex=c(2,1),
bty='n')
time_index = c(5, 10, 15)
par(mfrow=c(2,2))
for (k in 1:length(time_index)) {
tk = time_index[k]
plot(c(out_pmmh$x[tk,]), col='blue', type='l',
xlab='Iteration',
ylab=bquote(x[.(tk)]),
main=paste('t=', tk, sep=''))
if (sample_data)
points(0, data$x_true[tk], col='green', pch=8, lwd=2)
}
if (sample_data) {
plot(0, type='n', bty='n', xaxt='n', yaxt='n', xlab="", ylab="")
legend('center', leg=c('PMMH samples', 'True value'),
col=c('blue', 'green'), pch=c(NA,8), lty=c(1,NA), lwd=c(1,2),
bty='n')
}
par(mfrow=c(1,1))
par(mfrow=c(2,2))
for (k in 1:length(time_index)) {
tk = time_index[k]
hist(out_pmmh$x[tk,], breaks=30, col='blue', border='white',
xlab=bquote(x[.(tk)]),
ylab='Number of samples',
main=paste('t=', tk, sep=''))
if (sample_data)
points(data$x_true[tk], 0, col='green', pch=8, lwd=2)
}
if (sample_data) {
plot(0, type='n', bty='n', xaxt='n', yaxt='n', xlab="", ylab="")
legend('center', leg=c('Posterior samples', 'True value'),
col=c('blue', 'green'), pch=c(22,8), lwd=c(1,2), lty=NA, pt.cex=c(2,1), pt.bg=c(4,NA),
bty='n')
}
par(mfrow=c(1,1))
par(mfrow=c(2,2))
for (k in 1:length(time_index)) {
tk = time_index[k]
plot(kde_pmmh$x[[tk]], col='blue', lwd=2,
xlab=bquote(x[.(tk)]),
ylab='Posterior density',
main=paste('t=', tk, sep=''))
if (sample_data)
points(data$x_true[tk], 0, col='green', pch=8, lwd=2)
}
if (sample_data) {
plot(0, type='n', bty='n', xaxt='n', yaxt='n', xlab="", ylab="")
legend('center', leg=c('Posterior density', 'True value'),
col=c('blue', 'green'), pch=c(NA,8), lwd=2, lty=c(1,NA), pt.cex=c(2,1), pt.bg=c(4,NA),
bty='n')
}
par(mfrow=c(1,1))