In this tutorial, we consider applying sequential Monte Carlo methods for Bayesian inference in a nonlinear non-Gaussian hidden Markov model.
The statistical model is defined as follows.
\[ x_1\sim \mathcal N\left (\mu_0, \frac{1}{\lambda_0}\right )\]
\[ y_1\sim \mathcal N\left (h(x_1), \frac{1}{\lambda_y}\right )\]
For \(t=2,\ldots,t_{max}\):
\[ x_t|x_{t-1} \sim \mathcal N\left ( f(x_{t-1},t-1), \frac{1}{\lambda_x}\right )\]
\[ y_t|x_t \sim \mathcal N\left ( h(x_{t}), \frac{1}{\lambda_y}\right )\]
where \(\mathcal N\left (m, S\right )\) denotes the Gaussian distribution of mean \(m\) and covariance matrix \(S\), \(h(x)=x^2/20\), \(f(x,t-1)=0.5 x+25 x/(1+x^2)+8 \cos(1.2 (t-1))\), \(\mu_0=0\), \(\lambda_0 = 1/5\), \(\lambda_x = 0.1\) and \(\lambda_y=1\).
We describe the model in the BUGS language in the file 'hmm_1d_nonlin.bug':
model_file <- 'hmm_1d_nonlin.bug' # BUGS model filename
# Display the contents of the model file
cat(readLines(model_file), sep = "\n")
var x_true[t_max], x[t_max], y[t_max]
data
{
x_true[1] ~ dnorm(mean_x_init, prec_x_init)
y[1] ~ dnorm(x_true[1]^2/20, prec_y)
for (t in 2:t_max)
{
x_true[t] ~ dnorm(0.5*x_true[t-1]+25*x_true[t-1]/(1+x_true[t-1]^2)+8*cos(1.2*(t-1)), prec_x)
y[t] ~ dnorm(x_true[t]^2/20, prec_y)
}
}
model
{
x[1] ~ dnorm(mean_x_init, prec_x_init)
y[1] ~ dnorm(x[1]^2/20, prec_y)
for (t in 2:t_max)
{
x[t] ~ dnorm(0.5*x[t-1]+25*x[t-1]/(1+x[t-1]^2)+8*cos(1.2*(t-1)), prec_x)
y[t] ~ dnorm(x[t]^2/20, prec_y)
}
}
# Load rbiips; library() errors immediately if the package is missing,
# unlike require(), which only returns FALSE
library(rbiips)

# Plotting defaults: L-shaped axis box, soft colors for credible bands
par(bty = 'l')
light_blue <- rgb(.7, .7, 1)
light_red <- rgb(1, .7, .7)
Set the random number generator seed for reproducibility:
# Seed the RNG so the sampled data are reproducible
set.seed(2)

# Model dimensions and hyperparameters.
# dnorm in BUGS is parameterized by (mean, precision), so the prec_* values
# below are precisions (1/variance).
t_max <- 20          # number of time steps
mean_x_init <- 0     # prior mean of x[1]
prec_x_init <- 1/5   # prior precision of x[1] (variance 5)
prec_x <- 1/10       # state transition precision (variance 10)
prec_y <- 1          # observation precision (variance 1)

# Data list passed to the BUGS model
data <- list(t_max = t_max, prec_x_init = prec_x_init,
             prec_x = prec_x, prec_y = prec_y, mean_x_init = mean_x_init)
sample_data <- TRUE # if TRUE, biips_model also samples the 'data' block (x_true, y)
# Create the Biips model object and sample the data
model <- biips_model(model_file, data, sample_data = sample_data)
* Parsing model in: hmm_1d_nonlin.bug
* Compiling data graph
Declaring variables
Resolving undeclared variables
Allocating nodes
Graph size: 279
Sampling data
Reading data back into data table
* Compiling model graph
Declaring variables
Resolving undeclared variables
Allocating nodes
Graph size: 280
# Retrieve the full data (including the sampled x_true and y) from the model
data <- model$data()
Let us now use Biips to run a particle filter. We want to monitor the variable x, and to get the filtering and smoothing particle approximations. The algorithm will use 10000 particles, stratified resampling, with a threshold of 0.5.
n_part <- 10000       # number of particles
variables <- c('x')   # variables to be monitored
# Optional settings: monitor filtering and smoothing ('fs'); stratified
# resampling triggered when ESS/n_part drops below 0.5
mn_type <- 'fs'
rs_type <- 'stratified'
rs_thres <- 0.5
# Run the SMC (particle filter) algorithm
out_smc <- biips_smc_samples(model, variables, n_part,
                             type = mn_type, rs_type = rs_type, rs_thres = rs_thres)
* Assigning node samplers
* Running SMC forward sampler with 10000 particles
|--------------------------------------------------| 100%
|**************************************************| 20 iterations in 1.90 s
# Diagnose the SMC output (reports effective-sample-size quality per variable)
diag_smc <- biips_diagnosis(out_smc)
* Diagnosis of variable: x[1:20]
Filtering: GOOD
Smoothing: GOOD
The sequence of filtering distributions is automatically chosen by Biips based on the topology of the graphical model, and is returned in the subfield f$conditionals. For this particular example, the sequence of filtering distributions is \(\pi(x_{t}|y_{1:t})\), for \(t=1,\ldots,t_{max}\).
cat('Filtering distributions:\n')
# For each SMC iteration i, print the filtering conditional x[i] | y[1:i].
# seq_along() is safe for empty input, unlike 1:length().
for (i in seq_along(out_smc$x$f$conditionals)) {
  cat(out_smc$x$f$iterations[i], ': x[', i, '] | ', sep = '')
  cat(paste(out_smc$x$f$conditionals[[i]], collapse = ','), '\n')
}
Filtering distributions:
1: x[1] | y[1]
2: x[2] | y[1],y[2]
3: x[3] | y[1],y[2],y[3]
4: x[4] | y[1],y[2],y[3],y[4]
5: x[5] | y[1],y[2],y[3],y[4],y[5]
6: x[6] | y[1],y[2],y[3],y[4],y[5],y[6]
7: x[7] | y[1],y[2],y[3],y[4],y[5],y[6],y[7]
8: x[8] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8]
9: x[9] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9]
10: x[10] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10]
11: x[11] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11]
12: x[12] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12]
13: x[13] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13]
14: x[14] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14]
15: x[15] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15]
16: x[16] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16]
17: x[17] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17]
18: x[18] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18]
19: x[19] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19]
20: x[20] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
while the smoothing distributions are \(\pi(x_{t}|y_{1:t_{max}})\), for \(t=1,\ldots,t_{max}\).
cat('Smoothing distributions:\n')
# All smoothing distributions share the same conditioning set y[1:t_max],
# hence s$conditionals is a single vector reused for every i
for (i in seq_along(out_smc$x$s$iterations)) {
  cat('x[', i, '] | ', sep = '')
  cat(paste(out_smc$x$s$conditionals, collapse = ','), '\n')
}
Smoothing distributions:
x[1] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[2] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[3] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[4] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[5] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[6] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[7] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[8] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[9] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[10] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[11] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[12] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[13] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[14] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[15] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[16] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[17] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[18] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[19] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
x[20] | y[1],y[2],y[3],y[4],y[5],y[6],y[7],y[8],y[9],y[10],y[11],y[12],y[13],y[14],y[15],y[16],y[17],y[18],y[19],y[20]
# Summary statistics (mean and 95% credible bounds) of the SMC output
summ_smc <- biips_summary(out_smc, probs = c(.025, .975))

# Filtering estimates
x_f_mean <- summ_smc$x$f$mean
x_f_quant <- summ_smc$x$f$quant
# Polygon coordinates tracing the credible band (lower bound forward,
# upper bound backward)
xx <- c(1:t_max, t_max:1)
yy <- c(x_f_quant[[1]], rev(x_f_quant[[2]]))
plot(xx, yy, type = 'n', xlab = 'Time', ylab = 'x')
polygon(xx, yy, col = light_blue, border = NA)
lines(1:t_max, x_f_mean, col = 'blue', lwd = 3)
lines(1:t_max, data$x_true, col = 'green', lwd = 2)
# Spell out 'legend=' (the original 'leg=' relied on partial argument matching)
legend('topright', legend = c('95% credible interval', 'Filtering mean estimate', 'True value'),
       col = c(light_blue, 'blue', 'green'), lwd = c(NA, 3, 2), pch = c(15, NA, NA),
       pt.cex = c(2, 1, 1), bty = 'n')
# Smoothing estimates
x_s_mean <- summ_smc$x$s$mean
x_s_quant <- summ_smc$x$s$quant
# Polygon coordinates tracing the 95% credible band
xx <- c(1:t_max, t_max:1)
yy <- c(x_s_quant[[1]], rev(x_s_quant[[2]]))
plot(xx, yy, type = 'n', xlab = 'Time', ylab = 'x')
polygon(xx, yy, col = light_red, border = NA)
lines(1:t_max, x_s_mean, col = 'red', lwd = 3)
lines(1:t_max, data$x_true, col = 'green', lwd = 2)
# Spell out 'legend=' (the original 'leg=' relied on partial argument matching)
legend('topright', legend = c('95% credible interval', 'Smoothing mean estimate', 'True value'),
       col = c(light_red, 'red', 'green'), lwd = c(NA, 3, 2), pch = c(15, NA, NA),
       pt.cex = c(2, 1, 1), bty = 'n')
# Kernel density estimates of the filtering and smoothing posteriors
kde_smc <- biips_density(out_smc)

time_index <- c(5, 10, 15)
par(mfrow = c(2, 2))
for (k in seq_along(time_index)) {
  tk <- time_index[k]
  plot(kde_smc$x[[tk]], col = c('blue', 'red'), lwd = 2,
       xlab = bquote(x[.(tk)]), ylab = 'Posterior density',
       main = paste0('t=', tk))
  points(data$x_true[tk], 0, col = 'green', pch = 8, lwd = 2)
}
# Empty fourth panel used only to hold the legend
plot(0, type = 'n', bty = 'n', xaxt = 'n', yaxt = 'n', xlab = "", ylab = "")
legend('center', legend = c('Filtering density', 'Smoothing density', 'True value'),
       col = c('blue', 'red', 'green'), pch = c(NA, NA, 8), lty = c(1, 1, NA), lwd = 2,
       bty = 'n')
par(mfrow = c(1, 1))
We now use Biips to run a Particle Independent Metropolis-Hastings (PIMH) algorithm.
# PIMH settings
n_burn <- 500   # number of burn-in iterations
n_iter <- 500   # number of retained iterations
thin <- 1       # thinning interval
n_part <- 100   # number of particles per SMC run inside PIMH
# Initialize the PIMH object, monitoring the variables in 'variables'
obj_pimh <- biips_pimh_init(model, variables)
* Initializing PIMH
# Run n_burn PIMH iterations without storing samples
biips_pimh_update(obj_pimh, n_burn, n_part) # burn-in iterations
* Updating PIMH with 100 particles
|--------------------------------------------------| 100%
|**************************************************| 500 iterations in 6.81 s
# Draw n_iter PIMH samples, keeping every thin-th iteration
out_pimh <- biips_pimh_samples(obj_pimh, n_iter, n_part, thin = thin)
* Generating PIMH samples with 100 particles
|--------------------------------------------------| 100%
|**************************************************| 500 iterations in 6.28 s
# Summary statistics (mean and 95% credible bounds) of the PIMH output
summ_pimh <- biips_summary(out_pimh, probs = c(.025, .975))

x_pimh_mean <- summ_pimh$x$mean
x_pimh_quant <- summ_pimh$x$quant
# Polygon coordinates tracing the 95% credible band
xx <- c(1:t_max, t_max:1)
yy <- c(x_pimh_quant[[1]], rev(x_pimh_quant[[2]]))
plot(xx, yy, type = 'n', xlab = 'Time', ylab = 'x')
polygon(xx, yy, col = light_blue, border = NA)
lines(1:t_max, x_pimh_mean, col = 'blue', lwd = 3)
lines(1:t_max, data$x_true, col = 'green', lwd = 2)
# Spell out 'legend=' (the original 'leg=' relied on partial argument matching)
legend('topright', legend = c('95% credible interval', 'PIMH mean estimate', 'True value'),
       col = c(light_blue, 'blue', 'green'), lwd = c(NA, 3, 2), pch = c(15, NA, NA),
       pt.cex = c(2, 1, 1), bty = 'n')
# Trace plots of the PIMH samples at selected time steps
time_index <- c(5, 10, 15)
par(mfrow = c(2, 2))
for (k in seq_along(time_index)) {
  tk <- time_index[k]
  plot(out_pimh$x[tk, ], col = 'blue', type = 'l',
       xlab = 'Iteration', ylab = bquote(x[.(tk)]),
       main = paste0('t=', tk))
  points(0, data$x_true[tk], col = 'green', pch = 8, lwd = 2)
}
# Empty fourth panel used only to hold the legend
plot(0, type = 'n', bty = 'n', xaxt = 'n', yaxt = 'n', xlab = "", ylab = "")
legend('center', legend = c('PIMH samples', 'True value'),
       col = c('blue', 'green'), pch = c(NA, 8), lwd = c(1, 2), lty = c(1, NA),
       bty = 'n')
# Histograms of the PIMH samples at selected time steps
par(mfrow = c(2, 2))
for (k in seq_along(time_index)) {
  tk <- time_index[k]
  hist(out_pimh$x[tk, ], breaks = 20, col = 'blue', border = 'white',
       xlab = bquote(x[.(tk)]), ylab = 'Number of samples',
       main = paste0('t=', tk))
  points(data$x_true[tk], 0, col = 'green', pch = 8, lwd = 2)
}
# Empty fourth panel used only to hold the legend
plot(0, type = 'n', bty = 'n', xaxt = 'n', yaxt = 'n', xlab = "", ylab = "")
legend('center', legend = c('Posterior density', 'True value'),
       col = c('blue', 'green'), pch = c(22, 8), lwd = c(NA, 2), lty = NA,
       pt.cex = c(2, 1), pt.bg = c(4, NA), bty = 'n')
# Kernel density estimates of the PIMH posterior at selected time steps
par(mfrow = c(2, 2))
for (k in seq_along(time_index)) {
  tk <- time_index[k]
  kde_pimh <- density(out_pimh$x[tk, ])
  plot(kde_pimh, col = 'blue', lwd = 2,
       xlab = bquote(x[.(tk)]), ylab = 'Posterior density',
       main = paste0('t=', tk))
  points(data$x_true[tk], 0, col = 'green', pch = 8, lwd = 2)
}
# Empty fourth panel used only to hold the legend
plot(0, type = 'n', bty = 'n', xaxt = 'n', yaxt = 'n', xlab = "", ylab = "")
legend('center', legend = c('Posterior density', 'True value'),
       col = c('blue', 'green'), pch = c(NA, 8), lwd = 2, lty = c(1, NA),
       bty = 'n')
par(mfrow = c(1, 1))