Question 2
The data set linked below contains a time series generated from the following univariate dynamic linear model:
where
a. Apply a Kalman filter to this data to make one-step ahead predictions of given . Create a times-series plot containing the observations
and one-step ahead predictions of . Include a 95% confidence band around your predictions. Report the numerical values found for and .
set.seed(
123
)
Ft <- 1.2
Gt <- 0.8
m0 <- 0
C0 <- 25
Vt <- 9
Wt <- 4
n <- nrow(dlm_data)
y <- rep(
NA
, n)
theta <- rep(
NA
, n)
theta0 <- rnorm(
1
, m0, sqrt(C0))
theta[
1
] <- Gt * theta0 + rnorm(
1
, 0
, sqrt(Wt))
for
(i in
2
:n) theta[i] <- theta[i-
1
] + rnorm(
1
, 0
, sqrt(Wt))
for
(j in
1
:n) y[j] <- theta[j] + rnorm(
1
, 0
, sqrt(Vt))
dlm_mod <- dlm(FF = Ft,
GG = Gt,
V = Vt,
W = Wt,
m0 = m0,
C0 = C0) dlm_data_filtered <- dlmFilter(y = dlm_data$yt,
mod = dlm_mod)
dlm_data$pred <- dlm_data_filtered$a
dlm_data$pSE <- sqrt(unlist(
dlmSvd2var(dlm_data_filtered$U.R, dlm_data_filtered$D.R)))
gg_sim <- ggplot(dlm_data,
aes(y = yt,
x = time)) +
geom_line(linetype = "dashed"
,
color = "black"
) gg_sim +
geom_line(data = dlm_data, aes(y = pred,
x = time),
color = "red"
,
linewidth = 1.2
) +
geom_ribbon(data = dlm_data,
aes(x = time, ymin = pred - 1.96 * pSE,
ymax = pred + 1.96 * pSE),
fill = "red"
,
alpha = 0.2
) +
labs(title = expression(
paste(
"One-Step-Ahead Predictions of "
,
theta[t],
" with Standard Errors"
))) +
ylab(
""
)+
theme_bw()
= 3.529 and = 5.951
b. Apply a Kalman filter to this data to make one-step-ahead predictions of given . Create a time-series plot showing the observed
values of and one-step ahead predictions of . Include a 95% confidence band around your predictions. Report the numerical values of and . (Hint: R’s DLM package does not provide these values directly, so you will need to calculate them.)
dlm_data$ft <- dlm_data_filtered$f
for
(i in
1
:
100
){
dlm_data$qSE[i] <- sqrt(Ft*dlm_data$pSE[i]^
2
*Ft+Vt)
}
gg_sim +
geom_line(data = dlm_data, aes(y = ft,
x = time),
color = "orange"
,
linewidth = 1.2
) +
geom_ribbon(data = dlm_data,
aes(x = time, ymin = ft - 1.96 * qSE,
ymax = ft + 1.96 * qSE),
fill = "orange"
,
alpha = 0.2
) +
labs(title = expression(
paste(
"One-Step-Ahead Predictions of "
,
y[t],
" with Standard Errors"
))) +
ylab(
""
)+
theme_bw()
The one-step-ahead predictive distribution of given is
= 4.235 and = 17.569
c. Apply a Kalman filter to this data to find the filtering distribution of the values of given . Create a time-series plot showing the observed
values of and filtered predictions of . Include a 95% confidence band around your predictions. Report the numerical values of and
.
dlm_data$filtered <- dropFirst(dlm_data_filtered$m)
dlm_data$fSE <- dropFirst(sqrt(unlist(
dlmSvd2var(dlm_data_filtered$U.C, dlm_data_filtered$D.C))))
gg_sim +
geom_line(data = dlm_data, aes(y = filtered,
x = time),
color = "blue"
,
linewidth = 0.8
) +
geom_ribbon(data = dlm_data,
aes(x = time, ymin = filtered - 1.96 * fSE,
ymax = filtered + 1.96 * fSE),
fill = "blue"
,
alpha = 0.2
) +
labs(title = expression(
paste(
"Filtering distribution of "
,
theta[t],
" with Standard Errors"
))) +
ylab(
""
)+
theme_bw()
= 0.501 and = 3.048
d. The filtering distribution of is N(
(your answer should match this). Analytically (i.e., not using code) show
that the predictive distribution of is .
where 5.951
7.809
8.998
9.758
10.245
10.557
e. Apply a Kalman smoother to this data to create the smoothing distribution for given . Create a time-series plot showing the observed
values of and smoothed estimates of . Include a 95% confidence band around your predictions. Additionally, report your values of for
the values of such that is missing.
dlm_data_smoothed <- dlmSmooth(dlm_data_filtered)
dlm_data$smoothed <- dropFirst(dlm_data_smoothed$s)
dlm_data$sSE <- dropFirst(sqrt(unlist(
dlmSvd2var(dlm_data_smoothed$U.S, dlm_data_smoothed$D.S))))
gg_sim +
geom_line(data = dlm_data, aes(y = smoothed,
x = time),
color = "purple"
,
linewidth = 1.2
) +
geom_ribbon(data = dlm_data,
aes(x = time, ymin = smoothed - 1.96 * sSE,
ymax = smoothed + 1.96 * sSE),
fill = "purple"
,
alpha = 0.2
) +
labs(title = expression(
paste(
"Smoothed Values of "
,
theta[t],
" with Standard Errors"
))) +
ylab(
""
)+
theme_bw()
= 2.715
= 2.831
= 2.265
= 1.812
= 1.450
= 1.160
= -0.503
= 1.391
f. Create a plot showing forecasted values (using the DLM forecasting methods discussed in lecture) of (including confidence bands),
along with the original plot of . Report the numerical values of and and provide a non-technical explanation for why the
predictive variance of is less than that ?
dlm_data_forecast<- dlmForecast(dlm_data_filtered, nAhead = 10
)
val <- dlm_data$yt
fore_pt_data <- bind_rows(
data.frame(Time = 1
:
100
,
Type = factor(rep(
"Data"
, 100
),
levels = c(
"Data"
,
"Pred"
)),
y = as.numeric(val)),
data.frame(Time = 101
:
110
,
Type = factor(rep(
"Pred"
, 10
),
levels = c(
"Data"
,
"Pred"
)),
y = dlm_data_forecast$f)
)
fore_pt_pred<- data.frame(time = 101
:
110
,
yt = dlm_data_forecast$f,
SE = sqrt(unlist(dlm_data_forecast$Q)))
# Plot data and forecasts
gg_sim +
geom_line(data = fore_pt_pred, aes(y = yt,
x = time),
color = 'red'
,
linewidth = 0.8
) +
geom_ribbon(data = fore_pt_pred,
aes(x = time, ymin = yt - 1.96 * SE,
ymax = yt + 1.96 * SE),
fill = 'red'
,
alpha = 0.2
) +
labs(title = expression(
paste(
"Observations(yt) and forecasted values with Standard Errors"
))) +
ylab(
""
) +
geom_vline(xintercept = 100
) +
theme_bw()
and The variance of the forecast due to estimation error increases as the lead time gets larger, but there is a limit to the increase with
stationary models. Therefore, the predictive variance of is less than that of .
=
+
y
t
f
t
θ
t
v
t
=
+
θ
t
g
t
θ
t
−
1
w
t
= 1.2
F
t
= 0.8
G
t
∼
N
(0,
= 25)
θ
0
σ
2
θ
∼
N
(0,
= 9)
v
t
σ
2
v
∼
N
(0,
= 4)
w
t
σ
2
w
θ
t
y
1:(
t
−
1)
y
t
θ
t
θ
t
a
40
R
40
a
40
R
40
y
t
y
1:(
t
−
1)
y
t
y
t
y
t
f
40
Q
40
Y
t
y
1:
t
−
1
=
E
(
|
) =
= 1.2
×
3.529 = 4.235
f
t
Y
t
y
1:
t
−
1
F
t
a
t
=
Var
(
|
) =
′
+
= 1.2
×
5.951
×
1.2 + 9 = 17.569
Q
t
Y
t
y
1:
t
−
1
F
t
R
t
F
t
V
t
f
40
Q
40
θ
t
y
1:
t
y
t
θ
t
θ
t
m
40
C
40
m
40
C
40
|
θ
22
y
1:22
= 3.539,
= 3.048)
m
22
C
22
|
θ
28
y
1:27
N
(
= .928,
= 10.557)
a
28
R
28
|
∼
N
(
,
)
θ
t
y
1:(
t
−
1)
a
t
R
t
=
E
[
|
] =
a
t
θ
t
y
1:(
t
−
1)
G
t
m
t
−
1
=
Var
[
|
] =
+
R
t
θ
t
y
1:(
t
−
1)
G
t
C
t
−
1
G
′
t
w
t
=
= 0.8
×
3.539 = 2.831
=
+
= 0.8
×
3.048
×
0.8 + 4 =
a
23
G
t
m
22
R
23
G
t
C
22
G
′
t
w
t
=
= 0.8
×
2.831 = 2.265
=
+
= 0.8
×
5.951
×
0.8 + 4 =
a
24
G
t
m
23
R
24
G
t
C
23
G
′
t
w
t
=
= 0.8
×
2.265 = 1.812
=
+
= 0.8
×
7.809
×
0.8 + 4 =
a
25
G
t
m
24
R
25
G
t
C
24
G
′
t
w
t
=
= 0.8
×
1.812 = 1.450
=
+
= 0.8
×
8.997
×
0.8 + 4 =
a
26
G
t
m
25
R
26
G
t
C
25
G
′
t
w
t
=
= 0.8
×
1.450 = 1.160
=
+
= 0.8
×
9.758
×
0.8 + 4 =
a
27
G
t
m
26
R
27
G
t
C
26
G
′
t
w
t
=
= 0.8
×
1.160 = 0.928
=
+
= 0.8
×
10.245
×
0.8 + 4 =
a
28
G
t
m
27
R
28
G
t
C
27
G
′
t
w
t
θ
t
y
1:
T
y
t
θ
t
θ
t
θ
t
t
y
t
θ
11
θ
23
θ
24
θ
25
θ
26
θ
27
θ
64
θ
80
y
101:110
y
1:100
Q
101
Q
110
y
101
y
110
= 17.569
Q
101
= 24.866
Q
110
y
101
y
110