install.packages("datasets")
install.packages("ggplot2")
install.packages("dlm")
install.packages("rstan")
install.packages("forecast")
install.packages("tibble")
library(datasets)
library(ggplot2)
library(dlm)
library(rstan)
library(forecast)
library(tibble)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
needs::prioritize(magrittr)
datasets
パッケージに含まれるNile
データを用います。このデータは、1871-1970年の100年間のナイル川での年間河川流量(\(10^8 m^3\))に関する時系列データです。1898年のアスワンダム建設で、1989年と1899年の間に流量が変化しています。data(Nile)
dlm
パッケージを用いて、ローカルレベルモデル、フィルタリング、スムージングなどを行えます。Nile.df <- data.frame(
"flow" = c(Nile),
"year" = as.numeric(c(1871:1970), units="years")
)
Nile.df
からナイル川の流量を可視化します。Nile.df %>%
ggplot(., aes(x=year)) +
geom_line(aes(y=flow), col="blue") +
labs(y="Annual Flow", x ="Year")
buildModPoly1 <- function(theta){
dlmModPoly(order=1, # ローカルレベルモデル。2はローカルトレンドモデル
dV=exp(theta[1]), # 観測方程式のノイズ
dW=exp(theta[2]), # 状態の分散行列の対角成分
m0=theta[3]) # 初期値mu0
}
dlmMLE()
関数を使ってモデルを推定します。Nile.local <- dlmMLE(Nile.df$flow, parm=c(1,1,Nile.df$flow[1]), buildModPoly1)
Nile.local
$par
[1] 9.622363 7.292343 1119.999910
$value
[1] 549.63
$counts
function gradient
29 29
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
mod.Nile <- buildModPoly1(Nile.local$par)
mod.Nile
$FF
[,1]
[1,] 1
$V
[,1]
[1,] 15098.68
$GG
[,1]
[1,] 1
$W
[,1]
[1,] 1469.009
$m0
[1] 1120
$C0
[,1]
[1,] 1e+07
Nile.filt <- dlmFilter(Nile.df$flow, mod.Nile)
Nile.filt$m
[1] 1119.9999 1120.0000 1140.9141 1072.8136 1117.3106 1129.9719 1138.4573 1048.8600 1098.0306 1171.3000 1162.9020
[12] 1117.9508 1069.0283 1079.9775 1057.0087 1047.1237 1023.8553 1065.5552 994.3711 984.6583 1026.1416 1045.8651
[23] 1089.6964 1105.8001 1144.3077 1175.2026 1187.1655 1145.1955 1133.1263 1037.2240 984.5566 955.0332 885.3260
[34] 899.9263 882.0541 833.7048 855.6812 811.9712 867.5239 916.2530 930.3387 903.8110 856.3277 749.4229
[45] 769.3382 751.3560 849.7999 916.6142 894.0185 859.2979 849.0707 827.4213 832.1156 840.6301 846.3368
[56] 806.7244 816.9457 797.4660 797.0745 861.9462 834.4551 820.1803 832.1491 835.5808 864.5334 896.4362
[67] 896.5868 876.6689 912.2741 874.5478 821.5269 775.4547 794.2934 799.0218 783.7945 788.3891 855.5801
[78] 856.7604 861.3641 857.7953 866.3954 833.7105 811.0891 818.2755 880.1560 890.2620 915.8282 884.0959
[89] 894.4850 915.9860 889.0183 923.9961 919.1904 914.3328 982.6070 963.7519 905.6027 909.1803 858.1269
[100] 819.6388 798.3718
Nile.df %>%
ggplot(., aes(x=year)) +
geom_line(aes(y=flow), col = "blue") +
geom_line(aes(y=Nile.filt$m[2:101]), col="orange") +
labs(y="Annual Flow", x="Year")
Nile.smooth <- dlmSmooth(Nile.filt) # スムージングする
Nile.smooth$s
[1] 1111.6727 1111.6715 1110.8600 1105.2674 1113.5166 1112.3785 1106.6072 1095.6411 1112.1742 1117.2436 1097.7213
[12] 1074.0856 1058.1445 1054.1845 1044.7940 1040.3455 1037.8765 1042.9844 1034.7615 1049.4768 1073.0921 1090.1978
[23] 1106.3497 1112.4172 1114.8280 1104.0874 1078.1776 1038.4692 999.5849 950.9308 919.4909 895.7851 874.1988
[34] 870.1447 859.2942 851.0019 857.3038 857.8950 874.6267 877.2145 862.9914 838.4543 814.6424 799.4549
[45] 817.6834 835.2973 865.8803 871.7390 855.3893 841.3152 834.7634 829.5507 830.3265 829.6747 825.6833
[56] 818.1584 822.3242 824.2838 834.0545 847.5277 842.2745 845.1235 854.2112 862.2493 871.9656 874.6734
[67] 866.7444 855.8717 848.2946 824.9844 806.9267 801.6073 811.1356 817.2719 823.9210 838.5406 856.8127
[78] 857.2618 857.4444 856.0163 855.3682 851.3505 857.7775 874.7877 895.3773 900.9229 904.8071 900.7917
[89] 906.8746 911.3887 909.7137 917.2538 914.7973 913.1967 912.7828 887.3432 859.5046 842.7094 818.4916
[100] 804.0510 798.3718
Nile.df %>%
ggplot(., aes(x=year)) +
geom_line(aes(y=flow), col="blue") +
geom_line(aes(y=Nile.filt$m[2:101]), col="orange") +
geom_line(aes(y=Nile.smooth$s[2:101]), col="red") +
labs(y="Annual Flow", x="Year")
model.Nile <- auto.arima(
Nile.df$flow,
ic="aic",
trace=T,
stepwise=F,
approximation=F
)
ARIMA(0,1,0) : 1296.697
ARIMA(0,1,0) with drift : 1298.645
ARIMA(0,1,1) : 1269.091
ARIMA(0,1,1) with drift : 1270.309
ARIMA(0,1,2) : 1267.957
ARIMA(0,1,2) with drift : 1268.544
ARIMA(0,1,3) : 1269.208
ARIMA(0,1,3) with drift : 1269.604
ARIMA(0,1,4) : 1270.472
ARIMA(0,1,4) with drift : 1270.69
ARIMA(0,1,5) : 1272.462
ARIMA(0,1,5) with drift : 1272.689
ARIMA(1,1,0) : 1281.48
ARIMA(1,1,0) with drift : 1283.346
ARIMA(1,1,1) : 1267.255
ARIMA(1,1,1) with drift : 1267.637
ARIMA(1,1,2) : 1268.923
ARIMA(1,1,2) with drift : 1269.134
ARIMA(1,1,3) : 1270.858
ARIMA(1,1,3) with drift : 1271.085
ARIMA(1,1,4) : Inf
ARIMA(1,1,4) with drift : 1272.689
ARIMA(2,1,0) : 1277.484
ARIMA(2,1,0) with drift : 1279.282
ARIMA(2,1,1) : 1268.896
ARIMA(2,1,1) with drift : 1269.132
ARIMA(2,1,2) : Inf
ARIMA(2,1,2) with drift : Inf
ARIMA(2,1,3) : Inf
ARIMA(2,1,3) with drift : Inf
ARIMA(3,1,0) : 1278.196
ARIMA(3,1,0) with drift : 1279.952
ARIMA(3,1,1) : 1270.873
ARIMA(3,1,1) with drift : 1271.057
ARIMA(3,1,2) : 1272.746
ARIMA(3,1,2) with drift : 1272.914
ARIMA(4,1,0) : 1277.333
ARIMA(4,1,0) with drift : 1278.989
ARIMA(4,1,1) : 1272.236
ARIMA(4,1,1) with drift : 1272.579
ARIMA(5,1,0) : 1277.125
ARIMA(5,1,0) with drift : 1278.674
Best model: ARIMA(1,1,1)
summary(model.Nile)
Series: Nile.df$flow
ARIMA(1,1,1)
Coefficients:
ar1 ma1
0.2544 -0.8741
s.e. 0.1194 0.0605
sigma^2 estimated as 20177: log likelihood=-630.63
AIC=1267.25 AICc=1267.51 BIC=1275.04
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -16.06603 139.8986 109.9998 -4.005967 12.78745 0.825499 -0.03228482
plot(forecast(model.Nile, level = c(95), h = 50))
rstan
パッケージを用いて状態空間モデルをベイズ推定します。Nile.data.stan <- list(
year = nrow(Nile.df),
flow = Nile.df$flow
)
stan
コードを作成し、Nile.stan
として保存した後、rstanコード用の適当なフォルダに置きます。// stan program for stated space model (local level model) for Nile data
data {
int<lower=1> year; // length year
vector<lower=0>[year] flow; // Nile Annual Flow
}
parameters {
real mu0; // average of state variable at t0
vector[year] mu; // Local level average of parameter
real<lower=0> sigmaV; // observed noise
real<lower=0> sigmaW; // Local level noise
}
model{
// State equation
mu[1] ~ normal(mu0, sigmaW);
// State transition
for(i in 2:year) {
mu[i] ~ normal(mu[i-1], sigmaW);
}
// Observed equation
for(i in 1:year){
flow[i] ~ normal(mu[i], sigmaV);
}
sigmaV ~ cauchy(0,5);
sigmaW ~ cauchy(0,5);
}
setwd("~/(フォルダの指定)/rstan")
などとして、stan
コードを置いたフォルダを作業用フォルダとして指定します。option
を指定します。rstan_options(auto_write = TRUE)
options(mc.cores = 3)
rstan()
関数を用いてstan
プログラムを実行します。Nile.stan.model1 <- rstan::stan(
file = 'Nile.stan',
data = Nile.data.stan,
iter = 4500,
warmup = 500,
chains = 3,
thin = 10,
seed = 1234
)
stan_rhat(Nile.stan.model1)
traceplot(Nile.stan.model1, pars = "mu0", inc_warmup = FALSE)
traceplot(Nile.stan.model1, pars = "sigmaV", inc_warmup = FALSE)
traceplot(Nile.stan.model1, pars = "sigmaW", inc_warmup = FALSE)
Nile.stan.pars1 <- rstan::extract(Nile.stan.model1)
Nile.df2 <- data.frame(
"Year" = Nile.df$year,
"Flow" = Nile.df$flow,
"muM" = apply(Nile.stan.pars1$mu, 2, mean),
"muL" = apply(Nile.stan.pars1$mu, 2, quantile, 0.025),
"muU" = apply(Nile.stan.pars1$mu, 2, quantile, 0.925)
)
Nile.df2 %>%
ggplot(., aes(x=Year, y=Flow)) +
geom_point(col = "black") +
labs(y = "Nile Flow", x = "Year") +
geom_smooth(aes(ymin=muL, ymax=muU, y=muM),
lwd=1.2,color="blue",stat="identity"
)