import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pylab as plt
import seaborn as sns
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
import statsmodels.api as sm
# 日付形式で読み込み
# 読み込み用関数作成
#データの読み込み
df = pd.read_csv("./状態空間モデルサンプルソース/AirPassengers.csv")
df["Month"]=pd.to_datetime(df["Month"])
df.head()
| Month | #Passengers | Variable1 | |
|---|---|---|---|
| 0 | 1949-01-01 | 112 | 86.123178 |
| 1 | 1949-02-01 | 118 | 110.534851 |
| 2 | 1949-03-01 | 132 | 95.151958 |
| 3 | 1949-04-01 | 129 | 127.505903 |
| 4 | 1949-05-01 | 121 | 115.030655 |
#可視化
fig = plt.figure(figsize=(15, 4))
plt.plot(df["Month"], df["#Passengers"], alpha=0.7, label= "Passengers")
plt.plot(df["Month"], df["Variable1"], alpha=0.7, label = "Variable1")
plt.grid()
plt.legend()
plt.show()
df.shape
(144, 3)
df.shape[0] * 0.8
115.2
# 8割を学習データに使う
num_train = int(df.shape[0]*0.8)
# 観測データ
train_data = df["#Passengers"][:num_train]
# 外生変数
exog_data1 = df["Variable1"][:num_train]
exog_data2 = df["Variable1"][:num_train]
# 外生変数を複数列渡したいときは、DataFrameにして渡す
exog_data = pd.DataFrame({'ex1': exog_data1, 'ex2': exog_data2})
# ローカルレベルモデルの推定
#インスタンス(ローカルレベル・外生変数=Variable1)
mod_local_level = sm.tsa.UnobservedComponents(train_data, 'local level', exog=exog_data)
# ※ デフォルトでカルマンフィルタが効くようになっている!!!
# 最尤法によるパラメータの推定
# パラメータ:状態値の誤差分散、観測値の誤差分散
# 準ニュートン法で最適化
res_local_level = mod_local_level.fit(method='lbfgs',maxiter=100)
# 学習結果
print(res_local_level.summary())
# 学習結果(状態・トレンド)の描画
fig = res_local_level.plot_components()
Unobserved Components Results
==============================================================================
Dep. Variable: #Passengers No. Observations: 115
Model: local level Log Likelihood -510.094
Date: Sun, 18 Apr 2021 AIC 1026.187
Time: 11:19:56 BIC 1034.396
Sample: 0 HQIC 1029.518
- 115
Covariance Type: opg
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
sigma2.irregular 64.5541 42.627 1.514 0.130 -18.994 148.102
sigma2.level 330.9011 86.961 3.805 0.000 160.461 501.341
beta.Variable1 0.4788 0.065 7.370 0.000 0.351 0.606
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 1.37
Prob(Q): 0.93 Prob(JB): 0.50
Heteroskedasticity (H): 3.97 Skew: -0.18
Prob(H) (two-sided): 0.00 Kurtosis: 3.40
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# 学習されたパラメータの取り出し
print(res_local_level.params)
sigma2.irregular 64.554112 sigma2.level 330.901116 beta.Variable1 0.478798 dtype: float64
# ローカル線形トレンドモデル
# mod_trend = sm.tsa.UnobservedComponents(train_data, 'local linear trend', exog=exog_data)
mod_trend = sm.tsa.UnobservedComponents(train_data, 'local linear trend')
# 最尤法によるパラメータの推定
res_trend = mod_trend.fit(method='bfgs')
Optimization terminated successfully.
Current function value: 4.769711
Iterations: 15
Function evaluations: 28
Gradient evaluations: 28
# 推定されたパラメータの一覧
print(res_trend.summary())
# 推定された状態・トレンドの描画
rcParams['figure.figsize'] = 15, 20
fig = res_trend.plot_components()
Unobserved Components Results
==============================================================================
Dep. Variable: #Passengers No. Observations: 115
Model: local linear trend Log Likelihood -548.517
Date: Sun, 18 Apr 2021 AIC 1103.033
Time: 12:47:53 BIC 1111.216
Sample: 0 HQIC 1106.354
- 115
Covariance Type: opg
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
sigma2.irregular 86.8839 109.615 0.793 0.428 -127.958 301.726
sigma2.level 0.0005 404.267 1.12e-06 1.000 -792.349 792.350
sigma2.trend 554.4769 268.896 2.062 0.039 27.449 1081.504
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 2.93
Prob(Q): 0.96 Prob(JB): 0.23
Heteroskedasticity (H): 5.43 Skew: 0.38
Prob(H) (two-sided): 0.00 Kurtosis: 3.22
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# 季節変動ありのローカルレベルモデル
mod_season_local_level = sm.tsa.UnobservedComponents(
train_data, 'local level', seasonal=12
)
# まずはNelder-Meadでパラメータを推定し、その結果を初期値としてまた最適化する。
# 2回目はBFGSを使用。
res_season_local_level = mod_season_local_level.fit(
method='bfgs',
maxiter=500,
start_params=mod_season_local_level.fit(method='nm', maxiter=500).params,
)
# 推定されたパラメータの一覧
print(res_season_local_level.summary())
# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_local_level.plot_components()
Optimization terminated successfully.
Current function value: 3.662551
Iterations: 107
Function evaluations: 204
Optimization terminated successfully.
Current function value: 3.662551
Iterations: 0
Function evaluations: 1
Gradient evaluations: 1
Unobserved Components Results
=====================================================================================
Dep. Variable: #Passengers No. Observations: 115
Model: local level Log Likelihood -421.193
+ stochastic seasonal(12) AIC 848.387
Date: Sun, 18 Apr 2021 BIC 856.291
Time: 12:56:54 HQIC 851.588
Sample: 0
- 115
Covariance Type: opg
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
sigma2.irregular 1.108e-09 15.691 7.06e-11 1.000 -30.754 30.754
sigma2.level 126.8894 24.738 5.129 0.000 78.403 175.376
sigma2.seasonal 5.0747 3.738 1.357 0.175 -2.252 12.402
===================================================================================
Ljung-Box (L1) (Q): 15.25 Jarque-Bera (JB): 2.29
Prob(Q): 0.00 Prob(JB): 0.32
Heteroskedasticity (H): 5.53 Skew: 0.22
Prob(H) (two-sided): 0.00 Kurtosis: 3.58
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# 季節変動ありのローカル線形トレンドモデル
mod_season_trend = sm.tsa.UnobservedComponents(
train_data, 'local linear trend', seasonal=12
)
# まずはNelder-meadでパラメータを推定し、その結果を初期値としてまた最適化する。2回目はBFGSを使用。
res_season_trend = mod_season_trend.fit(
method='bfgs',
maxiter=500,
start_params=mod_season_trend.fit(method='nm', maxiter=500).params,
)
# 推定されたパラメータの一覧
print(res_season_trend.summary())
# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_trend.plot_components()
Optimization terminated successfully.
Current function value: 3.594236
Iterations: 251
Function evaluations: 432
Optimization terminated successfully.
Current function value: 3.591415
Iterations: 30
Function evaluations: 35
Gradient evaluations: 35
Unobserved Components Results
=====================================================================================
Dep. Variable: #Passengers No. Observations: 115
Model: local linear trend Log Likelihood -413.013
+ stochastic seasonal(12) AIC 834.025
Date: Sun, 18 Apr 2021 BIC 844.525
Time: 13:23:05 HQIC 838.277
Sample: 0
- 115
Covariance Type: opg
====================================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
sigma2.irregular 1.282e-06 14.604 8.78e-08 1.000 -28.624 28.624
sigma2.level 5.325e-07 37.678 1.41e-08 1.000 -73.847 73.847
sigma2.trend 44.8916 15.826 2.837 0.005 13.874 75.909
sigma2.seasonal 11.9569 3.615 3.307 0.001 4.871 19.043
===================================================================================
Ljung-Box (L1) (Q): 3.44 Jarque-Bera (JB): 4.84
Prob(Q): 0.06 Prob(JB): 0.09
Heteroskedasticity (H): 2.40 Skew: -0.49
Prob(H) (two-sided): 0.01 Kurtosis: 3.41
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
res_season_trend.forecast(steps=2)
115 516.754913 116 493.253014 Name: predicted_mean, dtype: float64
fcast = res_season_trend.get_forecast(steps=2)
# Most results are collected in the 'summary_frame' attribute.
# Here we specify that we want a confidence level of 90%
print(fcast.summary_frame(alpha=0.10))
#Passengers mean mean_se mean_ci_lower mean_ci_upper 115 516.754913 13.520217 494.516135 538.993692 116 493.253014 21.461688 457.951678 528.554350