import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_datareader.data import DataReader
np.random.seed(17429)
sns.set()
sns.set_style('whitegrid')
# Monthly CPI from FRED, 1970-2016, with an explicit month-start frequency.
cpi = DataReader('CPIAUCNS', 'fred', start='1970-01', end='2016-12')
cpi.index = pd.DatetimeIndex(cpi.index, freq='MS')
# Quarterly-averaged log CPI, first-differenced and annualized (x400)
# gives quarterly inflation in annualized percent.
log_cpi_quarterly = np.log(cpi).resample('QS').mean()
inf = log_cpi_quarterly.diff()[1:] * 400
plt.figure(figsize=(20, 7))
sns.lineplot(x=inf.index, y='CPIAUCNS', data=inf)
<AxesSubplot:xlabel='DATE', ylabel='CPIAUCNS'>
# Annual Nile river flow volumes (1871-1970) from the statsmodels datasets,
# reindexed with year-start timestamps.
nile = sm.datasets.nile.load_pandas().data['volume']
nile.index = pd.date_range('1871', '1970', freq='AS')
nile
1871-01-01 1120.0
1872-01-01 1160.0
1873-01-01 963.0
1874-01-01 1210.0
1875-01-01 1160.0
...
1966-01-01 746.0
1967-01-01 919.0
1968-01-01 718.0
1969-01-01 714.0
1970-01-01 740.0
Freq: AS-JAN, Name: volume, Length: 100, dtype: float64
# Line plot of the annual Nile volumes
fig = plt.figure(figsize=(20, 7))
sns.lineplot(x=nile.index, y=nile, data=nile)
<AxesSubplot:ylabel='volume'>
# from pandas_datareader.data import DataReader
# Common sample window for the US RBC dataset
start = '1984-01'
end = '2016-09'

def _fred(series):
    # Fetch a single FRED series over the common sample window.
    return DataReader(series, 'fred', start=start, end=end)

labor = _fred('HOANBS').resample('QS').first()
cons = _fred('PCECC96').resample('QS').first()
inv = _fred('GPDIC1').resample('QS').first()
pop = _fred('CNP16OV')
pop = pop.resample('QS').mean()  # Convert pop from monthly to quarterly observations
recessions = _fred('USRECQ')
recessions = recessions.resample('QS').last()['USRECQ'].iloc[1:]
# Express hours, consumption and investment in per-capita terms
N = labor['HOANBS'] * 6e4 / pop['CNP16OV']
C = (cons['PCECC96'] * 1e6 / pop['CNP16OV']) / 4
I = (inv['GPDIC1'] * 1e6 / pop['CNP16OV']) / 4
Y = C + I
# Log first differences (growth rates), dropping the initial NaN
y = np.log(Y).diff()[1:]
c = np.log(C).diff()[1:]
n = np.log(N).diff()[1:]
i = np.log(I).diff()[1:]
rbc_data = pd.concat((y, n, c), axis=1)
rbc_data.columns = ['output', 'labor', 'consumption']
rbc_data
| output | labor | consumption | |
|---|---|---|---|
| DATE | |||
| 1984-04-01 | 0.014809 | 0.010369 | 0.011373 |
| 1984-07-01 | 0.007422 | 0.001567 | 0.004724 |
| 1984-10-01 | 0.004870 | 0.003323 | 0.009835 |
| 1985-01-01 | 0.006340 | 0.005746 | 0.014648 |
| 1985-04-01 | 0.008174 | 0.004318 | 0.006753 |
| ... | ... | ... | ... |
| 2015-07-01 | 0.003598 | 0.000893 | 0.005739 |
| 2015-10-01 | -0.000803 | 0.002283 | 0.002782 |
| 2016-01-01 | 0.001538 | -0.000079 | 0.004335 |
| 2016-04-01 | 0.000967 | 0.001380 | 0.004007 |
| 2016-07-01 | 0.002544 | 0.000203 | 0.003752 |
130 rows × 3 columns
# Plot the three RBC growth-rate series on one set of axes
plt.figure(figsize=(20, 7))
for series in ('output', 'labor', 'consumption'):
    sns.lineplot(x=rbc_data.index, y=series, data=rbc_data, label=series)
plt.legend()
<matplotlib.legend.Legend at 0x1a21d6e9fa0>
# Create a new model class with parent sm.tsa.statespace.MLEModel
class LocalLevel(sm.tsa.statespace.MLEModel):
    """Local level model: observation = level + noise, with a random-walk level."""

    # Initial parameter vector [observation variance, level variance];
    # the ordering must match the order the values are consumed in update().
    start_params = [1.0, 1.0]

    def __init__(self, endog):
        # A single unobserved state (the level); 'endog' is the observed series.
        super().__init__(endog, k_states=1)
        # Fixed (non-estimated) elements of the state space matrices
        self['design', 0, 0] = 1.0
        self['transition', 0, 0] = 1.0
        self['selection', 0, 0] = 1.0
        # Approximate diffuse initialization; "burn" the first loglikelihood value
        self.initialize_approximate_diffuse()
        self.loglikelihood_burn = 1

    def update(self, params, **kwargs):
        # Map the parameter vector into the state space matrices.  Using the
        # parameters in this order implicitly defines the required ordering.
        # The **kwargs argument must be accepted.
        self['obs_cov', 0, 0] = params[0]
        self['state_cov', 0, 0] = params[1]
# Instantiate a new model object on the Nile data
nile_model_1 = LocalLevel(nile)
# Compute the loglikelihood at parameter values specific to the Nile model
# (observation variance 15099.0, level variance 1469.1)
print('流量:15099.0、分散:1469.1時の確からしさ(対数尤度)')
print(nile_model_1.loglike([15099.0, 1469.1]))
流量:15099.0、分散:1469.1時の確からしさ(対数尤度) -632.5376950475525
# Evaluate the loglikelihood at a different parameter vector;
# notice that the value differs
print(nile_model_1.loglike([10000.0, 1.0]))
-687.5456216003191
# Retrieve filtering output: the Kalman filter runs when filter() is called
# with the (here hand-picked / ML) parameter values, producing the filtered
# estimate of the unobserved level and its variance.
# BUG FIX: the original passed [15099.0, 14969.1]; the second value is
# inconsistent with the 1469.1 used everywhere else for this model
# (loglike above, smooth below) and is almost certainly a typo.
nile_filtered_1 = nile_model_1.filter([15099.0, 1469.1])
# Print the filtered estimate of the unobserved level and its variance
print('filtering後のナイル川の流量:')
print(nile_filtered_1.filtered_state[0], '\n')
print('filtering後のナイル川の流量の分散')
print(nile_filtered_1.filtered_state_cov[0, 0])
filtering後のナイル川の流量: [1103.34065938 1140.96458431 1030.01901376 1141.16863213 1152.78204011 1157.23248587 944.99218664 1120.71609064 1274.41402889 1191.54001912 1070.36175098 986.90341717 1062.79956673 1020.38066211 1020.14596194 983.06250424 1104.48583939 916.136184 941.94764332 1064.05835459 1086.21845386 1162.53692284 1154.80718617 1213.49905128 1242.16957062 1228.50075051 1106.11357856 1102.34420445 899.90104741 862.96859365 869.7700925 761.39768344 871.51632214 847.76878603 757.27735662 855.13902917 754.55439983 918.21693625 999.46880276 980.68302692 888.39479977 788.26903082 583.40599175 731.74608709 713.40590719 964.09463769 1047.88813924 914.78063838 821.8156704 821.31276251 788.44236673 823.31341266 848.39904255 856.78481799 758.8848112 811.97979644 770.06632753 786.05593373 942.62711053 829.41039627 799.56259229 839.90853178 843.04771586 905.29062691 953.81949924 918.78699782 859.11222625 952.14324937 840.45797891 739.06014091 683.53286495 783.70323278 801.14982332 764.68054258 787.07358477 943.01732096 891.83235008 880.83767681 860.59135337 878.72348175 795.65867779 766.89090937 810.7337688 958.25521294 933.43555027 965.84456802 861.7421446 899.51114881 946.05438347 865.25179042 960.66299509 926.96010297 910.95420084 1070.67089049 972.84112658 832.98047673 886.01647865 782.42462013 740.23689173 740.0908343 ] filtering後のナイル川の流量の分散 [14874.41126432 10026.30168861 9412.92435357 9324.58546206 9311.63622102 9309.73316184 9309.45337708 9309.41224129 9309.40619318 9309.40530394 9309.4051732 9309.40515398 9309.40515115 9309.40515074 9309.40515067 9309.40515067 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 
9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066 9309.40515066]
# Retrieve smoothing output at the same parameter values
nile_smoothed_1 = nile_model_1.smooth([15099.0, 1469.1])
# Print the smoothed estimate of the unobserved level and its variance
print(nile_smoothed_1.smoothed_state[0], '\n')
print(nile_smoothed_1.smoothed_state_cov[0, 0])
[1107.20389814 1107.58545838 1102.86719725 1111.75771149 1111.08946383 1105.66232692 1094.9482521 1111.66713159 1116.87247926 1097.44906737 1073.88554188 1057.99740877 1054.07665717 1044.71468541 1040.28714272 1037.83349507 1042.95287793 1034.73787262 1049.45965177 1073.08025697 1090.1897161 1106.34465585 1112.4141551 1114.82663304 1104.08703512 1078.17744338 1038.46882403 999.58420291 950.92934221 919.48932333 895.78344346 874.19704547 870.14342974 859.29292101 851.00065656 857.30313113 857.89452749 874.62710209 877.21520855 862.99172901 838.45387429 814.64126539 799.45325964 817.68251223 835.29708695 865.88117954 871.74006125 855.38974527 841.31520084 834.76325801 829.55045038 830.32636827 829.67457358 825.68298923 818.15783838 822.32378499 824.28338498 834.05438446 847.52799313 842.27449237 845.12341938 854.21141621 862.24970721 871.96635736 874.67429009 866.74506801 855.87210658 848.29482775 824.98398587 806.9256689 801.60613598 811.13484958 817.27125955 823.92055166 838.54053603 856.81313326 857.26204507 857.44455981 856.01626607 855.36793766 851.34998458 857.77695251 874.78768235 895.377774 900.92345794 904.80763131 900.79196305 906.87502801 911.38916811 909.71411204 917.25453394 914.79804452 913.19758577 912.7839257 887.34369865 859.50446689 842.70897393 818.49052936 804.04959567 798.37029261] [4015.96493689 3234.23088954 2814.26880663 2588.65735774 2467.45468743 2402.34235665 2367.36280043 2348.57113166 2338.47589737 2333.05254958 2330.13902618 2328.57382718 2327.73297313 2327.28125069 2327.03857698 2326.90820815 2326.83817158 2326.80054664 2326.78033382 2326.76947511 2326.76364162 2326.76050775 2326.75882418 2326.75791974 2326.75743385 2326.75717283 2326.7570326 2326.75695726 2326.75691679 2326.75689505 2326.75688337 2326.7568771 2326.75687373 2326.75687192 2326.75687094 2326.75687042 2326.75687014 2326.75686999 2326.75686991 2326.75686986 2326.75686984 2326.75686983 2326.75686982 2326.75686982 2326.75686982 2326.75686982 2326.75686981 2326.75686981 
2326.75686981 2326.75686981 2326.75686981 2326.75686981 2326.75686981 2326.75686981 2326.75686982 2326.75686982 2326.75686982 2326.75686982 2326.75686983 2326.75686984 2326.75686987 2326.75686991 2326.75686999 2326.75687014 2326.75687043 2326.75687095 2326.75687194 2326.75687376 2326.75687717 2326.7568835 2326.75689529 2326.75691724 2326.7569581 2326.75703416 2326.75717573 2326.75743926 2326.7579298 2326.75884292 2326.76054263 2326.76370653 2326.76959595 2326.78055875 2326.80096533 2326.83895096 2326.9096589 2327.04127747 2327.28627748 2327.74233021 2328.59124481 2330.17144805 2333.11290092 2338.58823775 2348.78024648 2367.75205505 2403.0669306 2468.80343807 2591.16797556 2818.94217005 3242.93007322 4032.15794181]
# Retrieve a simulation smoothing object
nile_simsmoother_1 = nile_model_1.simulation_smoother()
# Perform a first set of simulation smoothing recursions
nile_simsmoother_1.simulate()
print(nile_simsmoother_1.simulated_state[0, :-1], '\n')
# Perform a second set of recursions; each call draws a new simulated path
nile_simsmoother_1.simulate()
# BUG FIX: the original printed `simulated_state[0, :]-1`, which subtracts 1
# from every simulated value; the slice `[0, :-1]` matching the first print
# was clearly intended.
print(nile_simsmoother_1.simulated_state[0, :-1])
[1000.09779183 1002.44395002 1043.43340666 1039.97461487 1048.29469613 1022.3277188 1037.87263616 995.31562186 1010.77870773 1073.33232952 1087.77548842 1099.30756079 1090.96723165 1054.78378834 1048.22201051 1052.96384965 1027.44775633 977.65796836 1007.62384409 1062.52792147 1163.10945536 1179.43447074 1111.84490563 1111.90075254 1085.09641497 1063.01691852 1027.96988918 994.4820678 935.6657028 926.78354732 856.95884055 811.41231987 843.08431341 858.78122164 821.04569401 867.42075647 813.24279077 846.94817825 872.36963317 885.12841557 924.63322639 824.72204005 806.24144741 777.75286778 830.2378308 865.86307663 880.12026563 824.52317971 769.24443489 704.8018978 730.56255007 671.61282368 730.54195177 773.04173133 753.82770291 719.92902532 710.48216716 724.85413585 768.25399121 726.61444764 766.55804268 815.15487237 767.77759843 732.19058141 789.32740367 834.70120413 872.04151384 866.35605639 861.16550999 868.37890027 814.10294272 831.69372592 794.44787361 765.84809958 823.9299256 864.40020044 860.52610976 821.23403872 803.09868769 811.71035398 849.06679841 889.34882301 926.49429286 926.71417622 924.74210294 947.14578911 969.37217659 1005.26095653 1076.00562312 987.09645829 988.89567412 941.50056693 910.13928496 870.22631816 913.27539025 915.58632379 882.73185791 891.95310273 849.17081618] [1152.62277865 1164.91510589 1170.67864877 1152.70503014 1141.94694348 1215.48785908 1190.03578653 1183.5066448 1167.26482241 1137.95597208 1084.53747464 1075.97489506 1049.95523923 1092.23234137 1078.7237792 1048.55920618 1053.02644538 1052.40997113 1054.18892364 1129.75947832 1165.05354855 1101.01928153 1121.79791403 1127.37130412 1124.38929169 1092.14037304 1103.70552328 976.65564038 974.21640433 851.63628404 873.11192248 823.52111407 796.76358009 821.98607566 782.2924059 788.79033605 850.51141288 886.33169909 882.25491749 858.8144249 832.42707064 844.26708345 811.50541537 817.85350206 796.19913248 819.77477051 870.83238222 872.7744544 900.7335462 860.89975042 871.11629484 
853.79773683 837.67186281 848.91441548 805.35525995 749.10303011 761.58760968 751.4978225 794.34242454 789.87257984 747.80690246 852.8378422 857.9244585 841.99319402 803.62953389 801.86079115 768.56889066 748.80069277 720.47809825 725.0148779 718.6265834 720.5547735 738.04299428 782.62555084 779.40947912 820.30397095 863.8062448 829.41825124 773.75745235 825.67251754 899.60815244 930.70028997 924.99567458 898.01017958 925.48295516 937.4289204 860.41993186 944.86214028 980.28083199 931.44695373 854.5902395 817.78825835 855.61268927 860.56304791 901.76670893 890.12145424 856.22807961 817.04831076 800.16664712 807.44613402]
# Plot the observed data against the filtered and smoothed level estimates
plt.figure(figsize=(20, 7))
sns.scatterplot(x=nile.index, y=nile, data=nile, label='observed_data')
filtered_level = pd.Series(nile_filtered_1.filtered_state[0])
sns.lineplot(x=nile.index, y=filtered_level, data=filtered_level, label='Filtered Level')
smoothed_level = pd.Series(nile_smoothed_1.smoothed_state[0])
sns.lineplot(x=nile.index, y=smoothed_level, data=smoothed_level, label='Smoothed Level')
plt.legend()
<matplotlib.legend.Legend at 0x1a21dc7e7f0>
class ARMA11(sm.tsa.statespace.MLEModel):
    """ARMA(1,1) model in state space form."""

    # [AR coefficient, MA coefficient, innovation variance]
    start_params = [0, 0, 1]

    def __init__(self, endog):
        # Two states, one shock; the initial state distribution is taken
        # from the model's stationary distribution.
        super().__init__(
            endog, k_states=2, k_posdef=1, initialization='stationary'
        )
        # Fixed elements of the system matrices
        self['design', 0, 0] = 1.
        self['transition', 1, 0] = 1.
        self['selection', 0, 0] = 1.

    def update(self, params, **kwargs):
        # params = [AR coefficient, MA coefficient, innovation variance]
        self['design', 0, 1] = params[1]
        self['transition', 0, 0] = params[0]
        self['state_cov', 0, 0] = params[2]
# Instantiate a new ARMA(1,1) object on the inflation data and evaluate
# the loglikelihood at the starting parameters
inf_model = ARMA11(inf)
print(inf_model.loglike(inf_model.start_params))
-2737.4880350554376
# Load the generic minimization function from scipy
from scipy.optimize import minimize

# scipy minimizes, so maximize the loglikelihood by minimizing its negative
nile_model_2 = LocalLevel(nile)

def neg_loglike(params):
    return -nile_model_2.loglike(params)

# Perform numerical optimization with Nelder-Mead
output = minimize(neg_loglike, nile_model_2.start_params, method='Nelder-Mead')
print(output.x)
print(nile_model_2.loglike(output.x))
[15108.31431385 1463.54742276] -632.5376855872639
class FirstMLELocalLevel(sm.tsa.statespace.MLEModel):
    """Local level model with named parameters, usable with MLEModel.fit()."""

    start_params = [1.0, 1.0]
    param_names = ['obs.var', 'level_var']

    def __init__(self, endog):
        super().__init__(endog, k_states=1)
        # Fixed elements of the state space matrices
        self['design', 0, 0] = 1.0
        self['transition', 0, 0] = 1.0
        self['selection', 0, 0] = 1.0
        # Approximate diffuse initialization; burn the first loglikelihood value
        self.initialize_approximate_diffuse()
        self.loglikelihood_burn = 1

    def update(self, params, **kwargs):
        # Let the base class transform the parameters if not yet transformed
        params = super().update(params, **kwargs)
        self['obs_cov', 0, 0] = params[0]
        self['state_cov', 0, 0] = params[1]
# Instantiate and check the loglikelihood at the hand-picked parameters
nile_mlemodel_1 = FirstMLELocalLevel(nile)
print(nile_mlemodel_1.loglike([15099.0, 1469.1]), '\n')
# Fit by maximum likelihood; Nelder-Mead is specified as method='nm' here
nile_mleresults_1 = nile_mlemodel_1.fit(method='nm', maxiter=1000)
print(nile_mleresults_1.summary())
-632.5376950475525
Optimization terminated successfully.
Current function value: 6.325377
Iterations: 107
Function evaluations: 214
Statespace Model Results
==============================================================================
Dep. Variable: volume No. Observations: 100
Model: FirstMLELocalLevel Log Likelihood -632.538
Date: Sun, 18 Apr 2021 AIC 1269.075
Time: 12:08:37 BIC 1274.266
Sample: 01-01-1871 HQIC 1271.175
- 01-01-1970
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
obs.var 1.511e+04 2586.966 5.840 0.000 1e+04 2.02e+04
level_var 1463.5474 843.718 1.735 0.083 -190.109 3117.203
===================================================================================
Ljung-Box (L1) (Q): 1.35 Jarque-Bera (JB): 0.05
Prob(Q): 0.24 Prob(JB): 0.98
Heteroskedasticity (H): 0.61 Skew: -0.03
Prob(H) (two-sided): 0.16 Kurtosis: 3.08
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Diagnostic plots for the fitted model
nile_mleresults_1.plot_diagnostics(figsize=(20, 15))
C:\Users\yoshi\.virtualenvs\study_state_space-29aUVtuN\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence. ax.plot(x, y, fmt, **plot_style)
# Re-fit using L-BFGS
nile_fit_result = nile_mlemodel_1.fit(method='lbfgs', maxiter=100)
# Estimation results
print(nile_fit_result.summary())
# Diagnostic plots for the estimated state/trend
nile_fit_result.plot_diagnostics()
Statespace Model Results
==============================================================================
Dep. Variable: volume No. Observations: 100
Model: FirstMLELocalLevel Log Likelihood -634.625
Date: Sun, 18 Apr 2021 AIC 1273.250
Time: 12:12:18 BIC 1278.440
Sample: 01-01-1871 HQIC 1275.350
- 01-01-1970
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
obs.var 9416.0557 1896.898 4.964 0.000 5698.203 1.31e+04
level_var 4449.0905 1581.087 2.814 0.005 1350.216 7547.965
===================================================================================
Ljung-Box (L1) (Q): 0.30 Jarque-Bera (JB): 0.14
Prob(Q): 0.59 Prob(JB): 0.93
Heteroskedasticity (H): 0.65 Skew: 0.09
Prob(H) (two-sided): 0.21 Kurtosis: 2.95
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
C:\Users\yoshi\.virtualenvs\study_state_space-29aUVtuN\lib\site-packages\statsmodels\graphics\gofplots.py:993: UserWarning: marker is redundantly defined by the 'marker' keyword argument and the fmt string "bo" (-> marker='o'). The keyword argument will take precedence.
ax.plot(x, y, fmt, **plot_style)
# Parameters estimated by maximum likelihood
nile_fit_result.params
obs.var 9416.055652 level_var 4449.090478 dtype: float64
# Run the Kalman filter at the parameters estimated by maximum likelihood
nile_filtered_2 = nile_model_1.filter(nile_fit_result.params)
# ...and at the hand-picked parameters, for comparison.
# BUG FIX: the original passed [15099.0, 14969.1]; the second value is
# inconsistent with the 1469.1 used everywhere else for this model and is
# almost certainly a typo.
nile_filtered_1 = nile_model_1.filter([15099.0, 1469.1])
# Print the filtered estimate of the unobserved level and its variance
print('filtering後のナイル川の流量:')
print(nile_filtered_1.filtered_state[0], '\n')
print('filtering後のナイル川の流量の分散')
print(nile_filtered_1.filtered_state_cov[0, 0])
# Plot observed data and both filtered level estimates
plt.figure(figsize=(20, 7))
sns.scatterplot(x=nile.index, y=nile, data=nile, label='observed_data')
filtered_level = pd.Series(nile_filtered_1.filtered_state[0])
sns.lineplot(x=nile.index, y=filtered_level, data=filtered_level, label='Filtered Level')
filtered_level = pd.Series(nile_filtered_2.filtered_state[0])
sns.lineplot(x=nile.index, y=filtered_level, data=filtered_level, label='estimated_feature_Filtered Level')
plt.legend()
<matplotlib.legend.Legend at 0x1a22260e340>
class MLELocalLevel(sm.tsa.statespace.MLEModel):
    """Local level model with a squared-parameter transformation, so the
    optimizer cannot select negative variances."""

    start_params = [1.0, 1.0]
    param_names = ['obs.var', 'level.var']

    def __init__(self, endog):
        super().__init__(endog, k_states=1)
        # Fixed elements of the state space matrices
        self['design', 0, 0] = 1.0
        self['transition', 0, 0] = 1.0
        self['selection', 0, 0] = 1.0
        # Approximate diffuse initialization; burn the first loglikelihood value
        self.initialize_approximate_diffuse()
        self.loglikelihood_burn = 1

    def transform_params(self, params):
        # Unconstrained optimizer space -> model space (non-negative variances)
        return params**2

    def untransform_params(self, params):
        # Model space -> unconstrained optimizer space
        return params**0.5

    def update(self, params, **kwargs):
        # Let the base class transform the parameters if not yet transformed
        params = super().update(params, **kwargs)
        self['obs_cov', 0, 0] = params[0]
        self['state_cov', 0, 0] = params[1]
All of the code given above then applies equally to this new model, except that this class is robust to the optimizer selecting negative parameters.
[2] The transformations to induce stationarity are made available in this package as the functions sm.tsa.statespace.tools.constrain_stationary_univariate and sm.tsa.statespace.tools.constrain_stationary_multivariate. Their inverses are also available.