import pandas as pd
import matplotlib.pyplot as plt
import io
import requests
# 飛行機乗客数データ
url = "https://www.analyticsvidhya.com/wp-content/uploads/2016/02/AirPassengers.csv"
stream = requests.get(url).content
content = pd.read_csv(io.StringIO(stream.decode('utf-8')), index_col='Month', parse_dates=True, dtype='float')
content.head()
passengers = content['#Passengers'][:120]
passengers_plot = content['#Passengers']
plt.plot(passengers_plot)
plt.show()
from statsmodels import api as s_api
result = s_api.tsa.stattools.adfuller(passengers)
result
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('critical Values:')
for key, value in result[4].items():
print('臨界値\t%s: %.3f' % (key, value))
s_api.graphics.tsa.plot_acf(passengers, lags=35) # 自己相関表示
s_api.graphics.tsa.plot_pacf(passengers, lags=35) # 偏自己相関表示
plt.show()
from statsmodels import tsa
import warnings
warnings.filterwarnings('ignore')
# ARのorder選択
for i in range(10):
model = tsa.arima.model.ARIMA(passengers, order=(i+1, 0, 0)).fit()
print('order ',i,"'s AIC: ", model.aic)
model = tsa.arima.model.ARIMA(passengers, order=(9, 0, 0)).fit()
# 残差の確認
resid = model.resid
resid[:5]
fig = plt.figure(figsize=(5,8))
ax1 = fig.add_subplot(211)
fig = s_api.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = s_api.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
pred = model.predict('1955-01-01', '1958-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[70:120],'--')
plt.plot(pred, "k")
plt.xticks(rotation=45)
pred = model.predict('1958-01-01', '1965-12-01')
plt.figure(figsize=(4,5))
plt.plot(passengers[40:],'--')
plt.plot(pred, "k")
plt.xticks(rotation=45)
plt.show()
# ARモデルでの結果を参照
# ARのorder選択
for i in range(10):
model = tsa.arima.model.ARIMA(passengers, order=(0, 0, i+1)).fit()
print('order ',i,"'s AIC: ", model.aic, "'s BIC: ", model.bic)
model = tsa.arima.model.ARIMA(passengers, order=(0, 0, 6)).fit()
resid = model.resid
resid
fig = plt.figure(figsize=(5,8))
ax1 = fig.add_subplot(211)
fig = s_api.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = s_api.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
pred = model.predict('1955-01-01', '1958-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[70:120],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
pred = model.predict('1958-01-01', '1965-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[40:],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
plt.legend()
plt.show()
# ARモデルでの結果参照
# モデルの選定
sellected_order = tsa.stattools.arma_order_select_ic(passengers, max_ar=10, max_ma=10, ic='aic')
# 選定結果
sellected_order.aic_min_order
model = tsa.arima.model.ARIMA(passengers, order=(8, 0, 9)).fit()
resid = model.resid
resid
fig = plt.figure(figsize=(5,8))
ax1 = fig.add_subplot(211)
fig = s_api.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = s_api.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
pred = model.predict('1955-01-01', '1958-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[70:120],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
pred = model.predict('1958-01-01', '1965-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[40:],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
plt.legend()
plt.show()
import numpy as np
# ARIMAのorder選択
# まずデータを差分して定常性にする
diffed_passengers = np.diff(passengers)
diffed_passengers
# モデルの選定
sellected_order = tsa.stattools.arma_order_select_ic(diffed_passengers, max_ar=10, max_ma=10, ic='aic')
# 選定結果
sellected_order.aic_min_order
# モデル
model = tsa.arima.model.ARIMA(passengers, order=(10, 1, 4)).fit()
resid = model.resid
resid
fig = plt.figure(figsize=(5,8))
ax1 = fig.add_subplot(211)
fig = s_api.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = s_api.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
pred = model.predict('1955-01-01', '1958-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[70:120],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
pred = model.predict('1958-01-01', '1965-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[40:],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
plt.legend()
plt.show()
# 残差のプロット
plt.plot(resid)
# モデル
model = tsa.statespace.sarimax.SARIMAX(passengers, order=(10,1,4), seasonal_order=(1,1,1,12)).fit()
resid = model.resid
resid
fig = plt.figure(figsize=(5,8))
ax1 = fig.add_subplot(211)
fig = s_api.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = s_api.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
pred = model.predict('1955-01-01', '1958-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[70:120],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
pred = model.predict('1958-01-01', '1965-12-01')
plt.figure(figsize=(6,5))
plt.plot(passengers[40:],'--', label='real value')
plt.plot(pred, "k", label='predicted value')
plt.xticks(rotation=45)
plt.legend()
plt.show()
# 残差のプロット
plt.plot(resid)