ボストン住宅価格予測を用いて、Kerasのハイパーパラメータをチューニング

In [61]:
#ライブラリインストール

import numpy as np
import pandas as pd

from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers

from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.utils import np_utils
from keras import backend as K
from keras.wrappers.scikit_learn import KerasRegressor
In [2]:
# データ用意
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names) # 説明変数
Y = pd.DataFrame(boston.target, columns=['Price']) # 目的変数
print(X.head(3))
print(Y.head(3))
      CRIM    ZN  INDUS  CHAS    NOX  ...  RAD    TAX  PTRATIO       B  LSTAT
0  0.00632  18.0   2.31   0.0  0.538  ...  1.0  296.0     15.3  396.90   4.98
1  0.02731   0.0   7.07   0.0  0.469  ...  2.0  242.0     17.8  396.90   9.14
2  0.02729   0.0   7.07   0.0  0.469  ...  2.0  242.0     17.8  392.83   4.03

[3 rows x 13 columns]
   Price
0   24.0
1   21.6
2   34.7
In [3]:
# データ正規化
# 今回は、色データではないので、説明変数、目的変数どちらも平均0、分散1で標準化

# 説明変数
sc = StandardScaler()
X = sc.fit_transform(X)
print(pd.DataFrame(X).describe()) # 確認用
                 0             1   ...            11            12
count  5.060000e+02  5.060000e+02  ...  5.060000e+02  5.060000e+02
mean  -8.787437e-17 -6.343191e-16  ...  8.163101e-15 -3.370163e-16
std    1.000990e+00  1.000990e+00  ...  1.000990e+00  1.000990e+00
min   -4.197819e-01 -4.877224e-01  ... -3.907193e+00 -1.531127e+00
25%   -4.109696e-01 -4.877224e-01  ...  2.050715e-01 -7.994200e-01
50%   -3.906665e-01 -4.877224e-01  ...  3.811865e-01 -1.812536e-01
75%    7.396560e-03  4.877224e-02  ...  4.336510e-01  6.030188e-01
max    9.933931e+00  3.804234e+00  ...  4.410519e-01  3.548771e+00

[8 rows x 13 columns]
In [4]:
# 目的変数を標準化
sc = StandardScaler()
sc.fit(Y)
Y = sc.transform(Y) # scは、予測値を戻す時に再利用する
pd.DataFrame(Y).describe() # 確認用
Out[4]:
0
count 5.060000e+02
mean -4.247810e-16
std 1.000990e+00
min -1.908226e+00
25% -5.994557e-01
50% -1.450593e-01
75% 2.685231e-01
max 2.989460e+00
In [11]:
# 学習データを訓練用、テスト用に分ける
train_x, test_x, train_y, test_y = train_test_split(X, Y)
In [12]:
# モデル作成
def build_model(activation, optimizer):
    
    model = Sequential()
    model.add(Dense(50, activation=activation, input_shape=(train_x.shape[1],)))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation=activation))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    
    optimizer = tf.keras.optimizers.RMSprop(0.0008)

    model.compile(
        loss='mean_squared_error',
        optimizer=optimizer,
        metrics=['mean_absolute_error', 'mean_squared_error']
    )

    return model
In [41]:
# パラメータのランダムサーチ準備
activation = ['relu', 'sigmoid']
optimizer = ['adam', 'adagrad']
epochs = [i for i in range(10, 111, 20)]
batch_size = [i for i in range(5, 30, 5)]

param_dist = dict(activation=activation,
                  optimizer=optimizer,
                  epochs=epochs,
                  batch_size=batch_size)

# モデル作成
model = KerasRegressor(build_fn=build_model, verbose=1)

# RandamizeSearchの実行(cv=3)
R_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, cv=3, n_jobs=-1)
R_result = R_search.fit(train_x, train_y)
Epoch 1/70
16/16 [==============================] - 0s 1ms/step - loss: 0.9313 - mean_absolute_error: 0.7321 - mean_squared_error: 0.9313
Epoch 2/70
16/16 [==============================] - 0s 1ms/step - loss: 0.5828 - mean_absolute_error: 0.5335 - mean_squared_error: 0.5828
Epoch 3/70
16/16 [==============================] - 0s 1ms/step - loss: 0.5014 - mean_absolute_error: 0.4921 - mean_squared_error: 0.5014
Epoch 4/70
16/16 [==============================] - 0s 1ms/step - loss: 0.3624 - mean_absolute_error: 0.4262 - mean_squared_error: 0.3624
Epoch 5/70
16/16 [==============================] - 0s 1ms/step - loss: 0.3769 - mean_absolute_error: 0.4404 - mean_squared_error: 0.3769
Epoch 6/70
16/16 [==============================] - 0s 1ms/step - loss: 0.3314 - mean_absolute_error: 0.4164 - mean_squared_error: 0.3314
Epoch 7/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2833 - mean_absolute_error: 0.3803 - mean_squared_error: 0.2833
Epoch 8/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2834 - mean_absolute_error: 0.3800 - mean_squared_error: 0.2834
Epoch 9/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2681 - mean_absolute_error: 0.3593 - mean_squared_error: 0.2681
Epoch 10/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2913 - mean_absolute_error: 0.3712 - mean_squared_error: 0.2913
Epoch 11/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2512 - mean_absolute_error: 0.3648 - mean_squared_error: 0.2512
Epoch 12/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2346 - mean_absolute_error: 0.3475 - mean_squared_error: 0.2346
Epoch 13/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2639 - mean_absolute_error: 0.3588 - mean_squared_error: 0.2639
Epoch 14/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2276 - mean_absolute_error: 0.3305 - mean_squared_error: 0.2276
Epoch 15/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2192 - mean_absolute_error: 0.3172 - mean_squared_error: 0.2192
Epoch 16/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2227 - mean_absolute_error: 0.3246 - mean_squared_error: 0.2227
Epoch 17/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2136 - mean_absolute_error: 0.3303 - mean_squared_error: 0.2136
Epoch 18/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2013 - mean_absolute_error: 0.3061 - mean_squared_error: 0.2013
Epoch 19/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1955 - mean_absolute_error: 0.3196 - mean_squared_error: 0.1955
Epoch 20/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1932 - mean_absolute_error: 0.3024 - mean_squared_error: 0.1932
Epoch 21/70
16/16 [==============================] - 0s 1ms/step - loss: 0.2159 - mean_absolute_error: 0.3293 - mean_squared_error: 0.2159
Epoch 22/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1784 - mean_absolute_error: 0.2986 - mean_squared_error: 0.1784
Epoch 23/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1862 - mean_absolute_error: 0.3023 - mean_squared_error: 0.1862
Epoch 24/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1723 - mean_absolute_error: 0.2899 - mean_squared_error: 0.1723
Epoch 25/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1496 - mean_absolute_error: 0.2867 - mean_squared_error: 0.1496
Epoch 26/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1728 - mean_absolute_error: 0.2945 - mean_squared_error: 0.1728
Epoch 27/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1760 - mean_absolute_error: 0.2969 - mean_squared_error: 0.1760
Epoch 28/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1753 - mean_absolute_error: 0.2986 - mean_squared_error: 0.1753
Epoch 29/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1681 - mean_absolute_error: 0.2818 - mean_squared_error: 0.1681
Epoch 30/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1569 - mean_absolute_error: 0.2856 - mean_squared_error: 0.1569
Epoch 31/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1679 - mean_absolute_error: 0.2839 - mean_squared_error: 0.1679
Epoch 32/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1524 - mean_absolute_error: 0.2736 - mean_squared_error: 0.1524
Epoch 33/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1974 - mean_absolute_error: 0.3078 - mean_squared_error: 0.1974
Epoch 34/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1775 - mean_absolute_error: 0.2848 - mean_squared_error: 0.1775
Epoch 35/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1730 - mean_absolute_error: 0.2807 - mean_squared_error: 0.1730
Epoch 36/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1873 - mean_absolute_error: 0.2956 - mean_squared_error: 0.1873
Epoch 37/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1754 - mean_absolute_error: 0.2900 - mean_squared_error: 0.1754
Epoch 38/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1521 - mean_absolute_error: 0.2746 - mean_squared_error: 0.1521
Epoch 39/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1575 - mean_absolute_error: 0.2748 - mean_squared_error: 0.1575
Epoch 40/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1457 - mean_absolute_error: 0.2616 - mean_squared_error: 0.1457
Epoch 41/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1460 - mean_absolute_error: 0.2717 - mean_squared_error: 0.1460
Epoch 42/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1602 - mean_absolute_error: 0.2773 - mean_squared_error: 0.1602
Epoch 43/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1448 - mean_absolute_error: 0.2693 - mean_squared_error: 0.1448
Epoch 44/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1909 - mean_absolute_error: 0.2963 - mean_squared_error: 0.1909
Epoch 45/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1557 - mean_absolute_error: 0.2830 - mean_squared_error: 0.1557
Epoch 46/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1361 - mean_absolute_error: 0.2690 - mean_squared_error: 0.1361
Epoch 47/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1283 - mean_absolute_error: 0.2502 - mean_squared_error: 0.1283
Epoch 48/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1614 - mean_absolute_error: 0.2805 - mean_squared_error: 0.1614
Epoch 49/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1342 - mean_absolute_error: 0.2579 - mean_squared_error: 0.1342
Epoch 50/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1567 - mean_absolute_error: 0.2790 - mean_squared_error: 0.1567
Epoch 51/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1360 - mean_absolute_error: 0.2574 - mean_squared_error: 0.1360
Epoch 52/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1546 - mean_absolute_error: 0.2754 - mean_squared_error: 0.1546
Epoch 53/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1477 - mean_absolute_error: 0.2632 - mean_squared_error: 0.1477
Epoch 54/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1550 - mean_absolute_error: 0.2694 - mean_squared_error: 0.1550
Epoch 55/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1417 - mean_absolute_error: 0.2654 - mean_squared_error: 0.1417
Epoch 56/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1351 - mean_absolute_error: 0.2551 - mean_squared_error: 0.1351
Epoch 57/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1626 - mean_absolute_error: 0.2671 - mean_squared_error: 0.1626
Epoch 58/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1498 - mean_absolute_error: 0.2706 - mean_squared_error: 0.1498
Epoch 59/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1198 - mean_absolute_error: 0.2508 - mean_squared_error: 0.1198
Epoch 60/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1329 - mean_absolute_error: 0.2670 - mean_squared_error: 0.1329
Epoch 61/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1306 - mean_absolute_error: 0.2604 - mean_squared_error: 0.1306
Epoch 62/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1441 - mean_absolute_error: 0.2653 - mean_squared_error: 0.1441
Epoch 63/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1387 - mean_absolute_error: 0.2675 - mean_squared_error: 0.1387
Epoch 64/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1312 - mean_absolute_error: 0.2516 - mean_squared_error: 0.1312
Epoch 65/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1117 - mean_absolute_error: 0.2381 - mean_squared_error: 0.1117
Epoch 66/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1502 - mean_absolute_error: 0.2616 - mean_squared_error: 0.1502
Epoch 67/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1375 - mean_absolute_error: 0.2543 - mean_squared_error: 0.1375
Epoch 68/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1239 - mean_absolute_error: 0.2530 - mean_squared_error: 0.1239
Epoch 69/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1148 - mean_absolute_error: 0.2316 - mean_squared_error: 0.1148
Epoch 70/70
16/16 [==============================] - 0s 1ms/step - loss: 0.1089 - mean_absolute_error: 0.2377 - mean_squared_error: 0.1089
In [42]:
# 結果の確認
print(R_result.best_params_)
print(R_result.best_score_)
{'optimizer': 'adagrad', 'epochs': 70, 'batch_size': 25, 'activation': 'relu'}
-0.1566698451836904
In [43]:
# テストデータでスコア確認

# テストデータで予測
eval_num = R_search.predict(test_x)

# スコア
ab_error = mean_absolute_error(eval_num, test_y)
me_error = mean_squared_error(eval_num, test_y)
print("mean_absolute_error: ", str(round(ab_error,3)))
print("標準化から戻した誤差: ", sc.inverse_transform([ab_error,]))
print("mean_squared_error: ", str(round(me_error, 3)))
print("標準化から戻した誤差: ", sc.inverse_transform([me_error,]))
6/6 [==============================] - 0s 1ms/step
mean_absolute_error:  0.24
標準化から戻した誤差:  [24.74120674]
mean_squared_error:  0.153
標準化から戻した誤差:  [23.942141]

補:上記を、前処理なしでRandomForestで予測した場合

In [40]:
from sklearn.ensemble import RandomForestRegressor
In [69]:
 
In [70]:
param_dict = {"max_depth": [i for i in range(2,23,4)],
              "n_estimators": [i for i in range(50, 501, 50)],
              "max_features": [i for i in range(1, 10, 2)],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
}

R_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=8), 
                              param_distributions=param_dict,
                              scoring="neg_mean_squared_error",
                              cv=3, n_jobs=-1)
In [71]:
R_result = R_search.fit(train_x, train_y)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py:739: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  self.best_estimator_.fit(X, y, **fit_params)
In [72]:
# 結果の確認
print(R_result.best_params_)
print(R_result.best_score_)
{'n_estimators': 150, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 7, 'max_depth': 10, 'bootstrap': True}
-0.14709793511335337
In [73]:
# テストデータでスコア確認

# テストデータで予測
eval_num = R_search.predict(test_x)

# スコア
ab_error = mean_absolute_error(eval_num, test_y)
me_error = mean_squared_error(eval_num, test_y)
print("mean_absolute_error: ", str(round(ab_error,3)))
print("標準化から戻した誤差: ", sc.inverse_transform([ab_error,]))
print("mean_squared_error: ", str(round(me_error, 3)))
print("標準化から戻した誤差: ", sc.inverse_transform([me_error,]))
mean_absolute_error:  0.251
標準化から戻した誤差:  [24.84154319]
mean_squared_error:  0.153
標準化から戻した誤差:  [23.93806033]
In [ ]: