#ライブラリインストール
import numpy as np
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.utils import np_utils
from keras import backend as K
from keras.wrappers.scikit_learn import KerasRegressor
# --- Data preparation ----------------------------------------------------
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
# in 1.2; on a modern sklearn this call raises — TODO confirm pinned version.
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)  # feature matrix
Y = pd.DataFrame(boston.target, columns=['Price'])           # regression target
print(X.head(3))
print(Y.head(3))

# Standardize to zero mean / unit variance. This is tabular (not image)
# data, so both the features and the target are standardized.
sc = StandardScaler()
X = sc.fit_transform(X)
print(pd.DataFrame(X).describe())  # sanity check

# Standardize the target with a fresh scaler; `sc` is kept around so the
# predictions/errors can be mapped back to original units later.
sc = StandardScaler()
Y = sc.fit_transform(Y)
pd.DataFrame(Y).describe()  # sanity check (value discarded when run as a script)

# Hold out a test split (sklearn default 75/25; unseeded, so non-deterministic).
train_x, test_x, train_y, test_y = train_test_split(X, Y)
# Model definition
def build_model(activation, optimizer):
    """Build and compile a 2-hidden-layer MLP regressor.

    Args:
        activation: hidden-layer activation name chosen by the
            hyper-parameter search (e.g. 'relu', 'sigmoid').
        optimizer: optimizer name/instance chosen by the search
            (e.g. 'adam', 'adagrad'); passed straight to ``compile``.

    Returns:
        A compiled ``Sequential`` Keras model.
    """
    model = Sequential()
    # Input width is taken from the module-level training matrix.
    model.add(Dense(50, activation=activation, input_shape=(train_x.shape[1],)))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation=activation))
    model.add(Dropout(0.2))
    model.add(Dense(1))  # single linear output for regression
    # BUG FIX: the original overwrote the `optimizer` argument with a
    # hard-coded RMSprop(0.0008), so the random search over optimizers
    # had no effect. Use the argument the search actually supplies.
    model.compile(
        loss='mean_squared_error',
        optimizer=optimizer,
        metrics=['mean_absolute_error', 'mean_squared_error'],
    )
    return model
# --- Randomized hyper-parameter search for the Keras model ---------------
# Candidate values; dict keys must match build_model's arguments and the
# KerasRegressor fit options.
activation = ['relu', 'sigmoid']
optimizer = ['adam', 'adagrad']
epochs = list(range(10, 111, 20))
batch_size = list(range(5, 30, 5))
param_dist = {
    'activation': activation,
    'optimizer': optimizer,
    'epochs': epochs,
    'batch_size': batch_size,
}

# Wrap the Keras builder so scikit-learn's search machinery can drive it.
model = KerasRegressor(build_fn=build_model, verbose=1)

# Run the randomized search with 3-fold cross-validation on all cores.
R_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    cv=3,
    n_jobs=-1,
)
R_result = R_search.fit(train_x, train_y)
# --- Best parameters and held-out evaluation (Keras model) ---------------
print(R_result.best_params_)
print(R_result.best_score_)

# Predict on the held-out split with the best estimator found.
eval_num = R_search.predict(test_x)

# Errors in standardized (z-score) units of the target.
ab_error = mean_absolute_error(eval_num, test_y)
me_error = mean_squared_error(eval_num, test_y)
print("mean_absolute_error: ", str(round(ab_error, 3)))
# BUG FIX: sc.inverse_transform() adds the target *mean* back, which is
# wrong for an error — an error is a difference of targets, so only the
# scale applies: MAE scales by sigma, MSE by sigma**2. (inverse_transform
# on a 1-D list also fails on modern scikit-learn.)
print("標準化から戻した誤差: ", ab_error * sc.scale_[0])
print("mean_squared_error: ", str(round(me_error, 3)))
print("標準化から戻した誤差: ", me_error * sc.scale_[0] ** 2)
# --- Random-forest baseline: randomized hyper-parameter search -----------
from sklearn.ensemble import RandomForestRegressor

param_dict = {
    "max_depth": list(range(2, 23, 4)),
    "n_estimators": list(range(50, 501, 50)),
    "max_features": list(range(1, 10, 2)),
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
}

# 3-fold CV scored by negated MSE (sklearn maximizes scores), all cores.
R_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=8),
    param_distributions=param_dict,
    scoring="neg_mean_squared_error",
    cv=3,
    n_jobs=-1,
)
R_result = R_search.fit(train_x, train_y)
# --- Best parameters and held-out evaluation (random forest) -------------
print(R_result.best_params_)
print(R_result.best_score_)

# Predict on the held-out split with the best estimator found.
eval_num = R_search.predict(test_x)

# Errors in standardized (z-score) units of the target.
ab_error = mean_absolute_error(eval_num, test_y)
me_error = mean_squared_error(eval_num, test_y)
print("mean_absolute_error: ", str(round(ab_error, 3)))
# BUG FIX: sc.inverse_transform() adds the target *mean* back, which is
# wrong for an error — an error is a difference of targets, so only the
# scale applies: MAE scales by sigma, MSE by sigma**2. (inverse_transform
# on a 1-D list also fails on modern scikit-learn.)
print("標準化から戻した誤差: ", ab_error * sc.scale_[0])
print("mean_squared_error: ", str(round(me_error, 3)))
print("標準化から戻した誤差: ", me_error * sc.scale_[0] ** 2)