## AutoKerasのexample

In [1]:
# Load the `lab_black` IPython extension (presumably the Black auto-formatter
# for JupyterLab cells — confirm it is installed in the kernel environment).
%load_ext lab_black

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

# ボストンの住宅価格データ
from sklearn.datasets import load_boston

# 前処理
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 評価指標
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import tensorflow as tf
import autokeras as ak

2021-08-30 06:59:39.745601: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-08-30 06:59:39.746732: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# Load the Boston housing dataset shipped with scikit-learn.
# NOTE(review): `load_boston` was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version still provides it.
boston = load_boston()

# Build one frame holding the explanatory variables plus the target ("MEDV")
# appended as the last column.
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(
    MEDV=boston.target
)

# Preview the first rows.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [4]:
# Seed used for the train/test shuffle.
RANDOM_STATE = 10

# Fraction of the rows held out for evaluation.
TEST_SIZE = 0.2

# Every column except the last is a feature; the last column is the target.
# Split both into training and evaluation subsets.
x_train, x_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    random_state=RANDOM_STATE,
    test_size=TEST_SIZE,
)

# Re-attach the target to each subset so features and label live in one frame.
df_train = pd.concat([x_train, y_train], axis="columns")
df_test = pd.concat([x_test, y_test], axis="columns")

# Column selectors used by the modelling cells.
label_name = ["MEDV"]
feature_names = boston.feature_names

In [5]:
# Configure the AutoKeras search over structured-data regression models.
reg = ak.StructuredDataRegressor(
    overwrite=True,  # discard results of any previous search in the project directory
    max_trials=10,  # It tries 10 different models.
    # Seed the search so repeated runs explore the same candidates.
    # NOTE(review): weight initialisation may still vary between runs.
    seed=RANDOM_STATE,
)

In [6]:
# Select the training inputs and target from the combined training frame.
train_features = df_train[feature_names]
train_target = df_train[label_name]

# Run the model search and training; validation_split=0.2 asks AutoKeras to
# use 20% of the training rows as validation data.
reg.fit(x=train_features, y=train_target, validation_split=0.2)

Trial 10 Complete [00h 01m 24s]
val_loss: 21.92863655090332

Best val_loss So Far: 16.764902114868164
Total elapsed time: 00h 54m 43s
INFO:tensorflow:Oracle triggered exit
Epoch 1/307
Epoch 2/307
Epoch 3/307
Epoch 4/307
Epoch 5/307
Epoch 6/307
Epoch 7/307
Epoch 8/307
Epoch 9/307
Epoch 10/307
Epoch 11/307
Epoch 12/307
Epoch 13/307
Epoch 14/307
Epoch 15/307
Epoch 16/307
Epoch 17/307
Epoch 18/307
Epoch 19/307
Epoch 20/307
Epoch 21/307
Epoch 22/307
Epoch 23/307
Epoch 24/307
Epoch 25/307
Epoch 26/307
Epoch 27/307
Epoch 28/307
Epoch 29/307
Epoch 30/307
Epoch 31/307
Epoch 32/307
Epoch 33/307
Epoch 34/307
Epoch 35/307
Epoch 36/307
Epoch 37/307
Epoch 38/307
Epoch 39/307
Epoch 40/307
Epoch 41/307
Epoch 42/307
Epoch 43/307
Epoch 44/307
Epoch 45/307
Epoch 46/307
Epoch 47/307
Epoch 48/307
Epoch 49/307
Epoch 50/307
Epoch 51/307
Epoch 52/307
Epoch 53/307
Epoch 54/307
Epoch 55/307
Epoch 56/307
Epoch 57/307
Epoch 58/307
Epoch 59/307
Epoch 60/307
Epoch 61/307
Epoch 62/307
Epoch 63/307
Epoch 64/307
Epoch

2021-08-30 07:55:39.262893: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ./structured_data_regressor/best_model/assets


In [7]:
# Export the model produced by the AutoKeras search and print its
# layer-by-layer summary.
model = reg.export_model()
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 13)]              0         
_________________________________________________________________
multi_category_encoding (Mul (None, 13)                0         
_________________________________________________________________
normalization (Normalization (None, 13)                27        
_________________________________________________________________
dense (Dense)                (None, 32)                448       
_________________________________________________________________
re_lu (ReLU)                 (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               16896     
_________________________________________________________________
re_lu_1 (ReLU)               (None, 512)               0     

In [8]:
# Predict the target for the held-out evaluation rows, using feature columns only.
y_pred = reg.predict(x=df_test[feature_names])



In [9]:
# Evaluation
def calculate_scores(true, pred) -> pd.DataFrame:
    """Compute all regression evaluation metrics at once.

    Parameters
    ----------
    true : array-like
        Observed (ground-truth) values.
    pred : array-like
        Predicted values.

    Returns
    -------
    scores : pd.DataFrame
        Single-row frame (index ``"scores"``) with the columns
        ``R2``, ``MAE``, ``MSE`` and ``RMSE``.

    """
    # Compute MSE once and reuse it for RMSE instead of evaluating it twice.
    mse = mean_squared_error(true, pred)
    return pd.DataFrame(
        {
            "R2": r2_score(true, pred),
            "MAE": mean_absolute_error(true, pred),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
        },
        index=["scores"],
    )

In [10]:
# Score the predictions against the held-out targets. Ending the cell with the
# bare expression lets the notebook render the DataFrame with its rich display
# instead of a plain-text print.
scores = calculate_scores(y_test, y_pred)
scores

              R2       MAE        MSE      RMSE
scores  0.871147  2.724048  13.475576  3.670909
