# Sprint 14　Keras

# ライブラリimport

In [1]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.optimizers import Adam
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from keras.datasets import mnist

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# 問題1　公式チュートリアルモデルを分担して実行

こちらのチュートリアルを実行：https://www.tensorflow.org/tutorials/quickstart/beginner

In [2]:
# MNIST読み込み
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 正規化
x_train, x_test = x_train / 255.0, x_test / 255.0

In [3]:
# モデル王増の定義
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [4]:
# 予測値を出してみる（1データだけ使用）
predictions = model(x_train[:1]).numpy()
predictions

array([[-0.13093692, -0.47392577,  0.26107258,  0.35263106, -0.0790967 ,
        -0.9014666 , -0.54679674,  0.05213387,  0.17111629, -0.5176721 ]],
      dtype=float32)

In [5]:
# softmax関数に通す
tf.nn.softmax(predictions).numpy()

array([[0.09784736, 0.069437  , 0.14480938, 0.15869382, 0.10305357,
        0.04528061, 0.06455702, 0.11750484, 0.13235159, 0.06646487]],
      dtype=float32)

In [6]:
# 損失関数定義
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [7]:
# 損失算出
loss_fn(y_train[:1], predictions).numpy()

3.0948763

In [8]:
# コンパイル（計算や予測を出来る状態にする）
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

In [9]:
# 学習実行
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x20a69033640>

In [10]:
# 評価(損失とACC)
model.evaluate(x_test,  y_test, verbose=3)

[0.07326526194810867, 0.9776999950408936]

In [11]:
# モデルが直接確率を返してくれるようにする
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [12]:
# 試しに予測
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[8.3041796e-09, 5.6546340e-10, 1.0115916e-05, 5.3007196e-05,
        5.4635467e-12, 1.8159639e-07, 2.8282157e-14, 9.9992692e-01,
        1.8998624e-07, 9.5465602e-06],
       [1.2725554e-08, 1.6306241e-05, 9.9997079e-01, 1.2123411e-05,
        2.0013726e-12, 4.6414650e-07, 2.1152909e-08, 2.1683359e-14,
        2.2905856e-07, 4.3922043e-11],
       [9.4939040e-08, 9.9940300e-01, 8.9352703e-05, 1.3640619e-05,
        2.9713123e-05, 3.0299407e-06, 3.1929144e-06, 3.9627586e-04,
        6.1507861e-05, 3.4474957e-07],
       [9.9968910e-01, 1.8258562e-07, 3.8924311e-05, 9.3883294e-08,
        3.5771245e-07, 5.4292595e-06, 2.3342160e-04, 2.5478857e-05,
        8.0399296e-09, 6.8240870e-06],
       [2.0514960e-06, 7.9705620e-09, 3.0756717e-06, 6.9470184e-07,
        9.9769837e-01, 2.9296822e-07, 5.1633247e-05, 2.2097697e-04,
        5.1294965e-06, 2.0178349e-03]], dtype=float32)>

# 問題3

## データ準備

In [13]:
# 読み込み
iris_dataset = load_iris()

# 整形
iris_dataframe = pd.DataFrame(data=iris_dataset.data, columns=iris_dataset.feature_names)
iris_datalabel = pd.DataFrame(data=iris_dataset.target,columns=['Species'])
df = pd.concat([iris_dataframe,iris_datalabel],axis=1)

In [14]:
# 2値分類のため絞り込み
df2 = df[(df["Species"] == 0)|(df["Species"] == 1)]

# 説明変数と目的変数に分割
y = df2["Species"]
X = df2.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = y.astype(np.int)[:, np.newaxis]


## モデル定義

In [15]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(1, activation = tf.nn.sigmoid))

In [16]:
# モデル構造の確認
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 50)                250       
_________________________________________________________________
dense_3 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 101       
Total params: 5,451
Trainable params: 5,451
Non-trainable params: 0
_________________________________________________________________


In [17]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [18]:
# モデルのコンパイル
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [19]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x20a7b4cbeb0>

In [20]:
# 2値分類のうちどちらに分類されるか
y_pred_proba = model.predict(X_test)[:, 0]
y_pred = np.where(y_pred_proba >0.5, 1, 0)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred_proba [6.7043304e-04 9.9999952e-01 6.5224071e-05 9.9999660e-01 9.9999797e-01
 9.9707991e-01 9.1292561e-05 9.9999815e-01 9.9999940e-01 9.9996769e-01
 9.9998474e-01 9.9996465e-01 9.9999869e-01 1.5782511e-05 1.4558434e-04
 3.4368038e-04 7.3590263e-06 7.8523159e-04 3.0973708e-05 1.4168024e-04]
y_pred [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]
y_test [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]


In [21]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss:', score[0])
print('test accuracy:', score[1])

test loss: 0.00026573744253255427
test accuracy: 1.0


# 問題4

## データ準備

In [22]:
# 説明変数と目的変数に分割
y = df["Species"]
X = df.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# onehotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train)
y_val_one_hot = enc.transform(y_val)
y_test_one_hot = enc.transform(y_test)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = y.astype(np.int)[:, np.newaxis]


## モデル定義

In [23]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.nn.softmax))

In [24]:
# モデル構造の確認
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 50)                250       
_________________________________________________________________
dense_6 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 303       
Total params: 5,653
Trainable params: 5,653
Non-trainable params: 0
_________________________________________________________________


In [25]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [26]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [27]:
# 学習実行
model.fit(
    X_train,
    y_train_one_hot,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val_one_hot)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


<tensorflow.python.keras.callbacks.History at 0x20a6c19f7c0>

In [28]:
# 予測値の最大インデックスを出力クラスとする
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
y_test [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]


In [29]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

Train loss: 0.12088367342948914
Train accuracy: 1.0


# 問題5

## データの準備

In [30]:
dataset_path ="train.csv"
df = pd.read_csv(dataset_path)

y = df["SalePrice"]
X = df.loc[:, ["GrLivArea", "YearBuilt"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]
y = np.log(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = y.astype(np.int)[:, np.newaxis]


## モデル定義

In [31]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(2,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.keras.activations.linear))

In [32]:
# モデル構造の確認
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 50)                150       
_________________________________________________________________
dense_9 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_10 (Dense)             (None, 3)                 303       
Total params: 5,553
Trainable params: 5,553
Non-trainable params: 0
_________________________________________________________________


In [33]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [34]:
# モデルのコンパイル
model.compile(
    loss='mse',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['mse']
)

In [35]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


<tensorflow.python.keras.callbacks.History at 0x20a6a60a820>

In [36]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba
print("y_pred", y_pred.ravel()[:5])
print('y_test', y_test.ravel()[:5])

y_pred [11.802851  12.634994  10.108998  11.290144  11.7639885]
y_test [12.20918779 11.79810441 11.60823564 12.16525065 11.38509209]


In [37]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('loss (test):', score[0])
print('Mean Squared Error (test):', score[1])

loss (test): 10.607439994812012
Mean Squared Error (test): 10.607439994812012


# 問題6

## データ準備

In [38]:
# 読み込み
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#　平滑化
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# 正規化
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

# 変形
y_train = y_train.astype(np.int)[:, np.newaxis]
y_test = y_test.astype(np.int)[:, np.newaxis]

# one-hotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:])
y_test_one_hot = enc.fit_transform(y_test[:])

# 分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_train = X_train.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_test = X_test.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train = y_train.astype(np.int)[:, np.newaxis]
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_test = y_test.astype(np.int)[:, np.newaxis]


## モデル定義

In [39]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(784,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(10, activation = tf.nn.softmax))

In [40]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [41]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [42]:
# 学習実行
model.fit(
    X_train[:1000],
    y_train[:1000],
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


<tensorflow.python.keras.callbacks.History at 0x20a6bf4f9d0>

In [43]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred[:10])
print('y_test', y_test.ravel()[:10])

y_pred [7 2 1 0 4 1 9 9 6 9]
y_test [7 2 1 0 4 1 4 9 5 9]


In [44]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 1.4795786142349243
Test accuracy: 0.7802000045776367
