## 匯入所有需要的套件模組

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# tensorflow.keras
import tensorflow as tf
print("Tensorflow version =", tf.__version__)
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras import initializers, regularizers, optimizers, layers
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras import backend as K

# 準備資料集

In [None]:
#@title 下載並解壓縮 MedNIST 資料
#@markdown 來源: https://medmnist.github.io/
!pip install -q --upgrade --no-cache-dir gdown
!gdown --id 1Jcrw4sN3V1GWPy2gAIjopvs6HDVvGiz2  # MedNIST.zip
!unzip -n -q 'MedNIST.zip' -d 'MedNIST'
print("... done")

In [None]:
# An .npz archive bundles several .npy files (each one a saved numpy array).
# Open one dataset archive from the extracted MedNIST folder and list the
# names of the arrays stored inside it.
data = np.load('MedNIST/pathmnist.npz')  # pick one dataset item
npy_file_lst = data.files

# One array name per line.
for npy in npy_file_lst:
    print(npy)

In [None]:
# Pull the image and label arrays for each split out of the archive.
# Note: the archive calls the validation split "val"; the notebook variables
# use "valid".
train_images_arr, valid_images_arr, test_images_arr = (
    data['train_images'],
    data['val_images'],
    data['test_images'],
)
train_labels_arr, valid_labels_arr, test_labels_arr = (
    data['train_labels'],
    data['val_labels'],
    data['test_labels'],
)

# Report shape and dtype of every array, one line per split.
for images_tag, images, labels_tag, labels in (
    ("train_images_arr:", train_images_arr, "\ttrain_labels_arr:", train_labels_arr),
    ("valid_images_arr:", valid_images_arr, "\tvalid_labels_arr:", valid_labels_arr),
    ("test_images_arr:", test_images_arr, "\ttest_labels_arr:", test_labels_arr),
):
    print(images_tag, images.shape, images.dtype, labels_tag, labels.shape, labels.dtype)

## 標記分析

In [None]:
# Merge the labels of all three splits (stacked along the first axis) so the
# label statistics below cover the whole dataset.
all_labels = np.concatenate((train_labels_arr, valid_labels_arr, test_labels_arr))
print("有這些標記:", np.unique(all_labels))

In [None]:
# Number of samples carrying each label, in the order of np.unique(all_labels).
# `all_labels == i` is a boolean mask with the same length as all_labels, so
# taking len() of it would report the total sample count for every class;
# count the True entries of the mask instead.
label_counts = [int(np.count_nonzero(all_labels == i)) for i in np.unique(all_labels)]
label_counts

In [None]:
# Draw the per-label sample counts as a pie chart; each wedge is captioned
# with its label value and its share formatted to one decimal place.
wedge_labels = np.unique(all_labels)
plt.pie(label_counts, labels=wedge_labels, autopct='%1.1f%%')
plt.show()

## 資料集準備

In [None]:
# Convert the integer labels to one-hot encoding.
# https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical
# PathMNIST has 9 classes; the position in this list is the class index.
CLASSES = ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


def _to_onehot(labels):
    """Return `labels` one-hot encoded over len(CLASSES) columns as uint8."""
    return to_categorical(labels, num_classes=len(CLASSES), dtype='uint8')


train_labels_onehot = _to_onehot(train_labels_arr)
valid_labels_onehot = _to_onehot(valid_labels_arr)
test_labels_onehot = _to_onehot(test_labels_arr)

# Show the first ten labels before and after encoding for comparison.
print("原本的標註長這樣:", train_labels_arr[:10].tolist())
print("獨熱的標註長這樣:", train_labels_onehot[:10].tolist())

# Shape and dtype of each encoded split.
for tag, onehot in (
    ("train_labels_onehot:", train_labels_onehot),
    ("valid_labels_onehot:", valid_labels_onehot),
    ("test_labels_onehot:", test_labels_onehot),
):
    print(tag, onehot.shape, onehot.dtype)

In [None]:
# Show one randomly chosen training image together with its label.
# Draw the index from the whole training split (0 .. N-1), the same way the
# test-image preview does; a fixed randint(100) would only ever show the
# first 100 images (indices 0-99).
i = np.random.randint(train_images_arr.shape[0])
label_idx = train_labels_arr[i, 0]  # labels are indexed [sample, 0]
plt.imshow(train_images_arr[i])
plt.title(f"{label_idx}: {CLASSES[label_idx]}")
plt.show()

# 模型 model design

In [None]:
# Stack the layers with "Sequential".
# The input is a 28 x 28 x 3 image.
#
# The image arrays are fed to the model exactly as loaded, with no scaling
# applied anywhere in the notebook (presumably raw 0-255 pixel values --
# check the dtype printed when the arrays were loaded). The Rescaling layer
# maps them to [0, 1] inside the model, so fit / evaluate / predict all get
# the same normalisation without any change to the data arrays.
#
# The final Dense(9) with softmax activation outputs the probability of each
# of the 9 classes (0-8).

model = Sequential([
    layers.Input(shape=(28,28,3)),
    layers.Rescaling(1.0 / 255),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.GlobalAveragePooling2D(),
    layers.Dense(32, activation='relu'),
    layers.Dense(9, activation='softmax')
])

# After the layers are defined the model has to be compiled.
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Print a summary of the architecture.
model.summary()

In [None]:
# Render the architecture as a diagram, annotated with the shapes, dtypes
# and names of the layers. Kept as the last expression of the cell so the
# notebook displays the returned image.
plot_model(
    model,
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
)

# 訓練 training

In [None]:
# Train the model.
# https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit
BATCH_SIZE = 32
EPOCH = 5

# Validation images/labels evaluated during training.
validation_pair = (valid_images_arr, valid_labels_onehot)

# `history` keeps the per-epoch metrics used for the training curves.
history = model.fit(
    x=train_images_arr,   # training images
    y=train_labels_onehot,  # training labels (one-hot)
    batch_size=BATCH_SIZE,
    epochs=EPOCH,
    validation_data=validation_pair,
)

In [None]:
#@title 訓練曲線
# Visualise the training history: accuracy on the top axes, loss below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6,12))

# One row per panel:
# (axes, training key, validation key, title, y-axis label, legend location)
panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Accuracy', 'Accuracy', 'upper left'),
    (ax2, 'loss', 'val_loss', 'Model loss', 'Loss', 'upper right'),
)

for axis, train_key, valid_key, panel_title, y_label, legend_loc in panels:
    # Training curve first, then validation, to match the legend order.
    axis.plot(history.history[train_key])
    axis.plot(history.history[valid_key])
    axis.set_title(panel_title)
    axis.set(ylabel=y_label, xlabel='Epoch')
    axis.legend(['Train', 'Valid'], loc=legend_loc)

# plt.savefig('train_history.png', dpi=96)  # <-- uncomment to save the plot
plt.show()

# 評估 Evaluation

In [None]:
# Evaluate the trained model on the validation split. Kept as the last
# expression of the cell so the notebook shows the returned result.
model.evaluate(
    x=valid_images_arr,
    y=valid_labels_onehot,
    verbose=2,
)

# 預測 predict

In [None]:
# Run the model on the test images; y_pred holds the softmax output for
# every test image.
y_pred = model.predict(x=test_images_arr, batch_size=BATCH_SIZE, verbose=1)
print("y_pred:", y_pred.shape, y_pred.dtype)

In [None]:
# Convert the predictions to class indices: for each row of y_pred take the
# position of the largest value along axis 1.
y_pred_class = y_pred.argmax(axis=1)
print("y_pred_class:", y_pred_class.shape, y_pred_class.dtype)

In [None]:
# Pick one random test image and show it with its ground-truth and
# predicted labels.
i = np.random.randint(test_images_arr.shape[0])

true_idx = test_labels_arr[i,0]  # labels are indexed [sample, 0]
pred_idx = y_pred_class[i]

# Plot the image; the title gives index and class name for both labels.
plt.imshow(test_images_arr[i])
plt.title(f"GroundTrue:{true_idx} {CLASSES[true_idx]}  |  Predict:{pred_idx} {CLASSES[pred_idx]}")
plt.show()