In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models, callbacks
import tensorflow_hub as hub
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing import image
from PIL import Image

# 載入資料

In [2]:
# Carve the dataset's 'train' split into three slices -- 70% training,
# 15% validation and 15% test -- and also fetch the DatasetInfo object.
split_spec = ['train[:70%]', 'train[70%:85%]', 'train[85%:]']
(train_ds, valid_ds, test_ds), info = tfds.load(
    'colorectal_histology',
    split=split_spec,
    shuffle_files=True,
    with_info=True,
)

Downloading and preparing dataset 246.14 MiB (download: 246.14 MiB, generated: Unknown size, total: 246.14 MiB) to /root/tensorflow_datasets/colorectal_histology/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/5000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/colorectal_histology/2.0.0.incompleteAYXWUD/colorectal_histology-train.tfr…

Dataset colorectal_histology downloaded and prepared to /root/tensorflow_datasets/colorectal_histology/2.0.0. Subsequent calls will reuse this data.


In [3]:
# Hyper-parameters shared by every model in this notebook.
# Alternative kept for reference: take the shape from the dataset metadata.
# input_shapes = info.features[info.supervised_keys[0]].shape
input_shapes = (224, 224, 3)   # input shape declared on every network
img_size = input_shapes[:-1]   # leading two entries, used as the resize target
batch_size, epoch = 32, 50     # mini-batch size; upper bound on training epochs

In [4]:
# Preprocessing function
def preprocess_data(data):
    """Turn one dataset example into an ``(image, label)`` pair.

    Args:
        data: Mapping that provides an ``'image'`` entry and a ``'label'`` entry.

    Returns:
        A tuple ``(image, label)``: the image resized to ``img_size`` and
        divided by 255.0, and the label passed through unchanged.
    """
    resized = tf.image.resize(data['image'], img_size)
    # Dividing by 255 maps pixels to [0, 1]; assumes 8-bit source images -- TODO confirm.
    return resized / 255.0, data['label']

# Apply the preprocessing function to every split and group the results into batches.
#
# Only the training split is shuffled, so validation/test evaluation order stays
# fixed. The shuffle runs on the raw examples *before* the map, so the buffer
# holds un-resized examples rather than the resized float tensors.
# `shuffle` reshuffles on every iteration by default, which gives each epoch a
# different batch composition. `prefetch` overlaps input preparation with the
# training step.
shuffle_buffer = 1000  # number of raw examples kept in the shuffle buffer
train_data = (
    train_ds
    .shuffle(shuffle_buffer)
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
valid_data = (
    valid_ds
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_data = (
    test_ds
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# EarlyStopping configuration: watch 'val_loss' with a patience of 3 epochs
# and restore the best weights when training stops.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# 定義模型

In [None]:
# Baseline CNN (the one later trained with the callbacks).
# Five Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with filter counts
# 32 -> 64 -> 128 -> 256 -> 512, followed by a dense classification head.
conv_filters = (32, 64, 128, 256, 512)

model_1 = models.Sequential()
for stage, n_filters in enumerate(conv_filters):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': input_shapes} if stage == 0 else {}
    model_1.add(layers.Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
    model_1.add(layers.MaxPooling2D((2, 2)))

model_1.add(layers.Flatten())
for units in (256, 128):
    model_1.add(layers.Dense(units, activation='relu'))
# Softmax output sized from the label feature in the dataset metadata.
model_1.add(layers.Dense(info.features['label'].num_classes, activation='softmax'))
model_1.summary()

# Compile the model.
model_1.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)       0

In [None]:
# MobileNetV2 pre-trained model taken from TensorFlow Hub.
MNV2_model = hub.load("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4")
# Mark the pre-trained weights as frozen.
MNV2_model.trainable = False

# NOTE(review): the handle above is the *classification* variant, so the dense
# head below is trained on its class scores (the recorded summary shows a
# 1001-wide output). A feature_vector handle may be the more usual base --
# confirm this is intended.
model_2 = models.Sequential([
    hub.KerasLayer(MNV2_model, trainable=False, input_shape=input_shapes),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(info.features['label'].num_classes, activation='softmax'),
])
model_2.summary()

# Compile the model.
model_2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 1001)              3540265   
                                                                 
 flatten_1 (Flatten)         (None, 1001)              0         
                                                                 
 dense_3 (Dense)             (None, 128)               128256    
                                                                 
 dense_4 (Dense)             (None, 8)                 1032      
                                                                 
Total params: 3669553 (14.00 MB)
Trainable params: 129288 (505.03 KB)
Non-trainable params: 3540265 (13.51 MB)
_________________________________________________________________


In [None]:
# Build the VGG19 base (ImageNet weights, without its original classifier top).
VGG19_model = tf.keras.applications.VGG19(input_shape=input_shapes, include_top=False, weights='imagenet')
# Freeze the VGG19 weights so only the new head is trained.
VGG19_model.trainable = False

# NOTE(review): preprocess_data feeds images scaled to [0, 1]; the Keras docs
# recommend `tf.keras.applications.vgg19.preprocess_input` for this network.
# Confirm whether it should be applied (doing so would require retraining).

# `Sequential` documents its first argument as a *list* of layers. Passing the
# bare model only works because legacy Keras wraps non-list values; newer Keras
# iterates over the argument and fails. Hence the explicit list.
model_3 = models.Sequential([
    VGG19_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(info.features['label'].num_classes, activation='softmax'),
])
model_3.summary()

# Compile the model.
model_3.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 7, 7, 512)         20024384  
                                                                 
 flatten_2 (Flatten)         (None, 25088)             0         
                                                                 
 dense_5 (Dense)             (None, 256)               6422784   
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_6 (Dense)             (None, 8)                 2056      
                                                                 
Total params: 26449224 (100.90 MB)
Trainable pa

In [5]:
# Build the DenseNet121 base (ImageNet weights, without its original classifier top).
DN121_model = tf.keras.applications.DenseNet121(input_shape=input_shapes, include_top=False, weights='imagenet')
# Freeze the DenseNet121 weights so only the new head is trained.
DN121_model.trainable = False

# NOTE(review): preprocess_data feeds images scaled to [0, 1]; the Keras docs
# recommend `tf.keras.applications.densenet.preprocess_input` for this network.
# Confirm whether it should be applied (doing so would require retraining and
# re-saving the weights).

# `Sequential` documents its first argument as a *list* of layers. Passing the
# bare model only works because legacy Keras wraps non-list values; newer Keras
# iterates over the argument and fails. Hence the explicit list.
model_4 = models.Sequential([
    DN121_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(info.features['label'].num_classes, activation='softmax'),
])
model_4.summary()

# Compile the model.
model_4.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet121 (Functional)    (None, 7, 7, 1024)        7037504   
                                                                 
 global_average_pooling2d (  (None, 1024)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 256)               262400    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 8)                 2056      
                                        

# 訓練模型

In [None]:
# Train the CNN; the EarlyStopping callback may end the run before `epoch` epochs.
history_1 = model_1.fit(
    train_data,
    validation_data=valid_data,
    epochs=epoch,
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


In [None]:
# Train the model built on the TF Hub MobileNetV2 layer.
history_2 = model_2.fit(
    train_data,
    validation_data=valid_data,
    epochs=epoch,
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


In [None]:
# Train the model built on the frozen VGG19 base.
history_3 = model_3.fit(
    train_data,
    validation_data=valid_data,
    epochs=epoch,
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


In [None]:
# Train the model built on the frozen DenseNet121 base.
history_4 = model_4.fit(
    train_data,
    validation_data=valid_data,
    epochs=epoch,
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


# 儲存模型權重

In [None]:
# The DenseNet121 weights are the ones finally used for the web-based
# colorectal cancer recognition model, so they are written to disk here.
model_4.save_weights(filepath='DenseNet121_model_weights.h5')
# To load the weights back:
# model_4.load_weights('DenseNet121_model_weights.h5')

# 評估模型

In [None]:
# Evaluate every trained model on the held-out test batches and collect the
# metrics into one table (one row per model, in `model_list` order).
model_list = [model_1, model_2, model_3, model_4]

# Gather plain records first and build the DataFrame once at the end. Growing a
# frame with pd.concat inside the loop re-copies it on every iteration, and
# starting from an empty frame yields object-typed columns.
eval_records = []
for model in model_list:
    # Each model was compiled with metrics=['accuracy'], so evaluate() yields
    # the loss followed by the accuracy.
    test_loss, test_acc = model.evaluate(test_data)
    eval_records.append({'Accuracy': test_acc, 'Loss': test_loss})

result = pd.DataFrame(eval_records, columns=['Accuracy', 'Loss'])



In [None]:
# Label each row of the results table with its model; the names follow the
# order in which the models appear in model_list.
model_names = ['CNN(有callbacks)', 'MobileNetV2', 'VGG19', 'DenseNet121']
result.index = model_names

result

Unnamed: 0,Accuracy,Loss
CNN(有callbacks),0.774667,0.573458
MobileNetV2,0.896,0.300849
VGG19,0.854667,0.416943
DenseNet121,0.921333,0.213344


In [None]:
# Disabled helper: this whole cell is a single string literal, so none of the
# code inside it is executed. The text defines `tiff_to_png`, which opens an
# image with PIL's Image.open, converts it to RGB and saves it as PNG, printing
# a success or failure message. It then calls the function on a sample path.
'''
def tiff_to_png(input_path, output_path):
    try:
        # 打開 TIFF 圖像
        with Image.open(input_path) as img:
            # 將圖像轉換為 RGB 模式
            img = img.convert('RGB')
            # 保存為 PNG 格式
            img.save(output_path, format='PNG')
            print("轉換成功")
    except Exception as e:
        print("轉換失敗:", e)

# 指定輸入和輸出路徑
input_file = "/content/MUCOSA.tif"
output_file = "/content/MUCOSA.png"

# 呼叫轉換函數
tiff_to_png(input_file, output_file)
'''

In [14]:
# Load the trained model weights.
model_4.load_weights('DenseNet121_model_weights.h5')

# Load the image to classify.
img_path = '/content/testA_8_jpg.rf.d2c0fc37e92887aec95edc83b7dfdb6d.jpg'  # replace with your own image path
# Read the file as RGB at its native resolution. Passing target_size to
# load_img would resize with its default nearest-neighbour interpolation,
# which differs from the resize used on the training data.
img = image.load_img(img_path, color_mode='rgb')
x_new = image.img_to_array(img)
# Mirror preprocess_data exactly: tf.image.resize to img_size, then divide by
# 255.0, so the model sees inputs prepared the same way as during training.
x_new = (tf.image.resize(x_new, img_size) / 255.0).numpy()
# Add the leading batch axis expected by model.predict.
x_new = np.expand_dims(x_new, axis=0)

In [15]:
# Class labels, listed in class-index order.
# NOTE(review): the order is assumed to match the dataset's label ids -- verify
# against info.features['label'].names.
labels = [
    'tumour epithelium',
    'simple stroma',
    'complex stroma(stroma that contains single tumour cells and/or single immune cells)',
    'immune cell conglomerates',
    'debris and mucus',
    'mucosal glands',
    'adipose tissue',
    'background',
]

# Run the prediction.
predictions = model_4.predict(x_new)

# Index of the highest-probability class (argmax over the flattened output).
predicted_class = predictions.argmax()

# Print the label of the highest-probability class.
predicted_label = labels[predicted_class]
print("predictions:", predicted_label)

predictions: immune cell conglomerates
