### Imports and setup

In [None]:
!pip install import-ipynb



In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sat May  7 13:55:05 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from psutil import virtual_memory

# psutil's `.total` field, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20
      else 'Not using a high-RAM runtime')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:
import import_ipynb
from google.colab import drive
import os
import pickle
import gc
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score
from keras.utils import np_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Mount Google Drive into the Colab filesystem, then switch the working
# directory to the project folder so relative paths resolve against it.
drive.mount('/content/drive/')
%cd '/content/drive/MyDrive/FYP'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/FYP


In [None]:
from data_loader_ordered import load_data

importing Jupyter notebook from data_loader_ordered.ipynb


### Model Imports

In [None]:
import tensorflow
# Enable memory growth on every GPU TensorFlow can see.
gpus = tensorflow.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tensorflow.config.experimental.set_memory_growth(gpu, True)
import tensorflow.keras.backend as K
from keras.callbacks import Callback
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import LearningRateScheduler

In [None]:
from tensorflow.keras.applications import Xception, EfficientNetV2S, InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

In [None]:
# Force tf.function code to run eagerly.
# NOTE(review): presumably required because detrac_accuracy calls `.numpy()`
# on its tensor arguments — confirm before removing.
tensorflow.config.run_functions_eagerly(True)

### Model setup functions


In [None]:
def random_aug(img):
  """Apply a random right-angle rotation and random flips to one array.

  Three draws are taken from NumPy's global random state, in this order:
  the number of 90-degree turns (0-3), whether to flip top-to-bottom, and
  whether to flip left-to-right.

  Args:
    img: array with at least two dimensions; `np.rot90`, `np.flipud` and
      `np.fliplr` act on its first two axes.

  Returns:
    The rotated / flipped array.
  """
  quarter_turns = np.random.choice([0, 1, 2, 3])
  augmented = np.rot90(img, quarter_turns)

  flip_vertical = np.random.choice([0, 1])
  if flip_vertical:
    augmented = np.flipud(augmented)

  flip_horizontal = np.random.choice([0, 1])
  if flip_horizontal:
    augmented = np.fliplr(augmented)

  return augmented

In [None]:
def get_base(model_func=None, base_trainable=True, freeze_before=None,
             input_shape=(150, 150, 3)):
  """Build an ImageNet-pretrained base network without its top layers.

  Args:
    model_func: callable that builds the network. It is called with
      `weights='imagenet'`, `include_top=False` and `input_shape`. Required.
    base_trainable: when False the whole base is frozen and `freeze_before`
      is ignored.
    freeze_before: optional layer-name prefix. Every layer that precedes the
      first layer whose name starts with this prefix is frozen; that layer
      and all later ones stay trainable.
    input_shape: input shape handed to `model_func`. Defaults to
      `(150, 150, 3)`, the value this function used to hard-code.

  Returns:
    The base model with its `trainable` flags set.

  Raises:
    ValueError: if `model_func` is not supplied.
  """
  if model_func is None:
    raise ValueError("get_base() requires a model_func to build the base model")

  base_model = model_func(
      weights='imagenet', include_top=False, input_shape=input_shape)

  if not base_trainable:
    base_model.trainable = False
    return base_model

  base_model.trainable = True
  if freeze_before:
    reached_prefix = False
    for layer in base_model.layers:
      if not reached_prefix and layer.name.startswith(freeze_before):
        reached_prefix = True
      if not reached_prefix:
        layer.trainable = False

    if not reached_prefix:
      # A prefix that matches nothing freezes every layer, which is almost
      # certainly a typo rather than the intent: make it visible.
      import warnings
      warnings.warn(
          f"No layer name starts with {freeze_before!r}; "
          "every layer of the base model has been frozen.",
          stacklevel=2)

  return base_model

In [None]:
def create_model(base, num_classes, dropout=0, n_hidden=1024, 
                 activation='relu', kernel_reg = 'l2'):
  """Stack a classification head on top of a convolutional base.

  The head is: global average pooling -> one hidden Dense layer ->
  optional Dropout -> softmax output.

  Args:
    base: the base model / layer added first to the Sequential model.
    num_classes: number of units in the softmax output layer.
    dropout: dropout rate applied after the hidden layer; a falsy value
      (the default 0) adds no Dropout layer.
    n_hidden: number of units in the hidden Dense layer.
    activation: activation of the hidden Dense layer.
    kernel_reg: kernel regularizer of the hidden Dense layer.

  Returns:
    The assembled (uncompiled) Sequential model.
  """
  model = Sequential()
  model.add(base)
  model.add(GlobalAveragePooling2D())
  # Honour the caller's settings: these were previously hard-coded to
  # 'relu' / 'l2', so the `activation` and `kernel_reg` arguments had no effect.
  model.add(Dense(n_hidden, activation=activation,
                  kernel_regularizer=kernel_reg))
  if dropout:
    model.add(Dropout(dropout))
  model.add(Dense(num_classes, activation='softmax'))
  return model

In [None]:
def print_model_summary(base_model, num_classes, kwargs):
  """Print each base layer's trainable flag, then the full model summary.

  Args:
    base_model: model whose `layers` are listed and which is passed to
      `create_model` as the base.
    num_classes: forwarded to `create_model`.
    kwargs: dict of extra keyword arguments unpacked into `create_model`.
  """
  layer_flags = ((lyr.name, lyr.trainable) for lyr in base_model.layers)
  for layer_name, is_trainable in layer_flags:
    print(layer_name, "\t", is_trainable)

  full_model = create_model(base_model, num_classes, **kwargs)
  full_model.summary()

### Training and eval functions

In [None]:
def get_clf_report(y_true, y_pred):
  """Build a classification report from per-class score arrays.

  Both inputs are reduced to class indices with `argmax` over axis 1 before
  being handed to `classification_report`.

  Args:
    y_true: 2-D array of true labels (one row per sample).
    y_pred: 2-D array of predicted scores (one row per sample).

  Returns:
    The value returned by `classification_report(..., output_dict=True)`.
  """
  true_labels, pred_labels = (
      np.argmax(scores, axis=1) for scores in (y_true, y_pred))
  return classification_report(true_labels, pred_labels, output_dict=True)

In [None]:
def get_detrac_clf_report(y_true, y_pred):
  """Classification report after merging each odd label into the even one below it.

  Inputs are reduced to class indices with `argmax` over axis 1. Labels
  1, 3, ..., 15 are then replaced by 0, 2, ..., 14 respectively, so each pair
  (2k, 2k+1) is scored as a single class. Labels of 16 or above are left
  untouched.

  Args:
    y_true: 2-D array of true labels (one row per sample).
    y_pred: 2-D array of predicted scores (one row per sample).

  Returns:
    The value returned by `classification_report(..., output_dict=True)`.
  """
  def _merge_pairs(labels):
    # Fold every odd label below 16 onto its even partner.
    for odd_label in range(1, 16, 2):
      labels = np.where(labels == odd_label, odd_label - 1, labels)
    return labels

  merged_true = _merge_pairs(np.argmax(y_true, axis=1))
  merged_pred = _merge_pairs(np.argmax(y_pred, axis=1))
  return classification_report(merged_true, merged_pred, output_dict=True)

In [None]:
def pickle_object(obj, file_name, save_dir='/content/drive/MyDrive/FYP'):
  """Pickle `obj` to `file_name` inside `save_dir`.

  The destination is resolved before the file is opened, so the file lands
  in `save_dir` regardless of the current working directory. This replaces a
  `%cd` that ran only after the file had already been opened, and it does not
  change the working directory.

  Args:
    obj: any picklable object.
    file_name: name of the file to write. An absolute path is used as-is
      (`os.path.join` semantics).
    save_dir: directory the file is written to. Defaults to the project
      folder on Google Drive.
  """
  target_path = os.path.join(save_dir, file_name)
  with open(target_path, "wb") as f:
    pickle.dump(obj, f)
  print(f"Saved file {file_name}")

In [None]:
def get_decay_fn(decay_factor=0.9, decay_rate=10):
  """Create a step-decay schedule function.

  Args:
    decay_factor: multiplier applied to the learning rate at each step.
    decay_rate: number of epochs between two consecutive steps.

  Returns:
    A function `step_decay(epoch, lr)` that returns `lr * decay_factor` when
    `epoch` is a non-zero multiple of `decay_rate`, and `lr` otherwise.
  """
  def step_decay(epoch, lr):
    remainder = epoch % decay_rate
    # No change inside a period, nor at the very first epoch.
    if remainder != 0 or epoch == 0:
      return lr
    return lr * decay_factor
  return step_decay

In [None]:
def replace_output(model, num_output):
  """Return a new Sequential model with a fresh softmax output layer.

  All layers of `model` except the last are reused (the same layer objects,
  not copies) and a new Dense softmax layer with `num_output` units is
  appended.

  Args:
    model: model whose `layers` (minus the last) form the new model's body.
    num_output: number of units of the new output layer.

  Returns:
    The new Sequential model.
  """
  body_layers = model.layers[:-1]
  new_head = Dense(num_output, activation='softmax')
  rebuilt = Sequential(body_layers)
  rebuilt.add(new_head)
  return rebuilt

In [None]:
def detrac_accuracy(y_true_T, y_pred_T):
  """Accuracy after merging each odd label into the even one below it.

  Both arguments must expose `.numpy()`. The resulting arrays are reduced to
  class indices with `argmax` over axis 1, labels 1, 3, ..., 15 are replaced
  by 0, 2, ..., 14, and the two label vectors are scored with
  `accuracy_score`.

  Args:
    y_true_T: true labels, an object with a `.numpy()` method.
    y_pred_T: predicted scores, an object with a `.numpy()` method.

  Returns:
    The value returned by `accuracy_score` on the merged labels.
  """
  def _merged_labels(tensor):
    labels = np.argmax(tensor.numpy(), axis=1)
    # argmax indices are non-negative, so "odd and below 16" selects exactly
    # the labels 1, 3, ..., 15.
    is_odd_member = (labels % 2 == 1) & (labels < 16)
    return np.where(is_odd_member, labels - 1, labels)

  return accuracy_score(_merged_labels(y_true_T), _merged_labels(y_pred_T))

In [None]:
def preprocess_and_split_data(X, y, train_index, test_index, n_classes):
  """Split arrays by index, preprocess the images and one-hot the labels.

  Args:
    X: array of images, indexable by `train_index` / `test_index`.
    y: array of integer labels, indexed the same way.
    train_index: indices of the training samples.
    test_index: indices of the test samples.
    n_classes: number of classes passed to `np_utils.to_categorical`.

  Returns:
    `(X_train, X_test, y_train, y_test)`: images passed through
    `preprocess_input`, labels passed through `np_utils.to_categorical`.
  """
  splits = (train_index, test_index)
  X_train, X_test = [preprocess_input(X[idx]) for idx in splits]
  y_train, y_test = [
      np_utils.to_categorical(y[idx], n_classes) for idx in splits]
  return X_train, X_test, y_train, y_test

In [None]:
def run_experiment(X_d, y_d, X_n, y_n, num_classes_d, num_classes_n, optim_fn, 
                   lr_schedule, base_params, dense_params, lr_params, lr_params_a,
                   cv_splits=None, epochs=None, epochs_a=None, 
                   batch_size=None, random_state=None):
  """Cross-validate a two-stage training run.

  For every stratified fold (stratified on `y_d`):
    1. a fresh base + dense head is built and trained on the decomposed
       labels (`X_d`, `y_d`);
    2. its output layer is replaced by one with `num_classes_n` units and the
       resulting model is trained on the original labels (`X_n`, `y_n`).
  The same train/test indices are used for both datasets, and the
  preprocessed training images of the two datasets are asserted to be equal.

  Args:
    X_d, y_d: images and integer labels of the decomposed dataset.
    X_n, y_n: images and integer labels of the original dataset.
    num_classes_d: number of decomposed classes.
    num_classes_n: number of original classes.
    optim_fn: optimizer constructor, called with `**lr_params` for stage 1
      and `**lr_params_a` for stage 2.
    lr_schedule: Keras callback passed to both `fit` calls.
    base_params: keyword arguments for `get_base`.
    dense_params: keyword arguments for `create_model`.
    lr_params: optimizer keyword arguments for stage 1.
    lr_params_a: optimizer keyword arguments for stage 2.
    cv_splits: number of cross-validation folds. Required.
    epochs: number of stage-1 epochs. Required.
    epochs_a: number of stage-2 epochs. Required.
    batch_size: batch size used for both stages. Required.
    random_state: seed for the fold shuffling.

  Returns:
    `(histories_detrac, clf_reports_detrac, histories_auto, clf_reports_auto)`,
    four lists with one entry per fold.

  Raises:
    ValueError: if `cv_splits`, `epochs`, `epochs_a` or `batch_size` is None.
  """
  required = {'cv_splits': cv_splits, 'epochs': epochs,
              'epochs_a': epochs_a, 'batch_size': batch_size}
  missing = [name for name, value in required.items() if value is None]
  if missing:
    # Fail before any data is copied or any model is built.
    raise ValueError(
        "run_experiment() requires values for: " + ", ".join(missing))

  histories_detrac = []
  clf_reports_detrac = []
  histories_auto = []
  clf_reports_auto = []
  print_summary = True

  skf = StratifiedKFold(
      n_splits=cv_splits, random_state=random_state, shuffle=True)

  # The augmentation settings hold no per-fold state, so one generator
  # object serves every fold; `flow` creates a new iterator each time.
  train_datagen = ImageDataGenerator(
      preprocessing_function=random_aug)

  for fold, (train_index, test_index) in enumerate(skf.split(X_d, y_d), start=1):

    # decomp data
    X_train_d, X_test_d, y_train_d, y_test_d = preprocess_and_split_data(
        X_d, y_d, train_index, test_index, num_classes_d
    )

    # normal data
    X_train_n, X_test_n, y_train_n, y_test_n = preprocess_and_split_data(
        X_n, y_n, train_index, test_index, num_classes_n
    )

    # Both datasets must hold the same images in the same order, otherwise
    # the shared fold indices would select different samples.
    assert np.array_equal(X_train_d, X_train_n), (
        "decomposed and original image arrays are not aligned")

    # ---- Stage 1: train on the decomposed labels ----
    base_model = get_base(**base_params)
    model_detrac = create_model(base_model, num_classes_d, **dense_params)
    optimizer = optim_fn(**lr_params)
    metrics = ['accuracy', detrac_accuracy]
    # detrac_accuracy calls `.numpy()`, hence run_eagerly=True.
    model_detrac.compile(loss='categorical_crossentropy', 
                optimizer=optimizer, metrics=metrics,
                run_eagerly=True)

    if print_summary:
      # summary() prints by itself and returns None.
      model_detrac.summary()

    print(f"Fitting split {fold}")
    history_d = model_detrac.fit(
        # Labels are passed as `y` (the documented form) rather than packed
        # into the `x` tuple, where they would count as an extra model input.
        train_datagen.flow(X_train_d, y_train_d, batch_size=batch_size), 
        validation_data=(X_test_d, y_test_d),
        steps_per_epoch=(len(X_train_d)//batch_size),
        epochs=epochs, callbacks=[lr_schedule], verbose=1)

    histories_detrac.append(history_d.history)
    y_pred = model_detrac.predict(X_test_d)
    clf_report = get_detrac_clf_report(y_test_d, y_pred)
    clf_reports_detrac.append(clf_report)
    print('Detrac report:')
    print(clf_report)

    # ---- Stage 2: new output layer, train on the original labels ----
    model_auto = replace_output(model_detrac, num_classes_n)
    optimizer = optim_fn(**lr_params_a)
    model_auto.compile(loss='categorical_crossentropy', 
            optimizer=optimizer, metrics=['accuracy'],
            run_eagerly=True)

    if print_summary:
      model_auto.summary()
      print_summary = False

    history_a = model_auto.fit(
      train_datagen.flow(X_train_n, y_train_n, batch_size=batch_size), 
      validation_data=(X_test_n, y_test_n),
      steps_per_epoch=(len(X_train_n)//batch_size),
      epochs=epochs_a, callbacks=[lr_schedule], verbose=1)

    histories_auto.append(history_a.history)
    y_pred = model_auto.predict(X_test_n)
    clf_report = get_clf_report(y_test_n, y_pred)
    clf_reports_auto.append(clf_report)
    print('Auto report:')
    print(clf_report)

    # Release this fold's models and Keras' global state before the next
    # fold builds new ones, so memory does not pile up across folds.
    del model_detrac
    del model_auto
    del base_model
    K.clear_session()
    gc.collect()
  return histories_detrac, clf_reports_detrac, histories_auto, clf_reports_auto

### Main

In [None]:
# Google Drive folders holding the two versions of the dataset: the original
# class folders and the per-class cluster ("decomposed") folders.
DATA_DIR_NORM = '/content/drive/MyDrive/FYP/Kather_norm'
DATA_DIR_DECOMP = '/content/drive/MyDrive/FYP/Kather_decomp'

In [None]:
# Seed passed to both load_data calls so the two datasets are shuffled alike.
random_state = 123

In [None]:
# Load the original dataset: images, integer labels and the class count.
X_norm, y_norm, NUM_CLASSES_NORM = load_data(
    DATA_DIR_NORM,
    skip_classes=[],
    test_split=0,
    shuffle=True,
    random_state=random_state,
)

Loading 01_TUMOR...


100%|██████████| 625/625 [00:01<00:00, 409.32it/s]


Loading 02_STROMA...


100%|██████████| 625/625 [00:01<00:00, 340.68it/s]


Loading 03_COMPLEX...


100%|██████████| 625/625 [00:02<00:00, 268.47it/s]


Loading 04_LYMPHO...


100%|██████████| 625/625 [00:02<00:00, 248.08it/s]


Loading 05_DEBRIS...


100%|██████████| 625/625 [00:01<00:00, 364.24it/s]


Loading 06_MUCOSA...


100%|██████████| 625/625 [00:01<00:00, 446.47it/s]


Loading 07_ADIPOSE...


100%|██████████| 625/625 [00:01<00:00, 504.90it/s]


Loading 08_EMPTY...


100%|██████████| 625/625 [00:01<00:00, 568.94it/s]


Done


In [None]:
# Load the decomposed (per-class cluster) dataset with the same settings and seed.
X_decomp, y_decomp, NUM_CLASSES_DECOMP = load_data(
    DATA_DIR_DECOMP,
    skip_classes=[],
    test_split=0,
    shuffle=True,
    random_state=random_state,
)

Loading 01_TUMOR_CLUSTER_0...


100%|██████████| 260/260 [00:00<00:00, 441.40it/s]


Loading 01_TUMOR_CLUSTER_1...


100%|██████████| 365/365 [00:00<00:00, 447.44it/s]


Loading 02_STROMA_CLUSTER_0...


100%|██████████| 254/254 [00:00<00:00, 475.94it/s]


Loading 02_STROMA_CLUSTER_1...


100%|██████████| 371/371 [00:00<00:00, 451.41it/s]


Loading 03_COMPLEX_CLUSTER_0...


100%|██████████| 284/284 [00:00<00:00, 434.00it/s]


Loading 03_COMPLEX_CLUSTER_1...


100%|██████████| 341/341 [00:00<00:00, 476.21it/s]


Loading 04_LYMPHO_CLUSTER_0...


100%|██████████| 226/226 [00:00<00:00, 442.94it/s]


Loading 04_LYMPHO_CLUSTER_1...


100%|██████████| 399/399 [00:00<00:00, 435.04it/s]


Loading 05_DEBRIS_CLUSTER_0...


100%|██████████| 390/390 [00:00<00:00, 467.18it/s]


Loading 05_DEBRIS_CLUSTER_1...


100%|██████████| 235/235 [00:00<00:00, 476.08it/s]


Loading 06_MUCOSA_CLUSTER_0...


100%|██████████| 264/264 [00:00<00:00, 471.24it/s]


Loading 06_MUCOSA_CLUSTER_1...


100%|██████████| 361/361 [00:00<00:00, 471.72it/s]


Loading 07_ADIPOSE_CLUSTER_0...


100%|██████████| 378/378 [00:00<00:00, 498.37it/s]


Loading 07_ADIPOSE_CLUSTER_1...


100%|██████████| 247/247 [00:00<00:00, 494.70it/s]


Loading 08_EMPTY_CLUSTER_0...


100%|██████████| 307/307 [00:00<00:00, 605.25it/s]


Loading 08_EMPTY_CLUSTER_1...


100%|██████████| 318/318 [00:00<00:00, 559.75it/s]


Done


In [None]:
# Display the number of decomposed classes returned by load_data.
NUM_CLASSES_DECOMP

16

In [None]:
# Label for this run.
# NOTE(review): not referenced in the visible cells — presumably used when
# saving results; confirm.
experiment_name = "Xception_finetune_detrac"

In [None]:
# Keyword arguments for get_base(): Xception base, trainable from the first
# layer whose name starts with "block14".
base_params = dict(
    model_func=Xception,
    base_trainable=True,
    freeze_before="block14",
)
# Keyword arguments for create_model() (the dense head).
dense_params = {
    'n_hidden': 1024,
    'dropout': 0,
    'activation': 'relu', 
    'kernel_reg': 'l1'
}

# Optimizer keyword arguments for stage 1 (decomposed labels).
lr_params = {
    'learning_rate': 1e-2,
    'momentum': 0.9
}
# Optimizer keyword arguments for stage 2 (original labels).
lr_params_a = {
    'learning_rate': 1e-3,
    'momentum': 0.9
}
# Remaining run_experiment() settings: folds, epochs per stage, batch size.
other_params = {
    'cv_splits': 10,  # the comma here was missing, which is a SyntaxError
    'epochs': 40,
    'batch_size': 32,
    'epochs_a': 7
}

# Keyword arguments for get_decay_fn().
decay_params = {
    'decay_factor': 0.9,
    'decay_rate': 10
}

In [None]:
# Optimizer class (not an instance); it is passed to run_experiment as optim_fn.
OPTIM_FN = SGD
# Schedule function built from decay_params.
step_decay = get_decay_fn(**decay_params)
# Keras callback wrapping the schedule function.
LR_SCHEDULE = LearningRateScheduler(step_decay)

In [None]:
# base = get_base(**base_params)

In [None]:
# print_model_summary(base, NUM_CLASSES_DECOMP, dense_params)

In [None]:
# Run the cross-validated two-stage experiment. The fold seed reuses the
# notebook-level `random_state` instead of repeating the literal.
histories_detrac, clf_reports_detrac, histories_auto, clf_reports_auto = run_experiment(
    X_decomp, y_decomp, X_norm, y_norm, NUM_CLASSES_DECOMP, NUM_CLASSES_NORM, 
    OPTIM_FN, LR_SCHEDULE, base_params, dense_params, lr_params, lr_params_a, **other_params,
    random_state=random_state)

# TODO: double-check the base model's output and preprocessing requirements.
# preprocess_input is imported from inception_v3 while the base is Xception —
# confirm the two expect the same input scaling.

In [None]:
# Mean stage-1 accuracy across folds (from the merged-label reports).
np.mean([fold_report["accuracy"] for fold_report in clf_reports_detrac])

0.9134

In [None]:
# Mean stage-2 accuracy across folds (original labels).
np.mean([fold_report["accuracy"] for fold_report in clf_reports_auto])

0.9246000000000001