### Imports and setup

In [None]:
!pip install import-ipynb

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
from psutil import virtual_memory
# Total memory reported by psutil, expressed in decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold used here to tell a high-RAM runtime apart.
print('You are using a high-RAM runtime!' if ram_gb >= 20
      else 'Not using a high-RAM runtime')

In [None]:
import import_ipynb
from google.colab import drive
import os
import pickle
import gc
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score
from keras.utils import np_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Mount Google Drive and make the project folder the working directory.
# NOTE(review): presumably required so that the notebook-based import of
# data_loader_ordered and relative file paths resolve -- confirm.
drive.mount('/content/drive/')
%cd '/content/drive/MyDrive/FYP'

In [None]:
from data_loader_ordered import load_data

### Model Imports

In [None]:
import tensorflow
# Turn on memory growth for every GPU TensorFlow can see. This has to be
# configured before the GPUs are initialised, which is why it sits between
# the imports.
gpus = tensorflow.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tensorflow.config.experimental.set_memory_growth(gpu, True)
import tensorflow.keras.backend as K
from keras.callbacks import Callback
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import LearningRateScheduler

In [None]:
from tensorflow.keras.applications import Xception, EfficientNetV2S, InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

In [None]:
# Run tf.function code eagerly.
# NOTE(review): presumably needed because detrac_accuracy calls .numpy() on
# its tensor arguments -- confirm; eager execution is slower.
tensorflow.config.run_functions_eagerly(True)

### Model setup functions


In [None]:
def random_aug(img):
  """Return a randomly rotated and flipped version of one image.

  The image is rotated by a random multiple of 90 degrees, then
  independently flipped top-to-bottom and left-to-right, each with
  probability 1/2. Randomness comes from NumPy's global RNG.

  Args:
    img: array-like image; rotation acts on its first two axes.

  Returns:
    The transformed array.
  """
  quarter_turns = np.random.choice([0, 1, 2, 3])
  augmented = np.rot90(img, quarter_turns)
  # Vertical flip is drawn first, then horizontal flip.
  for flip in (np.flipud, np.fliplr):
    if np.random.choice([0, 1]):
      augmented = flip(augmented)
  return augmented

In [None]:
def get_base(model_func = None, base_trainable=True, freeze_before=None,
             input_shape=(150, 150, 3), weights='imagenet'):
  """Build a headless pretrained base model and configure which layers train.

  Args:
    model_func: callable model constructor (e.g. InceptionV3) accepting the
      keyword arguments `weights`, `include_top` and `input_shape`.
    base_trainable: if False the whole base is frozen; if True the base is
      trainable, optionally restricted by `freeze_before`.
    freeze_before: optional layer-name prefix. Every layer that comes before
      the first layer whose name starts with this prefix is frozen. If no
      layer name matches, every layer ends up frozen. Ignored when
      `base_trainable` is False.
    input_shape: input shape forwarded to `model_func`.
    weights: weights specifier forwarded to `model_func`.

  Returns:
    The constructed base model.

  Raises:
    TypeError: if `model_func` is not callable (including the default None).
  """
  # Fail with an actionable message instead of "'NoneType' is not callable".
  if not callable(model_func):
    raise TypeError(
        "model_func must be a callable model constructor "
        f"(e.g. InceptionV3), got {model_func!r}")

  base_model = model_func(
      weights=weights, include_top=False, input_shape=input_shape)

  if not base_trainable:
    base_model.trainable = False
    return base_model

  base_model.trainable = True
  if freeze_before:
    # Freeze layers in order until the first one matching the prefix; that
    # layer and everything after it stay trainable.
    for layer in base_model.layers:
      if layer.name.startswith(freeze_before):
        break
      layer.trainable = False

  return base_model

In [None]:
def create_model(base, num_classes, dropout=0, n_hidden=1024, 
                 activation='relu', kernel_reg = 'l2'):
  """Stack a classification head on top of a base model.

  The head is: global average pooling -> Dense(n_hidden) -> optional
  Dropout -> Dense(num_classes, softmax).

  Args:
    base: base model added as the first layer of the Sequential model.
    num_classes: number of units in the softmax output layer.
    dropout: dropout rate after the hidden layer; a falsy value (0) adds no
      Dropout layer.
    n_hidden: number of units in the hidden Dense layer.
    activation: activation of the hidden Dense layer.
    kernel_reg: kernel regularizer of the hidden Dense layer.

  Returns:
    The assembled (uncompiled) Sequential model.
  """
  model = Sequential()
  model.add(base)
  model.add(GlobalAveragePooling2D())
  # Honour the caller's activation / regularizer; these were previously
  # hard-coded to 'relu' / 'l2', silently ignoring the arguments.
  model.add(Dense(n_hidden, activation=activation,
                  kernel_regularizer=kernel_reg))
  if dropout:
    model.add(Dropout(dropout))
  model.add(Dense(num_classes, activation='softmax'))
  return model

In [None]:
def print_model_summary(base_model, num_classes, kwargs):
  """Print each base layer's trainable flag, then the full model summary.

  Args:
    base_model: base model whose `layers` are listed.
    num_classes: forwarded to create_model.
    kwargs: dict of extra keyword arguments unpacked into create_model.
  """
  for lyr in base_model.layers:
    print(lyr.name, "\t", lyr.trainable)
  # Build a throwaway full model purely to display its summary.
  create_model(base_model, num_classes, **kwargs).summary()

### Training and eval functions

In [None]:
def get_clf_report(y_true, y_pred):
  """Build a classification report from one-hot / score matrices.

  Both inputs are reduced to class indices with argmax along axis 1 before
  being handed to sklearn's classification_report.

  Returns:
    The report as a dict (output_dict=True).
  """
  true_labels, pred_labels = (
      np.argmax(scores, axis=1) for scores in (y_true, y_pred))
  return classification_report(true_labels, pred_labels, output_dict=True)

In [None]:
def get_detrac_clf_report(y_true, y_pred):
  """Classification report after merging paired class indices.

  Inputs are reduced to class indices with argmax along axis 1. Each odd
  index in 1..15 is then folded into the even index just below it
  (1 -> 0, 3 -> 2, ..., 15 -> 14); indices of 16 and above are left alone.

  Returns:
    The report as a dict (output_dict=True).
  """
  def merge_pairs(scores):
    labels = np.argmax(scores, axis=1)
    odd_partner = (labels % 2 == 1) & (labels < 16)
    return np.where(odd_partner, labels - 1, labels)

  return classification_report(
      merge_pairs(y_true), merge_pairs(y_pred), output_dict=True)

In [None]:
def pickle_object(obj, file_name, save_dir='/content/drive/MyDrive/FYP'):
  """Pickle `obj` to `file_name` inside `save_dir`.

  The destination is built explicitly from `save_dir` rather than relying on
  the current working directory, so the file always lands in the intended
  folder and the working directory is left untouched. An absolute
  `file_name` is used as-is.

  Args:
    obj: any picklable object.
    file_name: name (or path) of the output file.
    save_dir: directory in which relative file names are created.

  Raises:
    FileNotFoundError: if the destination directory does not exist (for
      example when Drive is not mounted); no file is created in that case.
  """
  target_path = os.path.join(save_dir, file_name)
  with open(target_path, "wb") as f:
    pickle.dump(obj, f)
  print(f"Saved file {file_name}")

In [None]:
def get_decay_fn(decay_factor=0.9, decay_rate=5):
  """Create a step-decay schedule function taking (epoch, lr).

  The returned function multiplies the incoming learning rate by
  `decay_factor` on every epoch that is a non-zero multiple of `decay_rate`
  and returns it unchanged otherwise.

  Args:
    decay_factor: multiplier applied at each decay step.
    decay_rate: number of epochs between decay steps.

  Returns:
    A function `step_decay(epoch, lr) -> new_lr`.
  """
  def step_decay(epoch, lr):
    due = (epoch % decay_rate == 0) and (epoch != 0)
    return lr * decay_factor if due else lr
  return step_decay

In [None]:
def detrac_accuracy(y_true_T, y_pred_T):
  """Accuracy after merging paired class indices.

  Both arguments must expose `.numpy()`; the resulting arrays are reduced to
  class indices with argmax along axis 1. Each odd index in 1..15 is folded
  into the even index just below it before accuracy_score is computed.

  Returns:
    The value returned by sklearn's accuracy_score on the merged labels.
  """
  merged = []
  for tensor in (y_true_T, y_pred_T):
    labels = np.argmax(tensor.numpy(), axis=1)
    odd_partner = (labels % 2 == 1) & (labels < 16)
    merged.append(np.where(odd_partner, labels - 1, labels))
  true_labels, pred_labels = merged
  return accuracy_score(true_labels, pred_labels)

In [None]:
def run_experiment(X, y, num_classes, optim_fn, 
                   lr_schedule, base_params, lr_params, cv_splits=None,
                   epochs=50, batch_size=128, detrac=False, random_state=123,
                   save_model=False, model_save_path=None, dense_params=None):
  """Train and evaluate a fresh model on every fold of a stratified K-fold CV.

  For each fold a new base model and head are built, compiled with a new
  optimizer, trained on augmented training data and evaluated on the held-out
  fold.

  Args:
    X: array of images, indexable by integer index arrays.
    y: array of integer class labels aligned with X.
    num_classes: number of classes (one-hot width and output units).
    optim_fn: optimizer constructor, called as `optim_fn(**lr_params)`.
    lr_schedule: Keras callback passed to `model.fit`.
    base_params: keyword arguments for `get_base`.
    lr_params: keyword arguments for `optim_fn`.
    cv_splits: number of CV folds (required).
    epochs: training epochs per fold.
    batch_size: batch size of the training generator.
    detrac: if True use `get_detrac_clf_report`, otherwise `get_clf_report`.
    random_state: seed for the shuffled StratifiedKFold.
    save_model: if True, save the model of the best fold so far.
    model_save_path: destination for `model.save`; required if `save_model`.
    dense_params: keyword arguments for `create_model`. When omitted, a
      module-level `dense_params` is used if one exists (backward
      compatibility with callers relying on the notebook global), otherwise
      the `create_model` defaults apply.

  Returns:
    (histories, clf_reports): per-fold `history.history` dicts and per-fold
    classification-report dicts.

  Raises:
    ValueError: if `cv_splits` is None, or `save_model` is set without a
      `model_save_path`.
  """
  # Fail fast instead of after a full fold has been trained.
  if cv_splits is None:
    raise ValueError("cv_splits must be set to the number of CV folds")
  if save_model and model_save_path is None:
    raise ValueError("model_save_path is required when save_model=True")

  if dense_params is None:
    # Previously this function silently read the global `dense_params`;
    # keep honouring it so existing call sites behave the same.
    dense_params = globals().get('dense_params') or {}

  report_fn = get_detrac_clf_report if detrac else get_clf_report

  best_acc = 0
  histories = []
  clf_reports = []

  skf = StratifiedKFold(
      n_splits=cv_splits, random_state=random_state, shuffle=True)

  for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    # Indexing with index arrays yields copies, so preprocessing the folds
    # does not touch the caller's X.
    X_train = preprocess_input(X[train_index])
    X_test = preprocess_input(X[test_index])
    y_train = np_utils.to_categorical(y[train_index], num_classes)
    y_test = np_utils.to_categorical(y[test_index], num_classes)

    base_model = get_base(**base_params)
    model = create_model(base_model, num_classes, **dense_params)
    train_datagen = ImageDataGenerator(
        preprocessing_function=random_aug)

    optimizer = optim_fn(**lr_params)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy'],
                  run_eagerly=True)
    if fold == 1:
      # summary() prints by itself; wrapping it in print() emitted "None".
      model.summary()

    print(f"Fitting split {fold}")

    history = model.fit(
        # Pass labels as `y`; the tuple form flow((X, y)) treats the second
        # element as auxiliary model input rather than as targets.
        train_datagen.flow(X_train, y_train, batch_size=batch_size),
        validation_data=(X_test, y_test),
        # At least one step, even when the fold is smaller than a batch.
        steps_per_epoch=max(1, len(X_train) // batch_size),
        epochs=epochs, callbacks=[lr_schedule], verbose=1)

    histories.append(history.history)

    y_pred = model.predict(X_test)
    clf_report = report_fn(y_test, y_pred)
    clf_reports.append(clf_report)

    print(clf_report)

    curr_acc = clf_report["accuracy"]
    if curr_acc > best_acc:
      best_acc = curr_acc
      if save_model:
        model.save(model_save_path)
        print("Saving new best model")

    # Release the fold's model and Keras' global state before building the
    # next one, so memory does not accumulate across folds.
    del model, base_model
    K.clear_session()
    gc.collect()
  return histories, clf_reports

### Main

In [None]:
# Folder on the mounted Drive that load_data reads the images from.
# NOTE(review): presumably the stain-normalised Kather dataset -- confirm.
DATA_DIR = '/content/drive/MyDrive/FYP/Kather_norm'

In [None]:
# Classes passed to load_data as `skip_classes`; presumably these are left
# out of the loaded data (semantics live in data_loader_ordered -- confirm).
# skip_classes = [2,3,4,5,7]
skip_classes = []
# Seed handed to load_data for its shuffling.
random_state = 123

In [None]:
# Load the dataset; load_data returns the samples, their labels and the
# number of classes. Argument semantics are defined in data_loader_ordered
# (TODO confirm that test_split=0 means "no hold-out split").
X, y, NUM_CLASSES = load_data(DATA_DIR, skip_classes=skip_classes, test_split=0, 
                              shuffle=True, random_state=random_state)

Loading 01_TUMOR...


100%|██████████| 625/625 [00:03<00:00, 179.88it/s]


Loading 02_STROMA...


100%|██████████| 625/625 [00:02<00:00, 224.73it/s]


Loading 03_COMPLEX...


100%|██████████| 625/625 [00:02<00:00, 237.67it/s]


Loading 04_LYMPHO...


100%|██████████| 625/625 [00:01<00:00, 327.37it/s]


Loading 05_DEBRIS...


100%|██████████| 625/625 [00:01<00:00, 331.03it/s]


Loading 06_MUCOSA...


100%|██████████| 625/625 [00:01<00:00, 324.44it/s]


Loading 07_ADIPOSE...


100%|██████████| 625/625 [00:02<00:00, 297.80it/s]


Loading 08_EMPTY...


100%|██████████| 625/625 [00:01<00:00, 409.28it/s]


Done


In [None]:
# Sanity check: display the class count returned by load_data.
NUM_CLASSES

8

In [None]:
# Label for this run; used below to build the model save path.
experiment_name = "InceptionV3_deep"

In [None]:
# Keyword arguments for get_base(): which pretrained architecture to use and
# how much of it is trainable.
base_params = {
    'model_func': InceptionV3, 
    'base_trainable': True, 
    'freeze_before': None
}
# Keyword arguments for the classification head built by create_model().
dense_params = {
    'n_hidden': 1024,
    'dropout': 0,
    'activation': 'relu', 
    'kernel_reg': 'l1'
}

# Keyword arguments for the optimizer constructor (optim_fn(**lr_params)).
lr_params = {
    'learning_rate': 1e-2,
    'momentum': 0.9
}
    
# Remaining keyword arguments unpacked into run_experiment().
other_params = {
    'cv_splits': 10,
    'epochs': 30,
    'batch_size': 32,
    'save_model': False,
    'model_save_path': f"{experiment_name}.h5",
    'detrac': False
}

# Keyword arguments for get_decay_fn(): the learning rate is multiplied by
# decay_factor on every non-zero epoch that is a multiple of decay_rate.
decay_params = {
    'decay_factor': 0.75,
    'decay_rate': 10
}

In [None]:
# Optimizer class and learning-rate callback handed to run_experiment().
OPTIM_FN = SGD
# Step-decay function built from decay_params, wrapped as a Keras callback.
step_decay = get_decay_fn(**decay_params)
LR_SCHEDULE = LearningRateScheduler(step_decay)

In [None]:
# base = get_base(**base_params)

In [None]:
# print_model_summary(base, NUM_CLASSES, dense_params)

In [None]:
# Run the cross-validated experiment with the configuration defined above.
# The CV seed comes from the notebook-level `random_state` setting instead of
# a repeated literal, so data loading and fold splitting cannot drift apart.
# TODO: double-check the base model's output and the preprocessing
# requirements of the chosen architecture.
histories, clf_reports = run_experiment(
    X, y, NUM_CLASSES, OPTIM_FN, LR_SCHEDULE,
    base_params, lr_params, **other_params,
    random_state=random_state)