In [1]:
import ast
import collections
import glob
import json
import os
import pickle
import random

import pandas as pd
import numpy as np
from vit_keras import vit, utils
import tensorflow_addons as tfa
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.metrics

from sklearn.utils import class_weight


In [2]:
# Experiment configuration: the experiment name determines every folder used
# by this notebook.
experiment = "Mohs_BCC_SCC_2023_propsplit"

working_dir = '' # PUT YOUR HOME HERE
processed_data = working_dir + "processed/"
experiment_folder = working_dir + experiment + "/"
model_output_folder = experiment_folder + "model" + "/"

# MODEL_PARAMETERS.txt is expected to contain a single Python dict literal.
# ast.literal_eval only parses literals (dicts, strings, numbers, ...), so the
# file cannot execute arbitrary code the way eval() would allow.
with open(experiment_folder + 'MODEL_PARAMETERS.txt', 'r') as f:
    model_file = f.read()
    MODEL_PARAMETERS = ast.literal_eval(model_file)

MODEL_PARAMETERS

{'model_name': 'Mohs_BCC_SCC_2023_propsplit',
 'image_size': 512,
 'loss': 'categorical_crossentropy',
 'class_mode': 'categorical',
 'output_layers': 3,
 'output_activation': 'softmax',
 'activation': 'relu',
 'optimizer': 'sgd',
 'num_epochs': 30,
 'batch_size': 4}

In [3]:
# Create the model output folder the first time the experiment is run;
# if it is already there, warn that its contents may be overwritten.
if os.path.isdir(model_output_folder):
    print("Caution : Will overwrite existing data and models")
else:
    print("Making a new folder for the new experiment")
    os.mkdir(model_output_folder)


Making a new folder for the new experiment


In [4]:
# Look for any file name containing ".json" in the model output folder;
# a match means a previous run already saved a model there.
substring = ".json"
files = os.listdir(model_output_folder)
contains_substring = [substring in file_name for file_name in files]

print(
    "Caution : Will overwrite existing data and models"
    if any(contains_substring)
    else "Ready to train."
)


Ready to train.


In [None]:
# Load the training table; the first CSV column becomes the index.
train_csv_path = experiment_folder + "train_data.csv"
train_data = pd.read_csv(train_csv_path, index_col=0)
train_data

In [None]:
# Load the held-out table; the first CSV column becomes the index.
test_csv_path = experiment_folder + "test_data.csv"
test_data = pd.read_csv(test_csv_path, index_col=0)
test_data

In [None]:
# Show which labels are present and how many patches carry each one.
patch_labels = train_data["patch_cancer"]
label_values = np.unique(patch_labels)
print(label_values)
print(collections.Counter(patch_labels))

# 'balanced' weights are returned in the same (sorted) order as label_values.
balanced_weights = class_weight.compute_class_weight('balanced',
                                                     classes=label_values,
                                                     y=patch_labels)

# Key the weights by integer class index, one entry per label actually present
# (previously hard-coded to exactly three classes, which silently truncated the
# mapping for any other class count).
# NOTE(review): assumes train_generator.class_indices follows the same sorted
# label order as np.unique — confirm before relying on the weights.
class_weights = dict(enumerate(balanced_weights))

class_weights

In [None]:
# Backbone from vit_keras (`vit_l32`), built with pretrained weights and
# without its top layer so a task-specific head can be attached afterwards.
model = vit.vit_l32(
    pretrained=True,
    include_top=False,
    pretrained_top=False,
    image_size=MODEL_PARAMETERS["image_size"],
    activation=MODEL_PARAMETERS["activation"],
)

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented.
# NOTE(review): rotation_range is expressed in degrees in Keras, so .1 is a
# very small rotation — confirm this is the intended magnitude.
train_datagen = ImageDataGenerator(
    # pixel scaling
    rescale=1./255,
    # photometric augmentation
    brightness_range=[0.5, 1.5],
    # geometric augmentation
    rotation_range=.1,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='constant',
)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Stream shuffled, augmented training batches from the paths in `full_path`,
# labelled by `patch_cancer`, resized to a square of side `image_size`.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    x_col="full_path",
    y_col="patch_cancer",
    target_size=(MODEL_PARAMETERS["image_size"], MODEL_PARAMETERS["image_size"]),
    class_mode=MODEL_PARAMETERS["class_mode"],
    batch_size=MODEL_PARAMETERS['batch_size'],
    shuffle=True,
)

In [None]:
# Stream validation batches in a fixed order (no shuffling, no augmentation).
# NOTE(review): validation reads from `test_data`, so anything selected on
# val_loss (e.g. the saved checkpoint) has seen the test split — confirm this
# is intended.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=test_data,
    x_col="full_path",
    y_col="patch_cancer",
    target_size=(MODEL_PARAMETERS["image_size"], MODEL_PARAMETERS["image_size"]),
    class_mode=MODEL_PARAMETERS["class_mode"],
    batch_size=MODEL_PARAMETERS['batch_size'],
    shuffle=False,
)

In [None]:
# Attach the classification head to the backbone output and wrap both in a
# single trainable model.
x = model.output
classification_head = layers.Dense(
    MODEL_PARAMETERS["output_layers"],
    activation=MODEL_PARAMETERS["output_activation"],
)
predictions = classification_head(x)
new_model = Model(inputs=model.input, outputs=[predictions])

In [None]:
# Persist the architecture (not the weights) as JSON next to the checkpoints.
model_json = new_model.to_json()
architecture_path = model_output_folder + MODEL_PARAMETERS["model_name"] + ".json"
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

In [None]:
# Early stopping on validation loss, with patience set to half the epoch budget.
es = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=MODEL_PARAMETERS['num_epochs'] * .5,
    verbose=1,
)

# Weights-only checkpoint written at epoch end whenever val_loss improves;
# the achieved val_loss is embedded in the file name.
checkpoint_pattern = (
    model_output_folder + MODEL_PARAMETERS["model_name"] + ".{val_loss:.3f}_cp.ckpt"
)
mc = ModelCheckpoint(
    checkpoint_pattern,
    monitor="val_loss",
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    save_freq="epoch",
    verbose=1,
)

In [None]:
# Training: count whole batches only; `mod` is the number of leftover samples
# that do not fill a batch.
nbatches_train, mod = divmod(train_data.shape[0], MODEL_PARAMETERS['batch_size'])

# Validation: round up so the final, partially filled batch is evaluated too.
# Floor division here would skip the last `n % batch_size` samples on every
# validation pass, because the validation generator is not shuffled.
STEP_SIZE_VALID = (
    val_generator.n + val_generator.batch_size - 1
) // val_generator.batch_size

In [None]:
# Configure training with the loss and optimizer named in MODEL_PARAMETERS.
# No extra metrics are requested, so only the loss is tracked.
new_model.compile(
    loss=MODEL_PARAMETERS["loss"],
    optimizer=MODEL_PARAMETERS["optimizer"],
)

In [None]:
# Train with class weighting and validate after every epoch.
# Only the checkpoint callback `mc` is active; the early-stopping callback
# `es` is not passed to fit().
history = new_model.fit(
    train_generator,
    epochs=MODEL_PARAMETERS['num_epochs'],
    steps_per_epoch=nbatches_train,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VALID,
    class_weight=class_weights,
    callbacks=[mc],
    shuffle=True,
    workers=1,
    verbose=1,
)


In [None]:
# Pickle the per-epoch history dict so it can be reloaded without retraining.
history_path = model_output_folder + 'trainHistoryDict'
with open(history_path, 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

history.history