In [1]:
import glob, os
import numpy as np
import pandas as pd
from vit_keras import vit, utils
import tensorflow_addons as tfa
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight



ModuleNotFoundError: No module named 'cv2'

In [2]:
# S3 Path to dataset s3://guardian-datasets/Guardian_curated_knock_grass_traintest.zip

IMAGE_SIZE = 384

dataset_loc = '/home/ubuntu/Data/Guardian_V2/'

model_output_folder = "/home/ubuntu/Models/inprogress/"

MODEL_PARAMETERS = {"model_name" : "guardian_v2_weights",                    
                    "activation" : "relu", # can be relu / sigmoid
                    "class_weights" : "True", # weight underrepresented
                    "num_epochs" : 50,
                    "batch_size" : 5}

model_output_folder = model_output_folder+ MODEL_PARAMETERS["model_name"] + "/"

### Make model output folder if it doesnt exist

if not os.path.isdir(model_output_folder):
    os.mkdir(model_output_folder)

# Write out parameters

with open(model_output_folder+'MODEL_PARAMETERS.txt','w') as data: 
      data.write(str(MODEL_PARAMETERS))
        
print(MODEL_PARAMETERS)


{'model_name': 'guardian_v2_weights', 'activation': 'relu', 'class_weights': 'True', 'num_epochs': 50, 'batch_size': 5}


In [3]:
def makeDataFrameForConditionImagesInFolders(dir_path):
    # This logic creates a dataframe of image -> label used by keras
    # cant use the simple image dataloader for regression tasks :(
    image_paths = []
    labels = []
    print('loading images from ', dir_path)
    train_classes = os.listdir(dir_path)

    for int_class in train_classes:
        int_images = os.listdir(dir_path + "/" + int_class + "/")

        for img in int_images:
            image_paths.append(dir_path + "/" + int_class + "/" + img)
            labels.append(int_class)

        print('Loaded images for', int_class, '\t', len(int_images))

    data = {'filepath': image_paths, 'target': labels}

    # Create DataFrame
    df = pd.DataFrame(data)
    return df

In [4]:
train_df = makeDataFrameForConditionImagesInFolders(dataset_loc + 'Train')

'''
train_df = train_df.sort_values("target").reset_index()
train_df["target_string"] = train_df["target"]
train_df["target"] = pd.Categorical(train_df["target"])
train_df["target"] = train_df.target.cat.codes
train_df["target"] = pd.to_numeric(train_df["target"])
train_df["target"] = str(train_df["target"])
'''

train_df.to_csv(model_output_folder + "train_df_guardian.csv")

val_df = makeDataFrameForConditionImagesInFolders(dataset_loc + 'Test')

'''
val_df = train_df.sort_values("target").reset_index()
val_df["target_string"] = val_df["target"]
val_df["target"] = pd.Categorical(val_df["target"])
val_df["target"] = val_df.target.cat.codes
val_df["target"] = pd.to_numeric(val_df["target"])
val_df["target"] = str(val_df["target"])
'''

val_df.to_csv(model_output_folder + "test_df_guardian.csv")

class_weights = class_weight.compute_class_weight('balanced',
                                                 classes = np.unique(train_df["target"]),
                                                 y = train_df["target"])

class_weights = dict(zip(np.unique(train_df["target"]), class_weights))
class_weights

loading images from  /home/ubuntu/Data/Guardian_V2/Train
Loaded images for knock 	 907
Loaded images for misc 	 1246
Loaded images for aft 	 1154
Loaded images for uncoveredpool 	 754
Loaded images for debris 	 794
Loaded images for bef 	 1084
Loaded images for dur 	 1233
loading images from  /home/ubuntu/Data/Guardian_V2/Test
Loaded images for knock 	 100
Loaded images for misc 	 100
Loaded images for aft 	 100
Loaded images for uncoveredpool 	 100
Loaded images for debris 	 100
Loaded images for bef 	 100
Loaded images for dur 	 100


{'aft': 0.8878435256251548,
 'bef': 0.9451765946230891,
 'debris': 1.2903922274199353,
 'dur': 0.8309581740238674,
 'knock': 1.1296267128681683,
 'misc': 0.8222884659481771,
 'uncoveredpool': 1.3588480485032208}

In [5]:
# Augmentations, brightness, rotation etc.
# Normalize RBG values 0> 1 : rescale=1./255,

model = vit.vit_l32(
    image_size=IMAGE_SIZE,
    activation='sigmoid',
    pretrained=True,
    include_top=False,
    pretrained_top=False,
)

train_datagen = ImageDataGenerator(
        brightness_range=[0.5, 1.5],
        rotation_range=.1,
        width_shift_range=0.15,
        height_shift_range=0.15,
        rescale=1./255,
        zoom_range=0.05,
        horizontal_flip=True,
        vertical_flip = False,
        fill_mode='constant')

val_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
# Keras function just to get the formats correct for keras input
train_generator=train_datagen.flow_from_dataframe(
dataframe=train_df,
x_col="filepath",
y_col="target",
batch_size=MODEL_PARAMETERS['batch_size'],
#has_ext=True,
shuffle=True,
class_mode="categorical",
target_size=(IMAGE_SIZE,IMAGE_SIZE))

Found 7172 validated image filenames belonging to 7 classes.


In [7]:
# Keras function just to get the formats correct for keras input


val_generator=val_datagen.flow_from_dataframe(
dataframe=val_df,
x_col="filepath",
y_col="target",
batch_size=MODEL_PARAMETERS['batch_size'],
#has_ext=True,
shuffle=False,
class_mode="categorical",
target_size=(IMAGE_SIZE,IMAGE_SIZE))

Found 700 validated image filenames belonging to 7 classes.


In [8]:
# inputs = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
x = model.output
# x = layers.GlobalAveragePooling2D()(x) # TODO should 
predictions = layers.Dense(7)(x)
new_model = Model(inputs=model.input, outputs=[predictions])

In [9]:
# Json describes the structure of the model, nodes / edges, encoding , normalization etc

model_json = new_model.to_json()
with open(model_output_folder + "guardian_model.json", "w") as json_file:
    json_file.write(model_json)
    

In [10]:
#Monitoring status and writing out based on conditions such as best only true

es = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=.5*MODEL_PARAMETERS['num_epochs'])
mc = ModelCheckpoint(
    model_output_folder + "guardian_model_efficientdet.{val_loss:.2f}.h5",
    monitor="val_loss",
    mode='min',
    verbose=1, 
    save_best_only=True,
    save_weights_only=True,
    save_freq="epoch"
)

In [11]:
# For each epoch, looks at all images
# Batch means when it will update the loss

nbatches_train, mod = divmod(train_df.shape[0], MODEL_PARAMETERS['batch_size'])
STEP_SIZE_VALID=val_generator.n//val_generator.batch_size

In [12]:
new_model.compile(
    optimizer="sgd",
    loss="categorical_crossentropy")

In [None]:
# Loss decreased, then climbed to a stable higher value
# We will evaluate based on the "test" data

new_model.fit(
    train_generator,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VALID,
    steps_per_epoch=nbatches_train,
    epochs=MODEL_PARAMETERS['num_epochs'],
    workers=8,
    shuffle=True,
    verbose=1,
    callbacks=[mc, es])


Epoch 1/50
  40/1434 [..............................] - ETA: 10:27 - loss: 7.5999