In [27]:
import tensorflow as tf 
import os
from random import shuffle
import numpy as np
import os.path
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split

In [28]:
os.chdir('/data')
working_directory = os.getcwd()
print("working directory:", working_directory)

working directory: /data


1. Loading Data and Preprocessing

In [29]:
def make_image_df(folder):
    test_image_dir = Path('fish_data/'+folder)
    test_filepaths = list(test_image_dir.glob(r'*/*.*'))
    test_labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], test_filepaths))

    test_filepaths = pd.Series(test_filepaths, name='Filepath').astype(str)
    test_labels = pd.Series(test_labels, name='Label')
    test_image_df = pd.concat([test_filepaths, test_labels], axis=1)
    return test_image_df

test_df = make_image_df('Test')
dev_df = make_image_df('Train_Val')
total_df = pd.concat([dev_df, test_df], axis=0)

In [30]:
print(test_df.head())
test_df.shape

                                            Filepath            Label
0           fish_data/Test/Black Sea Sprat/00023.png  Black Sea Sprat
1           fish_data/Test/Black Sea Sprat/00005.png  Black Sea Sprat
2           fish_data/Test/Black Sea Sprat/00003.png  Black Sea Sprat
3           fish_data/Test/Black Sea Sprat/00009.png  Black Sea Sprat
4  fish_data/Test/Black Sea Sprat/.ipynb_checkpoints  Black Sea Sprat


(438, 2)

In [31]:
print(dev_df.head())
dev_df.shape

                                        Filepath            Label
0  fish_data/Train_Val/Black Sea Sprat/00124.png  Black Sea Sprat
1  fish_data/Train_Val/Black Sea Sprat/00960.png  Black Sea Sprat
2  fish_data/Train_Val/Black Sea Sprat/00612.png  Black Sea Sprat
3  fish_data/Train_Val/Black Sea Sprat/00365.png  Black Sea Sprat
4  fish_data/Train_Val/Black Sea Sprat/00681.png  Black Sea Sprat


(9009, 2)

In [32]:
print(total_df.head())
print(total_df.shape)

                                        Filepath            Label
0  fish_data/Train_Val/Black Sea Sprat/00124.png  Black Sea Sprat
1  fish_data/Train_Val/Black Sea Sprat/00960.png  Black Sea Sprat
2  fish_data/Train_Val/Black Sea Sprat/00612.png  Black Sea Sprat
3  fish_data/Train_Val/Black Sea Sprat/00365.png  Black Sea Sprat
4  fish_data/Train_Val/Black Sea Sprat/00681.png  Black Sea Sprat
(9447, 2)


In [33]:
dev_df, test_df = train_test_split(total_df, test_size=0.1, train_size=0.9, shuffle=True, random_state=42)
train_df, val_df = train_test_split(dev_df, test_size=0.2, train_size=0.8, shuffle=True, random_state=42)

In [34]:
dev_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255 # Could apply additional augmentation here
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(                                                    
    rescale=1./255 # Apply same normalization, not performing other preprocessing steps
)

In [35]:
# BATCH SIZE WAS ORIGINALLY 32
train_images = dev_generator.flow_from_dataframe(
    dataframe = train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42
)

val_images = dev_generator.flow_from_dataframe(
    dataframe = val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42
)

test_images = test_generator.flow_from_dataframe(
    dataframe = test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42
)

Found 6790 validated image filenames belonging to 9 classes.
Found 1699 validated image filenames belonging to 9 classes.
Found 941 validated image filenames belonging to 9 classes.




In [36]:
print("Training image shape:", train_images.image_shape)
print("Validation image shape:", val_images.image_shape)
print("Test image shape:", test_images.image_shape)

Training image shape: (224, 224, 3)
Validation image shape: (224, 224, 3)
Test image shape: (224, 224, 3)


In [37]:
train_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt Head Bream': 1,
 'Horse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [38]:
val_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt Head Bream': 1,
 'Horse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [39]:
test_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt Head Bream': 1,
 'Horse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [40]:
import tensorflow.keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

2. Defining VGG16 (CNN) Architecture

In [51]:
# model = tf.keras.models.Sequential([
    
#     tf.keras.layers.Conv2D(32, (3,3), activation='relu',  input_shape=(224,224,3), kernel_regularizer=tf.keras.regularizers.l2(l2=0.001)),
#     tf.keras.layers.MaxPool2D(pool_size = (2,2)),
#     tf.keras.layers.Conv2D(32, (3,3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2=0.01)),
#     tf.keras.layers.MaxPool2D(pool_size = (2,2)),
#     tf.keras.layers.Dropout(0.2),
    
#     tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2=0.001)),
#     tf.keras.layers.MaxPool2D(pool_size = (2,2)),
#     tf.keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2=0.01)),
#     tf.keras.layers.MaxPool2D(pool_size = (2,2)),
#     tf.keras.layers.Dropout(0.2),
    
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(256, activation='relu'),
#     tf.keras.layers.Dropout(0.35),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dropout(0.25),
#     tf.keras.layers.Dense(64, activation='relu'),
#     tf.keras.layers.Dropout(0.15),
#     tf.keras.layers.Dense(9, activation='softmax')
# ])

# optimizer = tf.keras.optimizers.Adam()

# model.compile(
#     optimizer=optimizer,
#     loss='categorical_crossentropy',
#     metrics=['accuracy']
# )

# model.summary()

input = Input(shape =(224,224,3))
l1 = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(input)
l2 = MaxPool2D(2,2)(l1)
l3 = Dropout(0.2)(l2)
l4 = Conv2D(filters=64, kernel_size=(3,3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2=0.001))(l3)
l5 = MaxPool2D(2,2)(l4)
l6 = Flatten()(l5)
l7 = Dense(256, activation='relu')(l6)
l8 = Dense(256, activation='relu')(l7)
output = Dense(9, activation='softmax')(l8)
model = Model (inputs=input, outputs =output)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 222, 222, 128)     3584      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 111, 111, 128)     0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 111, 111, 128)     0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 109, 109, 64)      73792     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 54, 54, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 186624)            0   

3. Defining Schedulers and Callbacks

In [52]:
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 10) # Fine tune
checkpoint_path = "training_1/cp.ckpt"
monitor = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss',
                                             verbose=1,save_best_only=True,
                                             save_weights_only=True,
                                             mode='min') # Only saves the best model (so far) in terms of min validation loss

def scheduler(epoch, lr):
    if epoch%10 == 0 and epoch!= 0:
        lr = lr/1.2
    return lr

lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler,verbose = 0)
lr_schedule_on_plateau = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.1, patience=5, min_lr=0.0000001, verbose=1)
callbacks = [early_stop, monitor, lr_schedule_on_plateau,lr_schedule]

4. Training Model

In [53]:
try:
    history = model.fit(
        train_images, 
        validation_data=val_images, 
        epochs=50, # Fine tune
        callbacks=callbacks
    )
except KeyboardInterrupt:
    print("\nmodel training terminated\n")

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.55074, saving model to training_1/cp.ckpt
Epoch 2/50

Epoch 00002: val_loss improved from 0.55074 to 0.27203, saving model to training_1/cp.ckpt
Epoch 3/50

Epoch 00003: val_loss improved from 0.27203 to 0.24697, saving model to training_1/cp.ckpt
Epoch 4/50

Epoch 00004: val_loss improved from 0.24697 to 0.20862, saving model to training_1/cp.ckpt
Epoch 5/50

Epoch 00005: val_loss improved from 0.20862 to 0.20441, saving model to training_1/cp.ckpt
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.20441
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.20441
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.20441
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.20441
Epoch 10/50
model training terminated



In [None]:
np.save('history.npy', history.history)

NameError: name 'history' is not defined

In [None]:
model.save('Model')

In [None]:
print("\n************************ COMPLETED TRAINING ************************")

5. Loading Best Model and Testing

In [54]:
model.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f241c6ef9d0>

In [48]:
history=np.load('history.npy', allow_pickle='TRUE').item()
print("Best training results:\n", history)

Best training results:
 {'loss': [3.3339972496032715, 1.0260932445526123, 0.7786084413528442, 0.6668128967285156, 0.5769358277320862, 0.5501341819763184, 0.5038965940475464, 0.466381698846817, 0.45221200585365295, 0.4148716330528259, 0.38485780358314514, 0.3669179677963257, 0.3282333016395569, 0.35841265320777893, 0.35493895411491394, 0.3266587257385254, 0.2926563620567322, 0.3457963764667511, 0.20166921615600586, 0.1688028872013092, 0.15034224092960358, 0.1498643308877945, 0.14933903515338898, 0.14482682943344116, 0.14870086312294006, 0.1407342404127121, 0.139243483543396, 0.14170846343040466, 0.11717443913221359, 0.12590529024600983, 0.11756332218647003, 0.11326565593481064, 0.11039110273122787, 0.1157182827591896, 0.11841946095228195], 'accuracy': [0.34081462025642395, 0.6172069907188416, 0.7103075385093689, 0.7529786825180054, 0.7853975892066956, 0.7980049848556519, 0.8140759468078613, 0.8239124417304993, 0.8294541239738464, 0.8403990268707275, 0.856747031211853, 0.8621501922607422

In [49]:
history.get('val_accuracy')

[0.5257903337478638,
 0.6699944734573364,
 0.6955074667930603,
 0.6106489300727844,
 0.6960621476173401,
 0.7487520575523376,
 0.7698280811309814,
 0.7459789514541626,
 0.7287853360176086,
 0.7226844429969788,
 0.7681641578674316,
 0.7909040451049805,
 0.7903494238853455,
 0.722129762172699,
 0.7237936854362488,
 0.7193566560745239,
 0.7714919447898865,
 0.7398779988288879,
 0.7859123945236206,
 0.7864670157432556,
 0.819744884967804,
 0.8119800090789795,
 0.7764836549758911,
 0.8053244352340698,
 0.8080976009368896,
 0.8103161454200745,
 0.8153077960014343,
 0.8136439323425293,
 0.8080976009368896,
 0.8153077960014343,
 0.8225180506706238,
 0.8236272931098938,
 0.8225180506706238,
 0.8252911567687988,
 0.8175263404846191]

In [55]:
results = model.evaluate(test_images, verbose=1)

print("Categorical Cross Entropy: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

Categorical Cross Entropy: 0.17414
Test Accuracy: 94.90%


In [None]:
print("\n************************ COMPLETED TESTING ************************")