In [1]:
import tensorflow as tf 
import os
from random import shuffle
# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

2022-03-18 20:54:36.085788: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
os.chdir('/root/fish_class')
data_directory = os.getcwd()
print(data_directory)
!ls

/root/fish_class
Data					 a-large-scale-fish-dataset.zip
ENEL645_FinalProject_FishClassification  training_1
Model


1. Loading Data and Preprocessing

In [3]:
# 20% Validation Set, 80% Training Set
# Input data is balanced across the number of fish classes
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input, # Preprocessing function
    validation_split=0.2 
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input # Preprocessing function
)

In [4]:
# Shuffle = True randomly selects images from a random directory/class to meet the streaming batch size and send to the model for training
# Instead of flow_from_directory, the following article: https://www.kaggle.com/pavfedotov/fish-classifier-efficientnet-acc-100, uses flow_from_dataframe
# which simply contains the list of all image paths in directory and the corresponding class label, we can pivot to this method if it is difficult
# to visualize results, but the method below is actually more efficient...
train_images = train_generator.flow_from_directory(
    directory= './Data/Train_Val',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_directory(
    directory= './Data/Train_Val',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation' # Will only take 20% of the total data as the validation data
)

test_images = test_generator.flow_from_directory(
    directory= './Data/Test',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42
)

Found 7211 images belonging to 9 classes.
Found 1801 images belonging to 9 classes.
Found 430 images belonging to 9 classes.


In [5]:
print("Training image shape:", train_images.image_shape)
print("Validation image shape:", val_images.image_shape)
print("Test image shape:", test_images.image_shape)

Training image shape: (224, 224, 3)
Validation image shape: (224, 224, 3)
Test image shape: (224, 224, 3)


In [6]:
train_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt-Head Bream': 1,
 'Hourse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [7]:
val_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt-Head Bream': 1,
 'Hourse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [8]:
test_images.class_indices

{'Black Sea Sprat': 0,
 'Gilt Head Bream': 1,
 'Horse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

In [9]:
import tensorflow.keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, Input
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

2. Defining VGG16 (CNN) Architecture

In [13]:
# Original VGG16 implementation, seems not be well suited for this dataset
# input = Input(shape =(224,224,3))
# x = Conv2D (filters =64, kernel_size =3, padding ='same', activation='relu')(input)
# x = Conv2D (filters =64, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# x = Conv2D (filters =128, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =128, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x) 
# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x) 
# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x) 
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# x = Flatten()(x) 
# x = Dense(units = 4096, activation ='relu')(x) 
# x = Dense(units = 4096, activation ='relu')(x) 
# output = Dense(units = 9, activation ='softmax')(x)
# model = Model (inputs=input, outputs =output)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # Uses default LR or 0.001


# THIS WORKS FOR SOME REASON!
# Shallower model, simply halving image size while doubling filters, has more parameteres but performs way better in less time
input = Input(shape =(224,224,3))
x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(input)
x = MaxPool2D(2,2)(x)
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(input)
x = MaxPool2D(2,2)(x)
x = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(input)
x = MaxPool2D(2,2)(x)
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dense(256, activation='relu')(x)
output = Dense(9, activation='softmax')(x)
model = Model (inputs=input, outputs =output)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

2022-03-18 21:07:55.734846: W tensorflow/core/common_runtime/bfc_allocator.cc:433] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.50GiB (rounded to 1614938112)requested by op Add
Current allocation summary follows.
2022-03-18 21:07:55.734926: I tensorflow/core/common_runtime/bfc_allocator.cc:972] BFCAllocator dump for GPU_0_bfc
2022-03-18 21:07:55.734948: I tensorflow/core/common_runtime/bfc_allocator.cc:979] Bin (256): 	Total Chunks: 42, Chunks in use: 42. 10.5KiB allocated for chunks. 10.5KiB in use in bin. 1.0KiB client-requested in use in bin.
2022-03-18 21:07:55.734962: I tensorflow/core/common_runtime/bfc_allocator.cc:979] Bin (512): 	Total Chunks: 4, Chunks in use: 4. 2.0KiB allocated for chunks. 2.0KiB in use in bin. 2.0KiB client-requested in use in bin.
2022-03-18 21:07:55.734975: I tensorflow/core/common_runtime/bfc_allocator.cc:979] Bin (1024): 	Total Chunks: 7, Chunks in use: 7. 7.2KiB allocated for chunks. 7.2KiB in use in bin. 7.0KiB client-requested in use

ResourceExhaustedError: OOM when allocating tensor with shape[1577088,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Add]

3. Defining Schedulers and Callbacks

In [11]:
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 5) # Fine tune
checkpoint_path = "training_1/cp.ckpt"
monitor = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss',
                                             verbose=1,save_best_only=True,
                                             save_weights_only=True,
                                             mode='min') # Only saves the best model (so far) in terms of min validation loss
# # Learning rate schedule
# def scheduler(epoch, lr): # Fine tune
#     if epoch%10 == 0: # Occurs on 10, 20, 30, 40, 50
#         lr = lr/2 
#     return lr

# lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler,verbose = 1)
lr_schedule = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.1, patience=5, min_lr=0.000001, verbose=1)
callbacks = [early_stop, monitor, lr_schedule]

4. Training Model

In [12]:
model.fit(
    train_images, 
    validation_data=val_images, 
    epochs=50, # Fine tune
    callbacks=callbacks
)

2022-03-18 20:55:17.215162: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-03-18 20:55:17.216426: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2300015000 Hz


Epoch 1/50


2022-03-18 20:55:17.866021: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-03-18 20:55:18.229965: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2022-03-18 20:55:19.147425: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2022-03-18 20:55:19.181085: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2022-03-18 20:55:20.735047: W tensorflow/core/common_runtime/bfc_allocator.cc:248] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.83GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.



Epoch 00001: val_accuracy improved from -inf to 0.91782, saving model to training_1/cp.ckpt
Epoch 2/50

Epoch 00002: val_accuracy improved from 0.91782 to 0.97501, saving model to training_1/cp.ckpt
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.97501 to 0.97835, saving model to training_1/cp.ckpt
Epoch 4/50

Epoch 00004: val_accuracy did not improve from 0.97835
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.97835


<tensorflow.python.keras.callbacks.History at 0x7fe2cf97f190>

In [1]:
model.save('Model')

NameError: name 'model' is not defined

In [None]:
print("\n************************ COMPLETED ************************")