In [9]:
! mkdir ~/.kaggle
! touch ~/.kaggle/kaggle.json

api_token = {"username": "your-username",
             "key": "your-key"}

import json

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)

! chmod 600 ~/.kaggle/kaggle.json

! kaggle datasets download -d giannisgeorgiou/fish-species

! unzip fish-species.zip

In [12]:
# Imports cell

from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import random
import os
from shutil import copyfile, rmtree
from pathlib import Path
from glob import glob
import itertools
import warnings

# Suppress all Python warnings from this point on to keep cell output quiet.
# NOTE(review): this also hides deprecation warnings - consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Root directory holding one sub-folder per class
INPUT_CLASSES_DIR = "./Species/Training_Set/"

# Keep only directories so stray files next to the class folders are not
# counted as classes, and sort because glob order is filesystem-dependent.
folders = sorted(
    path
    for path in glob(os.path.join(INPUT_CLASSES_DIR, "*"))
    if os.path.isdir(path)
)
n_classes = len(folders)

# Number of samples per class for training (less than 1700)
TRAIN_SAMPLE_SIZE = 200

# Number of samples per class for testing (less than 300)
TEST_SAMPLE_SIZE = 20

In [None]:
# Build fresh train/test folders by copying a random, reproducible sample of
# files from every class folder.

test_root = "./test"
train_root = "./train"

# Start from empty folders so files from a previous run cannot leak into the split
rmtree(train_root, ignore_errors=True)
rmtree(test_root, ignore_errors=True)

# Dedicated seeded RNG: the same split is drawn after every kernel restart,
# and the global `random` state is left untouched.
split_rng = random.Random(42)
samples_per_class = TRAIN_SAMPLE_SIZE + TEST_SAMPLE_SIZE

for dirname in folders:
    current_subfolder = os.path.basename(os.path.normpath(dirname))

    # Only regular files can be copied; sorting makes the seeded sample
    # independent of the filesystem's listing order.
    candidates = sorted(
        path for path in glob(os.path.join(dirname, "*")) if os.path.isfile(path)
    )
    if len(candidates) < samples_per_class:
        raise ValueError(
            f"Class folder '{dirname}' has {len(candidates)} files but "
            f"{samples_per_class} are needed "
            f"(TRAIN_SAMPLE_SIZE + TEST_SAMPLE_SIZE)."
        )

    # Sampling without replacement, then slicing, keeps train and test disjoint
    folder_sample = split_rng.sample(candidates, samples_per_class)

    Path(f"{test_root}/{current_subfolder}").mkdir(parents=True, exist_ok=True)
    Path(f"{train_root}/{current_subfolder}").mkdir(parents=True, exist_ok=True)

    train_filenames = folder_sample[:TRAIN_SAMPLE_SIZE]
    for f in train_filenames:
        copyfile(f, f"{train_root}/{current_subfolder}/{os.path.basename(f)}")

    test_filenames = folder_sample[TRAIN_SAMPLE_SIZE:]
    for f in test_filenames:
        copyfile(f, f"{test_root}/{current_subfolder}/{os.path.basename(f)}")

train_files = glob(train_root + "/*/*.jp*g")
test_files = glob(test_root + "/*/*.jp*g")

print(len(train_files))
print(len(test_files))

In [None]:
# Display one randomly chosen training image
sample_path = np.random.choice(train_files)
sample_image = image.load_img(sample_path)
plt.imshow(sample_image)

In [None]:
# ResNet50 backbone with ImageNet weights and without its top classification layers
image_size = [200, 200]
resnet = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=[*image_size, 3],  # height, width + 3 channels
)

# Freeze every backbone layer so its weights are not updated during training
for backbone_layer in resnet.layers:
    backbone_layer.trainable = False

# Show the backbone architecture
resnet.summary()

In [None]:
# Classification head: flatten the backbone output and feed it to a softmax
# layer with one unit per class
backbone_output = resnet.output
x = Flatten()(backbone_output)
softmax_layer = Dense(units=n_classes, activation="softmax")
prediction = softmax_layer(x)

# Join backbone and head into one model with the Functional API
model = Model(inputs=resnet.input, outputs=prediction)

model.summary()

In [None]:
# Configure training: sparse categorical cross-entropy loss, Adam optimizer,
# accuracy reported as the metric
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Data pipelines: augmented batches for training, un-augmented batches for
# validation/testing. Both apply ResNet50's preprocess_input.

batch_size = 128

# Random augmentations applied to training images only
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input, **augmentation_options
)

val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by the training and validation iterators
sparse_flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode="sparse",
)

train_generator = train_gen.flow_from_directory(
    train_root, shuffle=True, **sparse_flow_options
)

valid_generator = val_gen.flow_from_directory(
    test_root, shuffle=False, **sparse_flow_options
)

# Reads the same directory as valid_generator, but leaves batch size,
# shuffling and class mode at flow_from_directory's defaults
test_gen = val_gen.flow_from_directory(test_root, target_size=image_size)

In [None]:
### Translate labels into species

# Invert the class-name -> index mapping, then list the names in index order
# so that labels[index] is the matching class name
species_by_index = {
    index: species for species, index in test_gen.class_indices.items()
}
labels = [
    species_by_index.get(position)
    for position in range(len(test_gen.class_indices))
]

In [None]:
# Fit the model

epochs = 20

# Stop once the training loss has failed to improve for 3 consecutive epochs.
# NOTE(review): this monitors the training loss ("loss"), not "val_loss" -
# confirm that is intended.
callback = EarlyStopping(monitor="loss", patience=3)

# The generators already yield batches of `batch_size` images, so `batch_size`
# is not passed to fit(): Keras documents that it must not be specified for
# generator / Sequence input.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    callbacks=[callback],
)