In [None]:
# Start from a clean slate: remove the results folder and the archive left
# behind by a previous run (both commands stay quiet if nothing exists).
!rm -rf res
!rm -f cv_res.zip

# Recreate the empty folder that the later cells save their figures into.
!mkdir res

In [None]:
import os
import cv2 as cv
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import *
from pathlib import Path
from tqdm import tqdm
from keras.utils.vis_utils import plot_model
import gc

In [None]:
# Location of the dataset (images plus labels.csv) on the Kaggle file system.
ROOT = Path('/kaggle/input', 'cv-final-project-data', 'dataset')

# Seed handed to the data generators.
SEED = 0
# Images are resized to IMG_SIZE x IMG_SIZE before being fed to the models.
IMG_SIZE = 256
BATCH_SIZE = 32

# Values of the `label` column that are inspected and classified.
labels = ['bee', 'wasp', 'insect', 'flower']
# Collects one (model name, test accuracy, test loss) row per evaluated model.
table_data = list()

In [None]:
# Read the label table that sits next to the images.
df = pd.read_csv(ROOT / 'labels.csv')
# Swap any backslashes in the stored relative paths for this platform's
# path separator so the paths can be joined onto ROOT.
df['path'] = df['path'].map(lambda rel_path: rel_path.replace('\\', os.sep))
df.head()

In [None]:
def read_data(df, label, n_samples=5):
    """Show example images for one label and print its per-split sample counts.

    The preview figure is saved to ``res/<label>.png`` and displayed. The
    printed counts come from the ``photo_quality``, ``is_validation`` and
    ``is_final_validation`` columns of the rows carrying ``label``.

    Args:
        df: Label table with the columns ``label``, ``path``,
            ``photo_quality``, ``is_validation`` and ``is_final_validation``.
        label: Value of the ``label`` column to inspect.
        n_samples: Maximum number of images to preview; fewer are shown when
            the label has fewer rows.

    Raises:
        ValueError: If there is nothing to preview (no rows for ``label`` or
            ``n_samples`` below 1).
        FileNotFoundError: If a sampled image cannot be read from disk.
    """
    q = df[df['label'] == label]
    n = len(q)

    # Never ask pandas for more rows than the label actually has.
    n_shown = min(n_samples, n)
    if n_shown < 1:
        raise ValueError(f'No samples to display for label {label!r}')
    # Seeded so the saved preview figure is the same on every run.
    samples = q.sample(n=n_shown, random_state=SEED)

    # squeeze=False keeps `ax` two-dimensional even for a single column, so
    # previewing exactly one image works as well.
    fig, ax = plt.subplots(nrows=1, ncols=n_shown,
                           figsize=(4*n_shown, 4), squeeze=False)
    for axis, path in zip(ax[0], samples['path']):
        # IMREAD_COLOR always yields a 3-channel BGR array, so the BGR->RGB
        # conversion below also works for grayscale or alpha-channel files.
        img = cv.imread(str(ROOT / path), cv.IMREAD_COLOR)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {ROOT / path}')
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        axis.imshow(img)
        axis.axis('off')
    fig.suptitle(f'Label: {label}')
    plt.savefig(f'res{os.sep}{label}.png', bbox_inches='tight')
    plt.show()

    low = len(q[q['photo_quality'] == 0])
    val = len(q[q['is_validation'] == 1])
    test = len(q[q['is_final_validation'] == 1])
    print(f'Total number of samples for {label}: {n}')
    # The quality breakdown is only reported for the two wasp/bee classes.
    if label == 'bee' or label == 'wasp':
        print(f'Number of low quality samples: {low}')
        print(f'Number of high quality samples: {n - low}')
    # Training rows are whatever is flagged neither validation nor test.
    print(f'Number of training samples: {n - val - test}')
    print(f'Number of validation samples: {val}')
    print(f'Number of testing samples: {test}')

In [None]:
# Preview images and split sizes for the first category in `labels`.
read_data(df=df, label=labels[0])

In [None]:
# Preview images and split sizes for the second category in `labels`.
read_data(df=df, label=labels[1])

In [None]:
# Preview images and split sizes for the third category in `labels`.
read_data(df=df, label=labels[2])

In [None]:
# Preview images and split sizes for the fourth category in `labels`.
read_data(df=df, label=labels[3])

In [None]:
# Number of rows per category, in the order given by `labels`.
category_counts = [int((df['label'] == name).sum()) for name in labels]

bar_fig, bar_ax = plt.subplots(figsize=(12, 6))
bar_ax.bar(labels, category_counts)
bar_ax.set_title('Dataset size for each category')
bar_ax.set_xlabel('Category')
bar_ax.set_ylabel('Number of samples')
bar_fig.savefig(f'res{os.sep}bar_plot.png', bbox_inches='tight')
plt.show()

In [None]:
# Row masks built from the split flags stored in the label table.
in_validation = df['is_validation'] == 1
in_final_validation = df['is_final_validation'] == 1
in_training = (df['is_validation'] == 0) & (df['is_final_validation'] == 0)

# Each split gets a fresh 0..n-1 index.
train_df = df.loc[in_training].reset_index(drop=True)
val_df = df.loc[in_validation].reset_index(drop=True)
test_df = df.loc[in_final_validation].reset_index(drop=True)

In [None]:
# def create_datasets(df, img_size):
#     imgs = []
#     for path in tqdm(df['path']):
#         img = cv.imread(str(ROOT / path))
#         img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
#         img = cv.resize(img, (img_size,img_size))
#         imgs.append(img)
        
#     imgs = np.array(imgs, dtype='float32')
#     imgs = imgs / 255.0
#     df = pd.get_dummies(df['label'])
#     return imgs, df


# train_imgs, train_df = create_datasets(train_df, IMG_SIZE)
# val_imgs, val_df = create_datasets(val_df, IMG_SIZE)
# test_imgs, test_df = create_datasets(test_df, IMG_SIZE)

In [None]:
# Shared image pipeline: every pixel value is multiplied by 1/255.
datagen = preprocessing.image.ImageDataGenerator(rescale=1./255.)


def _flow_split(frame, shuffle):
    """Build the batch iterator for one split of the label table.

    Args:
        frame: Dataframe with a ``path`` column (relative to ROOT) and a
            ``label`` column.
        shuffle: Whether the rows are reshuffled between epochs.

    Returns:
        The iterator produced by ``datagen.flow_from_dataframe``, yielding
        batches of resized images with one-hot encoded labels.
    """
    return datagen.flow_from_dataframe(frame,
                                       directory=str(ROOT),
                                       x_col='path',
                                       y_col='label',
                                       target_size=(IMG_SIZE, IMG_SIZE),
                                       # Pin the label -> index mapping so it
                                       # is identical for every split, even if
                                       # a split misses one of the classes.
                                       classes=labels,
                                       class_mode='categorical',
                                       # Make the configured batch size take
                                       # effect instead of the library default.
                                       batch_size=BATCH_SIZE,
                                       shuffle=shuffle,
                                       seed=SEED)


# Only the training data is shuffled; validation and test keep the dataframe
# order so that predictions can be matched back to their rows.
train_datagen = _flow_split(train_df, shuffle=True)
val_datagen = _flow_split(val_df, shuffle=False)
test_datagen = _flow_split(test_df, shuffle=False)

In [None]:
# Option A - arrays held in memory (needs the commented-out create_datasets
# cell to be enabled):
# train_X = train_imgs
# train_Y = train_df
# validation_all = (val_imgs, val_df)
# test_X = test_imgs
# test_Y = test_df

# Option B (active) - stream batches from the generators; the labels come
# with each batch, so the separate target arguments stay None.
train_X, train_Y = train_datagen, None
validation_all = val_datagen
test_X, test_Y = test_datagen, None

In [None]:
# Transfer-learning classifier: a ResNet50V2 backbone without its
# classification head, followed by global average pooling, dropout and a
# softmax layer with one unit per entry in `labels`.
# NOTE(review): the backbone is loaded with the Keras default weights, whose
# documented preprocessing differs from the generator's 1/255 rescaling -
# confirm whether `preprocess_input` should be used instead.
model = models.Sequential()
model.add(Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
pre_net = applications.ResNet50V2(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                                  include_top=False)
model.add(pre_net)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.2))
# Output width follows the label list instead of a hard-coded 4.
model.add(layers.Dense(len(labels), activation='softmax'))

# The classes are mutually exclusive and the targets are one-hot vectors, so
# the loss matching the softmax output is categorical cross-entropy; binary
# cross-entropy would score each of the outputs as an independent yes/no task.
model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
              loss=losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Render the layer graph of the current model into the results folder.
plot_model(model=model, to_file=f'res{os.sep}model1.png')

In [None]:
# Stop once the monitored quantity (Keras default) has not improved for five
# epochs and roll the model back to the best weights seen.
es_callback = callbacks.EarlyStopping(restore_best_weights=True,
                                      verbose=1,
                                      patience=5)

# `batch_size` matters for the in-memory array inputs; generators produce
# their own batches.
fit_options = dict(x=train_X,
                   y=train_Y,
                   batch_size=BATCH_SIZE,
                   epochs=50,
                   validation_data=validation_all,
                   callbacks=[es_callback])

# NOTE: despite the "mobile" in its name, this history is the one plotted as
# 'ResNet' in the comparison figures further down.
mobile_wo_train_hist = model.fit(**fit_options)

In [None]:
# Score the current model on the test inputs; with the compiled metrics,
# evaluate() returns the loss followed by the accuracy.
loss, acc = model.evaluate(test_X, test_Y)
# Drop any earlier 'ResNet' row first so that re-running this cell does not
# add a duplicate line to the results table.
table_data[:] = [row for row in table_data if row[0] != 'ResNet']
table_data.append(('ResNet', round(acc, 4), round(loss, 4)))

In [None]:
# Transfer-learning classifier: a MobileNetV2 backbone without its
# classification head, followed by global average pooling, dropout and a
# softmax layer with one unit per entry in `labels`.
# NOTE(review): the backbone is loaded with the Keras default weights, whose
# documented preprocessing differs from the generator's 1/255 rescaling -
# confirm whether `preprocess_input` should be used instead.
model = models.Sequential()
model.add(Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
pre_net = applications.MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                                   include_top=False)
model.add(pre_net)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.2))
# Output width follows the label list instead of a hard-coded 4.
model.add(layers.Dense(len(labels), activation='softmax'))

# The classes are mutually exclusive and the targets are one-hot vectors, so
# the loss matching the softmax output is categorical cross-entropy; binary
# cross-entropy would score each of the outputs as an independent yes/no task.
model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
              loss=losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Render the layer graph of the current model into the results folder.
plot_model(model=model, to_file=f'res{os.sep}model2.png')

In [None]:
# Stop once the monitored quantity (Keras default) has not improved for five
# epochs and roll the model back to the best weights seen.
es_callback = callbacks.EarlyStopping(restore_best_weights=True,
                                      verbose=1,
                                      patience=5)

# `batch_size` matters for the in-memory array inputs; generators produce
# their own batches.
fit_options = dict(x=train_X,
                   y=train_Y,
                   batch_size=BATCH_SIZE,
                   epochs=50,
                   validation_data=validation_all,
                   callbacks=[es_callback])

# This history is the one plotted as 'MobileNet' in the comparison figures
# further down.
mobile_w_train_hist = model.fit(**fit_options)

In [None]:
# Score the current model on the test inputs; with the compiled metrics,
# evaluate() returns the loss followed by the accuracy.
loss, acc = model.evaluate(test_X, test_Y)
# Drop any earlier 'MobileNet' row first so that re-running this cell does
# not add a duplicate line to the results table.
table_data[:] = [row for row in table_data if row[0] != 'MobileNet']
table_data.append(('MobileNet', round(acc, 4), round(loss, 4)))

In [None]:
# Small CNN trained from scratch: two conv -> max-pool -> dropout stages,
# then the feature maps are flattened into a softmax layer with one unit per
# entry in `labels`.
model = models.Sequential()
model.add(Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(layers.Conv2D(128, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D(3))
model.add(layers.Dropout(0.2))
model.add(layers.Conv2D(256, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D(3))
model.add(layers.Dropout(0.2))
# Optional third stage, currently disabled:
# model.add(layers.Conv2D(512, 3, padding='same', activation='relu'))
# model.add(layers.MaxPooling2D(3))
# model.add(layers.Dropout(0.2))
model.add(layers.Flatten())
# Output width follows the label list instead of a hard-coded 4.
model.add(layers.Dense(len(labels), activation='softmax'))

# The classes are mutually exclusive and the targets are one-hot vectors, so
# the loss matching the softmax output is categorical cross-entropy; binary
# cross-entropy would score each of the outputs as an independent yes/no task.
model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
              loss=losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Render the layer graph of the current model into the results folder.
plot_model(model=model, to_file=f'res{os.sep}model3.png')

In [None]:
# Stop once the monitored quantity (Keras default) has not improved for five
# epochs and roll the model back to the best weights seen.
es_callback = callbacks.EarlyStopping(restore_best_weights=True,
                                      verbose=1,
                                      patience=5)

# `batch_size` matters for the in-memory array inputs; generators produce
# their own batches.
fit_options = dict(x=train_X,
                   y=train_Y,
                   batch_size=BATCH_SIZE,
                   epochs=50,
                   validation_data=validation_all,
                   callbacks=[es_callback])

# Training history that is plotted as 'Custom CNN' further down.
custom_cnn = model.fit(**fit_options)

In [None]:
# Score the current model on the test inputs; with the compiled metrics,
# evaluate() returns the loss followed by the accuracy.
loss, acc = model.evaluate(test_X, test_Y)
# Drop any earlier 'Custom CNN' row first so that re-running this cell does
# not add a duplicate line to the results table.
table_data[:] = [row for row in table_data if row[0] != 'Custom CNN']
table_data.append(('Custom CNN', round(acc, 4), round(loss, 4)))

In [None]:
# One (history, legend name, line colour) entry per trained model.
accuracy_curves = [
    (mobile_wo_train_hist, 'ResNet', 'b'),
    (mobile_w_train_hist, 'MobileNet', 'g'),
    (custom_cnn, 'Custom CNN', 'r'),
]

plt.figure(figsize=(20, 10))
for hist, model_name, colour in accuracy_curves:
    # Solid line for the training curve, dotted for validation.
    plt.plot(hist.history['accuracy'],
             label=f'{model_name} train accuracy', linestyle='-', color=colour)
    plt.plot(hist.history['val_accuracy'],
             label=f'{model_name} val accuracy', linestyle=':', color=colour)
plt.title('Training and validation accuracy')
plt.xlabel('Number of epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig(f'res{os.sep}acc.png', bbox_inches='tight')
plt.show()

In [None]:
# One (history, legend name, line colour) entry per trained model.
loss_curves = [
    (mobile_wo_train_hist, 'ResNet', 'b'),
    (mobile_w_train_hist, 'MobileNet', 'g'),
    (custom_cnn, 'Custom CNN', 'r'),
]

plt.figure(figsize=(20, 10))
for hist, model_name, colour in loss_curves:
    # Solid line for the training curve, dotted for validation.
    plt.plot(hist.history['loss'],
             label=f'{model_name} train loss', linestyle='-', color=colour)
    plt.plot(hist.history['val_loss'],
             label=f'{model_name} val loss', linestyle=':', color=colour)
plt.title('Training and validation loss')
plt.xlabel('Number of epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(f'res{os.sep}loss.png', bbox_inches='tight')
plt.show()

In [None]:
# Render the collected test metrics as an image-only table (axes hidden).
title = 'table'
cols = ['Model', 'Test accuracy', 'Test loss']

fig, ax = plt.subplots()
ax.axis('off')
table = ax.table(cellText=table_data,
                 colLabels=cols,
                 cellLoc='center',
                 loc='center')
# Use a fixed font size and enlarge the cells to match it.
table.auto_set_font_size(False)
table.set_fontsize(55)
table.scale(5, 15)
fig.savefig(f'res{os.sep}{title}.png', bbox_inches='tight')
plt.show()

In [None]:
# Bundle everything written to the res folder into a single archive.
!zip -r cv_res.zip res