# CSC 370 Portfolio Exercise #4

**Plant Image Classification**

Inception-ResNet network.

The dataset is private and comes from the Kaggle BTTAI x NYBG competition platform.


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Make Dataset

In [None]:
# create folder for data
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the bttai-nybg-2024 competition archive with the Kaggle CLI
# (the recorded run saved it to /content)
!kaggle competitions download -c bttai-nybg-2024

Downloading bttai-nybg-2024.zip to /content
100% 27.7G/27.7G [24:25<00:00, 22.8MB/s]
100% 27.7G/27.7G [24:25<00:00, 20.3MB/s]


In [None]:
# Extract the downloaded archive into the /content/bttai-nybg-2024 folder
!unzip "/content/bttai-nybg-2024.zip" -d "/content/bttai-nybg-2024"

In [None]:
import tensorflow as tf

In [None]:
# Root folder that contains the extracted `bttai-nybg-2024/` directory.
# Set the BTTAI_DATA_DIR environment variable to point somewhere else (e.g. '/content',
# or '' for the current directory); the original local path remains the default.
# os.path.join(..., '') guarantees a trailing separator, because later cells build
# paths by plain string concatenation (file + 'bttai-nybg-2024/...').
file = os.path.join(os.environ.get('BTTAI_DATA_DIR', '/Users/yuhanw/Desktop/BTTAI/'), '')

In [None]:
# Read the training and validation label tables from the data root
train_csv_path = file + 'bttai-nybg-2024/BTTAIxNYBG-train.csv'
validation_csv_path = file + 'bttai-nybg-2024/BTTAIxNYBG-validation.csv'

train_df = pd.read_csv(train_csv_path)
validate_df = pd.read_csv(validation_csv_path)

In [None]:
# Training-time preprocessing: pixel rescaling plus random geometric augmentation
augmentation_options = dict(
    rescale=1. / 255,        # multiply every pixel value by 1/255
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [None]:
# Stream augmented training batches from disk, driven by the rows of train_df
image_directory = file + 'bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train'
train_flow_options = dict(
    dataframe=train_df,
    directory=image_directory,
    x_col='imageFile',          # column holding the image file names
    y_col='classLabel',         # column holding the class label
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)
train_ds = train_datagen.flow_from_dataframe(**train_flow_options)

Found 81946 validated image filenames belonging to 10 classes.


In [None]:
# No augmentation for validation images: the only preprocessing is the 1/255 rescaling
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
)

In [None]:
# Stream validation batches from disk, driven by the rows of validate_df.
image_directory = file+'bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation'
val_ds= validation_datagen.flow_from_dataframe(
        dataframe=validate_df,
        directory=image_directory,
        x_col='imageFile',
        y_col='classLabel',
        target_size=(224, 224),
        batch_size=32,
        # flow_from_dataframe shuffles by default. The evaluation cells compare
        # predict(val_ds) with validate_df['classID'] row by row, so batches must
        # come out in dataframe order or predictions and labels are misaligned.
        shuffle=False,
        class_mode='categorical')

Found 10244 validated image filenames belonging to 10 classes.


## Model Building

Reference: https://www.tensorflow.org/tutorials/images/cnn

In [None]:
import keras
from keras.models import Sequential

from keras import layers
from keras.layers import Dense, Flatten, Dropout

from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Build InceptionResNetV2 from scratch (no pretrained weights) with a 10-class softmax head.
# input_shape is given explicitly so the model matches the 224x224 RGB batches produced
# by the data generators; with input_shape=None the network is built for its 299x299
# default, which does not match the data pipeline.
inception_resnet = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=(224, 224, 3),
    pooling=None,
    classes=10,
    classifier_activation='softmax'
)

In [None]:
# The model already ends in a softmax layer (classifier_activation='softmax'), so its
# outputs are probabilities, not logits. from_logits must therefore be False; with True
# the loss would apply softmax a second time and Keras emits a warning during fit.
inception_resnet.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts
inception_resnet.summary()
# Optional diagram: keras.utils.plot_model(inception_resnet, show_shapes=True)

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 299, 299, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 149, 149, 32)         864       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 149, 149, 32)         96        ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, 149, 149, 32)         0         ['batch_norm

## Model Training

In [None]:
# Where the training checkpoint is written
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights.
# save_weights_only=True makes the callback do what the comment says; without it
# ModelCheckpoint serializes the whole model rather than just the weights.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True)

In [None]:
# Train the network and keep the per-epoch metrics for plotting.
# cp_callback was created earlier but never handed to fit(), so no checkpoint was
# ever written; pass it here so progress is saved during training.
history_inception_resnet = inception_resnet.fit(
    train_ds,
    epochs=1,
    validation_data=val_ds,
    callbacks=[cp_callback],
)

  output, from_logits = _get_logits(


   1/2561 [..............................] - ETA: 25:10:52 - loss: 2.4055 - accuracy: 0.0625

KeyboardInterrupt: 

In [None]:
# Choose the model (and its display name) used by the plotting and evaluation cells
curr_model, curr_model_name = inception_resnet, 'Inception_ResNet'
# To inspect another model, rebind both names, e.g. alexNet / 'AlexNet'

In [None]:
# Plot training and validation accuracy / loss per epoch
curr_history = history_inception_resnet

acc = curr_history.history['accuracy']
val_acc = curr_history.history['val_accuracy']

loss = curr_history.history['loss']
val_loss = curr_history.history['val_loss']

# Derive the x-axis from the recorded history instead of hard-coding 6 epochs:
# plt.plot fails when x and y have different lengths (e.g. after fit(..., epochs=1)).
epochs_range = range(len(acc))

plt.figure(figsize=(10, 7.5))

# Left panel: accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title(curr_model_name+' Training and Validation Accuracy')

# Right panel: loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title(curr_model_name +' Training and Validation Loss')
plt.show()

## Evaluate

**Note:** Because this Kaggle competition does not provide test labels, I will use the validation dataset to produce some visual evaluation.

In [None]:
# Evaluate the selected model on the validation batches. The model is compiled with
# metrics=['accuracy'], so the returned list is [loss, accuracy].
results = curr_model.evaluate(val_ds)
print("Final loss, Final accuracy:", results)

Final loss, Final accuracy: [0.5460229516029358, 0.8206754922866821]


In [None]:
# Collect predicted class indices and the true class IDs for the validation set.
# NOTE(review): pairing predictions with validate_df rows assumes val_ds yields samples
# in dataframe order (i.e. it was created with shuffle=False) — confirm.
val_ds.reset()
y_predictions = curr_model.predict(val_ds)
# Row-wise argmax: index of the highest-scoring class for each image
y_pred_class = list(np.argmax(y_predictions, axis=1))
y_true = validate_df['classID'].to_numpy()



In [None]:
# Sanity check on the prediction/label ordering: count how many predictions
# disagree (False) and agree (True) with the true labels
is_match = (y_true == y_pred_class)
unique, frequency = np.unique(is_match, return_counts=True)
print(unique, frequency)

[False  True] [1224 9020]


This part is to predict on test data

## Test

In [None]:
# Read the test-set table from the data root
test_csv_path = file + 'bttai-nybg-2024/BTTAIxNYBG-test.csv'
test_df = pd.read_csv(test_csv_path)

In [None]:
# Stream unlabeled test images in dataframe order, reusing the rescale-only generator
image_directory = file + 'bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test'
test_flow_options = dict(
    dataframe=test_df,
    directory=image_directory,
    x_col='imageFile',
    y_col=None,              # no label column is used for the test set
    target_size=(224, 224),
    shuffle=False,           # keep predictions aligned with test_df rows
    class_mode=None,         # yield image batches only, no targets
)
test_ds = validation_datagen.flow_from_dataframe(**test_flow_options)

In [None]:
# Restart the iterator so predictions come out in sequence from the first sample
test_ds.reset()
test_preds = inception_resnet.predict(test_ds)
# Row-wise argmax: index of the highest-scoring class for each test image
test_pred_class = list(np.argmax(test_preds, axis=1))

In [None]:
# Store the predicted class index for every test row in a 'classID' column (modifies test_df in place)
test_df['classID'] = test_pred_class

In [None]:
# Keep only the two columns written to the submission file: sample ID and predicted class
submission = test_df[['uniqueID', 'classID']]

In [None]:
# Write the submission CSV to the working directory, without the dataframe index column
submission.to_csv('inception_resnet_submission.csv',index=False)