# CSC 370 Portfolio Exercise #2

**Plant Image Classification**

Inception network. This version uses the Inception v3 architecture that ships with TensorFlow (`tf.keras.applications`).

The dataset is private and comes from the Kaggle BTTAI x NYBG competition.


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Import Data

In [None]:
# create folder for data
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
# Download the competition archive (bttai-nybg-2024.zip) with the Kaggle CLI.
# The recorded run fetched about 27.7 GB and took several minutes.
!kaggle competitions download -c bttai-nybg-2024

Downloading bttai-nybg-2024.zip to /content
100% 27.6G/27.7G [04:34<00:00, 126MB/s]
100% 27.7G/27.7G [04:34<00:00, 108MB/s]


In [None]:
# unzip file into folder|
!unzip "/content/bttai-nybg-2024.zip" -d "/content/bttai-nybg-2024"

## Make Dataset

In [None]:
import tensorflow as tf

In [None]:
# Read the train and validation metadata tables from the extracted archive.
train_df, validate_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/bttai-nybg-2024/BTTAIxNYBG-train.csv',
        '/content/bttai-nybg-2024/BTTAIxNYBG-validation.csv',
    )
)

In [None]:
# Training-time preprocessing: pixel values are multiplied by 1/255 and each image is
# randomly rotated, shifted, sheared, zoomed and horizontally flipped.
augmentation_options = dict(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [None]:
# Stream augmented training batches from disk: every row of train_df names an image
# file (imageFile) and its label (classLabel); files are looked up in image_directory.
# NOTE(review): the directory is relative, so it resolves only when the working
# directory is /content (where the archive was extracted) — confirm.
image_directory = 'bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train'
train_ds = train_datagen.flow_from_dataframe(
    train_df,
    image_directory,
    x_col='imageFile',
    y_col='classLabel',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
)

Found 81946 validated image filenames belonging to 10 classes.


In [None]:
# Validation images get no augmentation: only the 1/255 rescaling is applied.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
)

In [None]:
# Validation batches: same columns, image size and batch size as the training
# generator, but produced by the rescale-only validation_datagen.
image_directory = 'bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation'
val_ds = validation_datagen.flow_from_dataframe(
    validate_df,
    image_directory,
    x_col='imageFile',
    y_col='classLabel',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
)

Found 10244 validated image filenames belonging to 10 classes.


## Model Building

Reference: https://www.tensorflow.org/tutorials/images/cnn

In [None]:
import keras
from keras.models import Sequential

from keras import layers
from keras.layers import Dense, Flatten, Dropout

from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Build Inception v3 with randomly initialised weights (weights=None) and its own
# 10-way softmax classification head.
# input_shape is stated explicitly: left as None with include_top=True, Keras falls
# back to the architecture's default 299x299x3 input, which does not match the
# 224x224 RGB batches produced by the data generators.
inception = tf.keras.applications.inception_v3.InceptionV3(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=(224, 224, 3),
    pooling=None,
    classes=10,
    classifier_activation='softmax'
)

In [None]:
# The model's last layer already applies softmax (classifier_activation='softmax'),
# so the loss has to treat the outputs as probabilities. With from_logits=True the
# loss would apply softmax a second time and train on distorted gradients.
inception.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
inception.summary()
# Optional architecture diagram (left disabled):
# keras.utils.plot_model(inception, show_shapes=True)

## Model Training

In [None]:
# Weights-only checkpoint file. The ".weights.h5" suffix is the one Keras 3 requires
# when save_weights_only=True, and older tf.keras versions accept it as HDF5.
checkpoint_path = "training/cp.weights.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Make sure the target folder exists before the first save.
os.makedirs(checkpoint_dir, exist_ok=True)

# Create a callback that saves the model's weights.
# save_weights_only defaults to False (whole-model save), so it is set explicitly
# to match the intent above.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
)

In [None]:
# Train for 10 epochs, evaluating on the validation generator after each one.
# The keyword must be spelled `callbacks`; fit() rejects unknown keyword arguments
# with a TypeError before any training happens.
history_inception = inception.fit(
    train_ds,
    epochs=10,
    validation_data=val_ds,
    callbacks=[cp_callback],
)

In [None]:
# Model handle and display name used by the visualisation cells.
# Repoint both together to plot a different model (e.g. alexNet / 'AlexNet').
curr_model, curr_model_name = inception, 'Inception'

In [None]:
# Plot training vs. validation accuracy and loss for each epoch.
curr_history = history_inception

acc = curr_history.history['accuracy']
val_acc = curr_history.history['val_accuracy']

loss = curr_history.history['loss']
val_loss = curr_history.history['val_loss']

# Take the x-axis length from the recorded history instead of a hard-coded 10, so the
# plot still works when the epoch count is changed or training stops early.
epochs_range = range(len(acc))

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 7.5))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title(curr_model_name + ' Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title(curr_model_name + ' Training and Validation Loss')

plt.show()

## Test

In [None]:
# Metadata table for the test split.
test_df = pd.read_csv(
    filepath_or_buffer='/content/bttai-nybg-2024/BTTAIxNYBG-test.csv',
)

In [None]:
# Test batches reuse the rescale-only validation generator and carry no labels
# (y_col=None, class_mode=None). shuffle=False keeps the batches in a fixed order
# so predictions can be matched back to rows of test_df.
image_directory = 'bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test'
test_ds = validation_datagen.flow_from_dataframe(
    test_df,
    image_directory,
    x_col='imageFile',
    y_col=None,
    class_mode=None,
    shuffle=False,
    target_size=(224, 224),
)

In [None]:
# Rewind the generator so prediction starts from the first test image.
test_ds.reset()
test_preds = inception.predict(test_ds)
# A single vectorised argmax over the class axis replaces the per-row Python loop.
test_pred_class = np.argmax(test_preds, axis=1)
# NOTE(review): these values are the model's output indices, i.e. the label-to-index
# mapping chosen by the training generator (train_ds.class_indices). Confirm that the
# mapping coincides with the competition's classID values before submitting.

In [None]:
# Attach the predicted class index to every test row (positional, row-for-row).
test_df['classID'] = pd.Series(test_pred_class, index=test_df.index)

In [None]:
# Keep only the two columns written to the submission file.
submission_columns = ['uniqueID', 'classID']
submission = test_df[submission_columns]

In [None]:
# Write the submission CSV without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)