Dataset: https://www.kaggle.com/datasets/slothkong/10-monkey-species <br>
Multi-class Classification <br>
Custom CNN

In [19]:
from keras.models import Sequential
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten

In [20]:
def create_cnn(input_shape=(400, 300, 3), num_classes=10, dense_units=3200):
    """Build the (uncompiled) custom CNN used for multi-class image classification.

    The network consists of five [5x5 Conv2D -> MaxPooling2D] stages whose
    filter count doubles at every stage (5, 10, 20, 40, 80), followed by a
    Flatten layer, one hidden ReLU Dense layer and a softmax output layer.

    With the default input the feature-map shapes are:
        (400,300,3)
        -> conv (396,296,5)  -> pool (198,148,5)
        -> conv (194,144,10) -> pool (97,72,10)
        -> conv (93,68,20)   -> pool (46,34,20)
        -> conv (42,30,40)   -> pool (21,15,40)
        -> conv (17,11,80)   -> pool (8,5,80)
        -> flatten: 8*5*80 = 3200 features

    Parameters
    ----------
    input_shape : tuple of int, default (400, 300, 3)
        (height, width, channels) of the input images.
    num_classes : int, default 10
        Number of units in the softmax output layer (one per class;
        10 for the 10 monkey species).
    dense_units : int, default 3200
        Number of units in the hidden Dense layer. The default equals the
        flattened feature length for the default input shape.

    Returns
    -------
    Sequential
        The assembled model. It is not compiled; call ``compile`` before training.
    """
    cnn = Sequential()
    cnn.add(Input(shape=input_shape))

    # Each convolution trims 4 pixels per spatial dimension (5x5 kernel),
    # each MaxPooling layer halves the spatial size and leaves the channel
    # count untouched. The growing filter count compensates for the
    # shrinking feature maps.
    for n_filters in (5, 10, 20, 40, 80):
        cnn.add(Conv2D(filters=n_filters, kernel_size=5, activation="relu"))
        cnn.add(MaxPooling2D())

    cnn.add(Flatten())
    cnn.add(Dense(units=dense_units, activation="relu"))
    # One output unit per class; softmax turns them into class probabilities.
    cnn.add(Dense(units=num_classes, activation="softmax"))
    return cnn

In [21]:
# Instantiate the network with its default architecture (it is compiled in a later cell).
cnn = create_cnn()

In [22]:
# Print each layer's output shape and parameter count.
cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 396, 296, 5)       380       
                                                                 
 max_pooling2d (MaxPooling2  (None, 198, 148, 5)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 194, 144, 10)      1260      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 97, 72, 10)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 93, 68, 20)        5020      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 46, 34, 20)        0

#### Reason for using MaxPooling layers and our choice of the number of filters
Convolution alone does not shrink the image much (each 5×5 convolution removes only 4 pixels from each dimension). If we did not use MaxPooling layers, the vector we get from the Flatten layer (image height × width × number of filters) would be very long, i.e. we would get a very large number of features. Since the first Dense layer here has as many units as that vector has entries, its number of parameters is roughly the square of that large number. So we would get a huge number of parameters in our Dense layers, which would make the model very slow to train. <br>
So we use MaxPooling layers to reduce the image dimensions to a moderate size by the time we reach the Flatten layer. To compensate for this reduction we increase the number of filters: the spatial size of the feature maps shrinks, but their depth grows with the number of filters.

<hr>

Making generator

In [23]:
from keras.preprocessing.image import ImageDataGenerator

In [24]:
datagen = ImageDataGenerator(rescale=1/255.0)
# rescale=1/255.0 => every pixel value is multiplied by 1/255 (i.e. divided by 255).
# Pixel values in the range 0-255 therefore end up between 0 and 1 (max normalisation).

In [25]:
import os

In [26]:
# Names of the class folders, in whatever order the filesystem returns them.
# NOTE(review): os.listdir() gives no ordering guarantee, so the class -> label-index
# mapping built from this list can differ between machines; wrapping the call in
# sorted() would make it reproducible.
categories_list = os.listdir("03_Dataset/training/training")

In [27]:
categories_list
# The order we get is arbitrary (filesystem-dependent); it is not sorted

['n0', 'n7', 'n9', 'n8', 'n6', 'n1', 'n4', 'n3', 'n2', 'n5']

##### Order of classes passed matters
In case of binary class classification, <br>
If we pass ["Cat","Dog"] then Cat gets label 0 and Dog gets label 1 (the model predicts the probability of Dog) <br>
Else if we pass ["Dog","Cat"] then Dog gets label 0 and Cat gets label 1 (the model predicts the probability of Cat) <br> <br>
In case of multi-class classification, <br>
The class whose folder name is placed at index i will get encoded with label=i. <br>
So, in the probability vector output by the model, the probability of this class is found at index i. <br> <br>

Here, you can see that "n9" is at index 2 of the list, so the probability of an image belonging to class "n9" will be at index 2 of the model's output. <br>
If the ground truth of an image is class n9, then its ground-truth vector y (label data) has 1 at index 2 and 0 at all the other indices. <br> <br>

In [28]:
# Count the files inside every class folder, separately for the training and
# validation splits. The class folder names are taken from the training
# directory and reused for the validation directory.
class_folders = os.listdir("03_Dataset/training/training")

total_training_images = sum(
    len(os.listdir("03_Dataset/training/training" + "/" + name))
    for name in class_folders
)
total_validation_images = sum(
    len(os.listdir("03_Dataset/validation/validation/" + name))
    for name in class_folders
)

print("There are {} images in training directory".format(total_training_images))
print("There are {} images in validation directory".format(total_validation_images))

There are 1098 images in training directory
There are 272 images in validation directory


In [35]:
training_datagen = datagen.flow_from_directory(directory="03_Dataset/training/training",
                                               target_size=(400,300),
                                               color_mode="rgb",
                                               classes=categories_list,
                                               batch_size=18)
# target_size: this CNN has a fixed input shape, so every image must have the same size.
# It does not matter if the dataset contains images of varying sizes:
# each image is resized to the target_size specified here.

# classes = list of the folder names of the classes (position in the list = label index)
# It can also be written out directly: ["n0", "n1", ..... , "n9"]

# class_mode - defaults to "categorical", which means multi-class (one-hot labels)
# For binary classification, set it to "binary"

# batch_size - 18 divides the 1098 training images exactly (61 batches per epoch).
# NOTE(review): an exact divisor is presumably not required (the last batch would just be smaller) - confirm in the Keras docs

Found 1098 images belonging to 10 classes.


In [36]:
# Same image size, colour mode and class order as the training iterator, but
# reading from the validation directory. batch_size=272 equals the number of
# validation images, so the whole validation set is delivered as one batch.
validation_datagen = datagen.flow_from_directory(directory="03_Dataset/validation/validation",
                                               target_size=(400,300),
                                               color_mode="rgb",
                                               classes=categories_list,
                                               batch_size=272)

Found 272 images belonging to 10 classes.


In [31]:
# Displaying the object shows that it is a DirectoryIterator, not an in-memory array.
training_datagen

<keras.src.preprocessing.image.DirectoryIterator at 0x29426ab20>

In [32]:
# Pull one mini-batch from the training iterator: a batch of images and the
# matching label vectors. The builtin next() is the idiomatic way to advance
# an iterator (it calls __next__ internally).
X_train_mini_batch, Y_train_mini_batch = next(training_datagen)

In [33]:
# Image data of the mini-batch; the pixel values are already rescaled to the range 0-1.
X_train_mini_batch

array([[[[0.30980393, 0.34117648, 0.2509804 ],
         [0.43921572, 0.48235297, 0.4156863 ],
         [0.43137258, 0.454902  , 0.4156863 ],
         ...,
         [0.37647063, 0.454902  , 0.41960788],
         [0.7960785 , 0.80392164, 0.7843138 ],
         [0.8235295 , 0.8235295 , 0.8235295 ]],

        [[0.2509804 , 0.29803923, 0.20392159],
         [0.39607847, 0.4431373 , 0.3647059 ],
         [0.44705886, 0.45882356, 0.4156863 ],
         ...,
         [0.34509805, 0.41176474, 0.3803922 ],
         [0.6901961 , 0.6901961 , 0.68235296],
         [0.8078432 , 0.80392164, 0.7960785 ]],

        [[0.2392157 , 0.28627452, 0.19215688],
         [0.38823533, 0.42352945, 0.34901962],
         [0.36862746, 0.37647063, 0.33333334],
         ...,
         [0.34509805, 0.38823533, 0.37254903],
         [0.6039216 , 0.59607846, 0.60784316],
         [0.8117648 , 0.8078432 , 0.8000001 ]],

        ...,

        [[0.39607847, 0.34509805, 0.3137255 ],
         [0.26666668, 0.22352943, 0.20784315]

In [18]:
Y_train_mini_batch
# You can see the one-hot encoding here.
# These are the ground-truth vectors: one row per image, one column per class.
# Each row can be read as a probability vector with probability 1 on the true class.
# 18 vectors (for the 18 images of the mini-batch)

array([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [37]:
import tensorflow as tf

In [38]:
# Multi-class problem with one-hot labels -> categorical cross-entropy loss.
# Accuracy, precision and recall are tracked during training and validation.
cnn.compile(
    optimizer="rmsprop",  # the Keras default optimiser, only stated explicitly here
    loss="categorical_crossentropy",
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [39]:
# Train for 15 epochs, evaluating on the validation iterator during training.
# fit() returns a History object; keep a reference to it so the per-epoch
# metrics stay available after training instead of being discarded.
history = cnn.fit(training_datagen, epochs=15, validation_data=validation_datagen)
history

Epoch 1/15


2023-06-02 14:45:26.556357: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-06-02 14:46:05.167334: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x29ba46d00>