__PROJECT 02__ || *__GITHUB:__ vedantabanerjee* || *__X(TWITTER):__ 0xr1sh1*

### Problem Statement

The CIFAR-10 dataset is a well-known collection of images commonly used in machine learning and computer vision applications. It consists of 60,000 color images, each with a resolution of 32x32 pixels. These images are evenly distributed across the following 10 classes:

- Airplanes
- Cars
- Birds
- Cats
- Deer
- Dogs
- Frogs
- Horses
- Ships
- Trucks

CIFAR-10 (CIFAR stands for the Canadian Institute For Advanced Research) provides a comprehensive dataset for training and evaluating models on image classification tasks. Each class contains 6,000 images, offering a balanced set for robust model training and validation. The relatively low resolution of the images poses a unique challenge for classification algorithms, making it an ideal benchmark for developing and testing machine learning models.

**Data Source:** [CIFAR-10 Dataset](https://www.cs.toronto.edu/~kriz/cifar.html)

### Import Libraries and Datasets

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import keras
%matplotlib inline

In [None]:
from keras.datasets import cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

### EDA

In [None]:
X_train.shape #it has 50k images which are 32 x 32 in res and has 3 channels - RGB

In [None]:
X_test.shape

In [None]:
y_train.shape #it has 1 column of 50k labels for 50k images in X_train 

In [None]:
y_test.shape

In [None]:
# Data visualization: show one training image and print its label
i = 1005
sample_image, sample_label = X_train[i], y_train[i]
plt.imshow(sample_image)
print(sample_label)

In [None]:
# Draw a 15 x 15 grid of randomly chosen training images, each titled with its label
W_grid = 15
L_grid = 15

fig, axes = plt.subplots(L_grid, W_grid, figsize=(25, 25))
# ravel() returns a contiguous flattened array, so every grid cell can be
# visited with a single loop
axes = axes.ravel()

n_training = len(X_train)

for ax in axes:
    # pick a random training sample for this cell
    index = np.random.randint(0, n_training)
    ax.imshow(X_train[index])
    ax.set_title(y_train[index])
    ax.axis('off')

plt.subplots_adjust(hspace=0.3)

### Data Preparation

In [None]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [None]:
number_cate = 10

In [None]:
y_train

In [None]:
y_train = keras.utils.to_categorical(y_train, number_cate)

In [None]:
y_train #convertion to categorical data is important to manage the neural activation

In [None]:
y_test = keras.utils.to_categorical(y_test, number_cate)

In [None]:
y_test

In [None]:
#since the data is already converted to a float type, normalizing the value
X_train = X_train/255
X_test = X_test/255

In [None]:
X_train

In [None]:
Input_shape = X_train.shape[1:]

In [None]:
Input_shape

### Model Training

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras import optimizers
from keras.optimizers import Adam
from keras.callbacks import TensorBoard

In [None]:
# CNN classifier: two convolutional blocks (conv -> conv -> max-pool -> dropout)
# followed by a stack of fully connected layers and a 10-way softmax output.
cnn_model = Sequential()
# Declare the input shape with an explicit Input object instead of passing
# input_shape to the first layer, which current Keras discourages for
# Sequential models.
cnn_model.add(keras.Input(shape = Input_shape))
#convolution block - I (two 3x3 convolutions, 32 filters each)
cnn_model.add(Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'))
cnn_model.add(Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'))
cnn_model.add(MaxPooling2D(2, 2))
cnn_model.add(Dropout(0.3))
#convolution block - II (two 3x3 convolutions, 64 filters each)
cnn_model.add(Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'))
cnn_model.add(Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'))
cnn_model.add(MaxPooling2D(2, 2))
cnn_model.add(Dropout(0.2))
#flattening the feature maps into a vector for the dense layers
cnn_model.add(Flatten())
#first fully connected layer (receives the flattened features)
cnn_model.add(Dense(units = 100, activation ='relu'))
#further fully connected hidden layers
cnn_model.add(Dense(units = 200, activation ='relu'))
cnn_model.add(Dense(units = 400, activation ='relu'))
cnn_model.add(Dense(units = 600, activation ='relu'))
#output layer: one softmax unit per class
cnn_model.add(Dense(units = 10, activation = 'softmax'))

In [None]:
# Compile the model: cross-entropy loss over the one-hot labels, RMSprop
# optimizer, accuracy reported as the metric
rmsprop = keras.optimizers.RMSprop(learning_rate=0.001)
cnn_model.compile(
    optimizer=rmsprop,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs in mini-batches of 32 with shuffling enabled
history = cnn_model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    shuffle=True,
)

### Model Evaluation

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy requested at compile time
evaluation = cnn_model.evaluate(X_test, y_test)
test_accuracy = evaluation[1]
print(f'Test Accuracy: {test_accuracy}')

In [None]:
# Softmax outputs for every test image
predict_x = cnn_model.predict(X_test)
# The predicted class is the position of the largest output in each row
classes_x = predict_x.argmax(axis=1)
classes_x

In [None]:
y_test

In [None]:
# Convert the one-hot rows back to class indices. The ndim guard keeps this
# cell safe to re-run: argmax(1) on labels that are already 1-D would raise
# an axis error.
if y_test.ndim > 1:
    y_test = y_test.argmax(1)

In [None]:
y_test

In [None]:
# Visualize the first 49 test images with the predicted and the true label
L_mat = 7
W_mat = 7
fig, axes = plt.subplots(L_mat, W_mat, figsize=(12, 12))
axes = axes.ravel()

# zip stops at the shortest input, i.e. after the L_mat * W_mat axes
for ax, image, predicted, actual in zip(axes, X_test, classes_x, y_test):
    ax.imshow(image)
    ax.set_title(f'Prediction = {predicted} \n True = {actual}')
    ax.axis('off')

plt.subplots_adjust(hspace=0.6)

In [None]:
#confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

# first argument: true labels, second argument: the model's predicted classes
cm = confusion_matrix(y_test, classes_x)
cm

In [None]:
# Heatmap of the confusion matrix
plt.figure(figsize = (8,8))
# fmt='d' prints the integer counts as-is; the default annotation format
# abbreviates three-digit counts into scientific notation
ax = sns.heatmap(cm, annot = True, fmt = 'd')
# confusion_matrix() was called as (y_test, classes_x): rows are the true
# classes, columns the predicted ones
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

### Saving the model

In [None]:
#saving the model locally
#the current code generated an accuracy of 72.85 upon the last training
import os
directory = os.path.join(os.getcwd(), 'saved_models')

# exist_ok=True creates the folder only when it is missing, replacing the
# separate isdir() check
os.makedirs(directory, exist_ok = True)
# '72_85' instead of '72:85': a colon is not a valid file-name character on Windows
model_path = os.path.join(directory, 'keras_cifar10_cnn_trained_72_85.keras')
cnn_model.save(model_path)

## Model Improvement: Image Augmentation

In [None]:
import tensorflow as tf
import numpy as np
import keras
from keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [None]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [None]:
X_train.shape

In [None]:
# Generator used for the augmentation preview: random rotations only
datagen = ImageDataGenerator(rotation_range = 90)
# NOTE(review): fit() is documented as needed only for the featurewise / ZCA
# options, none of which are enabled here - this call looks redundant; confirm
datagen.fit(X_train)

In [None]:
#visualizing the different augmented images
def plot_augmented_images(datagen, x_train):
    """Show one batch of up to nine augmented images in a 3x3 grid.

    Args:
        datagen: Object exposing ``flow(x, batch_size=...)``; the notebook
            passes an ``ImageDataGenerator``.
        x_train: Image array to draw the augmented samples from. Pixel
            values are clipped to 0-255 and cast to uint8 for display, so
            the array should not be rescaled to 0-1 beforehand.
    """
    # Use the array that was passed in (not the notebook globals). Without
    # labels, flow() yields image batches only.
    for X_batch in datagen.flow(x_train, batch_size=9):
        # enumerate() over the batch also copes with fewer than nine images
        for i, augmented in enumerate(X_batch):
            plt.subplot(3, 3, i + 1)
            # Clip the pixel values to the valid range 0-255
            img = np.clip(augmented, 0, 255).astype('uint8')
            plt.imshow(img)
        plt.show()
        # flow() loops forever; one batch is enough for the preview
        break
plot_augmented_images(datagen, X_train)

### Model Training using Augmented dataset

In [None]:
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
num_classes = 10
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Augmentation settings used while training
datagen = ImageDataGenerator(
    # random rotation of up to 90 degrees
    rotation_range = 90,
    # random horizontal shift of up to 10% of the image width
    width_shift_range = 0.1,
    # random left-right mirroring
    horizontal_flip = True,
    # random top-bottom mirroring
    vertical_flip = True
)

In [None]:
datagen.fit(X_train)

In [None]:
# NOTE(review): cnn_model is the same object that was already fitted for 10
# epochs earlier in this notebook and is not re-created in between, so this
# call continues training from those weights - confirm that is intended
# rather than training a fresh model on the augmented data.
augmented_batches = datagen.flow(X_train, y_train, batch_size=32)
history = cnn_model.fit(
    augmented_batches,
    epochs=10,
    validation_data=(X_test, y_test),
)

### Evaluating this new Model with Augmented Images

In [None]:
# Score the model on the test set and report both values to four decimals
loss, accuracy = cnn_model.evaluate(X_test, y_test)

for label, value in (('Test Loss', loss), ('Test Accuracy', accuracy)):
    print(f'{label}: {value:.4f}')