# Convolutional Neural Networks (CNN)

In [None]:
# Standard-library modules
import os
import warnings

# Third-party modules used for data handling and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Silence all warnings so they do not clutter the notebook output
warnings.filterwarnings('ignore')

# List the contents of the data directory
print(os.listdir("./data"))

## Loading the Data Set
* In this part we load and visualize the data.

In [None]:
# Load the training CSV into a DataFrame, report its (rows, columns) shape,
# and preview the first five rows.
train = pd.read_csv(filepath_or_buffer="./data/train.csv")
print(train.shape)
train.head(n=5)

In [None]:
# Load the test CSV into a DataFrame, report its (rows, columns) shape,
# and preview the first five rows.
test = pd.read_csv(filepath_or_buffer="./data/test.csv")
print(test.shape)
test.head(n=5)

In [None]:
# Features: every column of `train` except 'label'
X_train = train.drop(columns=["label"])
# Targets: the 'label' column on its own
Y_train = train["label"]

In [None]:
# Visualize how many training samples exist for each digit class
plt.figure(figsize=(15,7))
# Pass the labels as `x=` explicitly: seaborn >= 0.12 interprets the first
# positional argument as `data`, so a positional Series is no longer counted per class.
g = sns.countplot(x=Y_train, palette="icefire")
plt.title("Number of digit classes")
# Last expression of the cell: shows the per-class counts as a table
Y_train.value_counts()

In [None]:
# Display the first training sample as a 28x28 grayscale image.
# The title is the value in the first column of `train` for that row
# (presumably the label column - confirm against the CSV layout).
img = X_train.iloc[0].to_numpy().reshape((28,28))
plt.imshow(img, cmap='gray')
plt.title(train.iloc[0,0])
plt.axis("off")
plt.show()

In [None]:
# Display the fourth training sample (row index 3) as a 28x28 grayscale image.
# The title is the value in the first column of `train` for that row
# (presumably the label column - confirm against the CSV layout).
img = X_train.iloc[3].to_numpy().reshape((28,28))
plt.imshow(img, cmap='gray')
plt.title(train.iloc[3,0])
plt.axis("off")
plt.show()

In [None]:
# Scale both feature sets by 1/255
# (assumes raw pixel intensities in the 0-255 range - TODO confirm)
X_train, test = X_train / 255.0, test / 255.0
print("x_train shape: ",X_train.shape)
print("test shape: ",test.shape)

In [None]:
# Reshape the flat pixel rows into (n_samples, 28, 28, 1) image tensors.
# X_train and test are still pandas DataFrames at this point, and DataFrames
# have no .reshape method, so convert to NumPy arrays first. np.asarray also
# accepts an ndarray, so re-running this cell does not fail.
X_train = np.asarray(X_train).reshape(-1,28,28,1)
test = np.asarray(test).reshape(-1,28,28,1)
print("x_train shape: ",X_train.shape)
print("test shape: ",test.shape)

In [None]:
# One-hot encode the digit labels: each label becomes a vector with 10 entries
from keras.utils import to_categorical
Y_train = to_categorical(
    Y_train,
    num_classes=10,
)

<a id="3"></a>
## Train Test Split
* We split the data into train and validation sets.
* validation size is 10%.
* train size is 90%.

In [None]:
# Hold out 10% of the labelled data as a validation set for monitoring the fit.
# random_state is fixed so the split is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=2)
# Label the held-out arrays as "val": they are distinct from the separate `test` set
print("x_train shape",X_train.shape)
print("x_val shape",X_val.shape)
print("y_train shape",Y_train.shape)
print("y_val shape",Y_val.shape)

In [None]:
# Show the third training image (index 2), selecting its single channel
example_img = X_train[2, :, :, 0]
plt.imshow(example_img, cmap='gray')
plt.show()

<a id="4"></a>
## Convolutional Neural Network 
* CNN is used for image classification, object detection 
* <a href="https://ibb.co/kV1j9p"><img src="https://preview.ibb.co/nRkBpp/gec2.jpg" alt="gec2" border="0"></a>

<a id="5"></a>
### What is Convolution Operation?
* We have some image and feature detector(3*3)
* Feature detector does not need to be 3 by 3 matrix. It can be 5 by 5 or 7 by 7.
* Feature detector = kernel = filter
* Feature detector detects features like edges or convex shapes. For example, if our input is a dog, the feature detector can detect features like the ear or tail of the dog.
* feature map = conv(input image, feature detector). Element wise multiplication of matrices.
* feature map = convolved feature
* Stride = navigating in input image.
* We reduce the size of the image. This is important because the code runs faster. However, we lose information. 
* We create multiple feature maps bc we use multiple feature detectors(filters).
* Lets look at gimp. Edge detect: [0,10,0],[10,-4,10],[0,10,0]
* <a href="https://imgbb.com/"><img src="https://image.ibb.co/m4FQC9/gec.jpg" alt="gec" border="0"></a>
* After the convolution layer we use ReLU to break up linearity, i.e. to increase nonlinearity, because images are nonlinear.
* <a href="https://ibb.co/mVZih9"><img src="https://preview.ibb.co/gbcQvU/RELU.jpg" alt="RELU" border="0"></a>

<a id="7"></a>
### Max Pooling
* It makes down-sampling or sub-sampling (Reduces the number of parameters)
* It makes the detection of features invariant to scale or orientation changes.
* It reduces the number of parameters and the amount of computation in the network, and hence also helps to control overfitting. 
* <a href="https://ibb.co/ckTjN9"><img src="https://preview.ibb.co/gsNYFU/maxpool.jpg" alt="maxpool" border="0"></a>

<a id="8"></a>
### Flattening
* <a href="https://imgbb.com/"><img src="https://image.ibb.co/c7eVvU/flattenigng.jpg" alt="flattenigng" border="0"></a>

<a id="9"></a>
### Full Connection
* Neurons in a fully connected layer have connections to all activations in the previous layer
* Artificial Neural Network
* <a href="https://ibb.co/hsS14p"><img src="https://preview.ibb.co/evzsAU/fullyc.jpg" alt="fullyc" border="0"></a>

<a id="10"></a>
## Implementing with Keras

In [None]:
# Importing necessary libraries
from sklearn.metrics import confusion_matrix      # For creating a confusion matrix
import itertools                                  # For various utility functions

from keras.models import Sequential               # Importing the Sequential model from Keras
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D  # Importing different types of layers from Keras
from keras.optimizers import Adam                 # Importing Adam optimizer from Keras

# Build the network as an ordered stack of layers handed to Sequential in one list
model = Sequential([
    # Convolutional feature extractor: 8 filters with a 5x5 kernel, 'Same' padding,
    # ReLU activation; the declared input is a 28x28 image with one channel
    Conv2D(filters=8, kernel_size=(5, 5), padding='Same',
           activation='relu', input_shape=(28, 28, 1)),
    # Downsample the convolution output with a 2x2 max-pooling window
    MaxPool2D(pool_size=(2, 2)),
    # Flatten the multi-dimensional feature maps into a single vector
    # so they can feed the fully connected layers
    Flatten(),
    # Fully connected hidden layer: 256 units with ReLU activation
    Dense(256, activation="relu"),
    # Dropout with rate 0.5: randomly zeroes a fraction of the inputs during
    # training to reduce overfitting
    Dropout(0.5),
    # Output layer: 10 units with softmax, giving one probability per class
    Dense(10, activation="softmax"),
])

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (the labels are
# one-hot encoded), and accuracy as the reported metric.
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and rejected by current Keras releases.
model.compile(optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999) , loss = "categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Training hyperparameters: number of passes over the training data and
# samples per gradient update (for better result increase the epochs)
epochs, batch_size = 10, 250

<a id="16"></a>
### Fit the model

In [None]:
# Train the network and keep the per-epoch metrics in `history`;
# the held-out validation arrays are evaluated at the end of every epoch
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, Y_val),
)

<a id="17"></a>
### Evaluate the model
* Validation loss visualization
* Confusion matrix


In [None]:
# Plot the validation loss recorded after each epoch.
# The curve comes from history.history['val_loss'], i.e. the held-out validation
# split, so it is titled as validation loss rather than test loss.
plt.plot(history.history['val_loss'], color='b', label="validation loss")
plt.title("Validation Loss")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# confusion matrix
import seaborn as sns
# Predict the class scores for the validation dataset
Y_pred = model.predict(X_val)
# Convert the predicted score vectors to class indices (position of the largest score)
Y_pred_classes = np.argmax(Y_pred,axis = 1) 
# Convert the one-hot validation labels back to class indices
Y_true = np.argmax(Y_val,axis = 1) 
# compute the confusion matrix (rows: true class, columns: predicted class)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
# plot the confusion matrix; the cells are integer counts, so annotate them
# with the integer format 'd' instead of a one-decimal float
f,ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01,cmap="Greens",linecolor="gray", fmt= 'd',ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()