# ML Mini project

#### Importing Libraries and the Dataset

In [41]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [42]:
df = pd.read_csv("sign_mnist_train.csv")

In [43]:
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,3,107,118,127,134,139,143,146,150,153,...,207,207,207,207,206,206,206,204,203,202
1,6,155,157,156,156,156,157,156,158,158,...,69,149,128,87,94,163,175,103,135,149
2,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
3,2,211,211,212,212,211,210,211,210,210,...,235,234,233,231,230,226,225,222,229,163
4,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27450,13,189,189,190,190,192,193,193,193,193,...,132,165,99,77,52,200,234,200,222,225
27451,23,151,154,157,158,160,161,163,164,166,...,198,198,198,198,198,196,195,195,195,194
27452,18,174,174,174,174,174,175,175,174,173,...,121,196,209,208,206,204,203,202,200,200
27453,17,177,181,184,185,187,189,190,191,191,...,119,56,27,58,102,79,47,64,87,93


#### Creating a DataLoader

In [44]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [45]:
X_train = torch.tensor(df.iloc[:,1:].to_numpy(),dtype=torch.float32).unsqueeze(-1).permute(0,2,1)
y_train = torch.tensor(df.iloc[:,0].to_numpy())
X_train.shape,y_train

(torch.Size([27455, 1, 784]), tensor([ 3,  6,  2,  ..., 18, 17, 23]))

In [46]:
train_data = MyDataset(X_train,y_train)
test_data = MyDataset(X_train,y_train)

In [47]:
from torch.utils.data import DataLoader

In [48]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [49]:
device

'cpu'

In [50]:
train_dataloader = DataLoader(train_data,batch_size=8,shuffle=True,pin_memory=True)
test_dataloader = DataLoader(test_data,batch_size=8,shuffle=True,pin_memory=True)

In [51]:
next(iter(train_dataloader))

[tensor([[[166., 166., 166.,  ..., 152., 159., 159.]],
 
         [[186., 188., 191.,  ..., 125., 108.,  79.]],
 
         [[120., 121., 121.,  ..., 139., 137., 137.]],
 
         ...,
 
         [[197., 197., 197.,  ...,  30.,  26.,  26.]],
 
         [[133., 134., 137.,  ..., 173., 173., 172.]],
 
         [[142., 155., 165.,  ..., 132., 135., 136.]]]),
 tensor([14, 17, 11, 11, 18,  6,  6, 11])]

#### The CNN Model

In [52]:
class CNN(nn.Module):
    def __init__(self,input_shape,hidden_units,output_shape):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv1d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2,
                         stride=2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv1d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv1d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2,stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*7*7,
                      out_features=output_shape),
            nn.Dropout(0.2)
        )

    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_2(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x



####Train Function

In [53]:
def train_model(model_0,train_dataloader: DataLoader, test_dataloader: DataLoader, optimizer: torch.optim.Optimizer, loss_fn: torch.nn.Module, accuracy_fn,
          epochs: int,verbose: bool = True):
        results = []
        for epoch in range(epochs):
            print(f"Epoch: {epoch}\n-------")
            metrics = {
                 "train_loss": [],
                 "test_loss": [],
                 "train_acc": [],
                 "test_acc": []
            }
            train_loss,train_acc = 0,0
            for batch, (X, y) in enumerate(train_dataloader):
                X,y = X.to(device),y.to(device)
                model_0.train()
                # 1. Forward pass
                y_pred = model_0(X)

                # 2. Calculate loss (per batch)
                loss = loss_fn(y_pred, y)
                train_loss += loss # accumulatively add up the loss per epoch
                train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
                # 3. Optimizer zero grad
                optimizer.zero_grad()

                # 4. Loss backward
                loss.backward()

                # 5. Optimizer step
                optimizer.step()

                # Print out how many samples have been seen
                if batch % 400 == 0:
                    print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

            # Divide total train loss by length of train dataloader (average loss per batch per epoch)
            train_loss /= len(train_dataloader)
            train_acc /= len(train_dataloader)
            metrics["train_loss"].append(train_loss.detach().to("cpu").numpy())
            metrics["train_acc"].append(train_acc)
            ### Testing
            # Setup variables for accumulatively adding up loss and accuracy
            test_loss, test_acc = 0, 0
            model_0.eval()
            with torch.inference_mode():
                for X, y in test_dataloader:
                    X,y = X.to(device),y.to(device)
                    # 1. Forward pass
                    test_pred = model_0(X)

                    # 2. Calculate loss (accumatively)
                    test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch

                    # 3. Calculate accuracy (preds need to be same as y_true)
                    test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

                # Calculations on test metrics need to happen inside torch.inference_mode()
                # Divide total test loss by length of test dataloader (per batch)
                test_loss /= len(test_dataloader)

                # Divide total accuracy by length of test dataloader (per batch)
                test_acc /= len(test_dataloader)

                ## Print out what's happening



                metrics["test_acc"].append(test_acc)
                metrics["test_loss"].append(test_loss.detach().to("cpu").numpy())


            print(f"\nTrain loss: {train_loss:.5f}, Train Accuracy: {train_acc: .4f} | Test loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%\n")
            results.append(metrics)


        return results

In [54]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

#### Training the Model for 10 epochs

In [55]:
model = CNN(1,12,26).to(device)

In [56]:
model

CNN(
  (block_1): Sequential(
    (0): Conv1d(1, 12, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv1d(12, 12, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): Conv1d(12, 12, kernel_size=(3,), stride=(1,), padding=(1,))
    (3): ReLU()
    (4): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=588, out_features=26, bias=True)
    (2): Dropout(p=0.2, inplace=False)
  )
)

In [57]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [58]:
results = train_model(model,train_dataloader,test_dataloader,optimizer,loss_fn,accuracy_fn,10,True)

Epoch: 0
-------
Looked at 0/27455 samples
Looked at 3200/27455 samples
Looked at 6400/27455 samples
Looked at 9600/27455 samples
Looked at 12800/27455 samples
Looked at 16000/27455 samples
Looked at 19200/27455 samples
Looked at 22400/27455 samples
Looked at 25600/27455 samples

Train loss: 0.93538, Train Accuracy:  70.4618 | Test loss: 0.07374, Test Accuracy: 97.84%

Epoch: 1
-------
Looked at 0/27455 samples
Looked at 3200/27455 samples
Looked at 6400/27455 samples
Looked at 9600/27455 samples
Looked at 12800/27455 samples
Looked at 16000/27455 samples
Looked at 19200/27455 samples
Looked at 22400/27455 samples
Looked at 25600/27455 samples

Train loss: 0.46174, Train Accuracy:  82.3166 | Test loss: 0.03131, Test Accuracy: 99.20%

Epoch: 2
-------
Looked at 0/27455 samples
Looked at 3200/27455 samples
Looked at 6400/27455 samples
Looked at 9600/27455 samples
Looked at 12800/27455 samples
Looked at 16000/27455 samples
Looked at 19200/27455 samples
Looked at 22400/27455 samples
Looked

In [59]:
epochs = np.arange(1,11,1)