In [79]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import optuna

In [2]:
# Apply matplotlib's "ggplot" style sheet to figures drawn from here on.
plt.style.use("ggplot")

In [5]:
# Read the training and test tables from CSV files under data/raw/
# (paths are relative to the notebook's working directory).
train = pd.read_csv("data/raw/train.csv")
test = pd.read_csv("data/raw/test.csv")

In [51]:
# Split the training table: the "label" column is the target,
# every remaining column is a feature.
y_train = train["label"].to_numpy()
X_train = train.drop(columns=["label"]).to_numpy()

In [52]:
# The test table has no target column here; use all of it as features.
X_test = test.to_numpy()

In [53]:
# Scale both feature arrays by 255
# (assumes raw 0-255 pixel intensities -- TODO confirm against the CSV).
# NOTE: this cell rebinds its own inputs, so running it twice scales twice.
X_train, X_test = X_train / 255, X_test / 255

In [82]:
# Give every sample an explicit (channels, height, width) = (1, 28, 28) layout;
# -1 lets the number of samples be inferred.
X_train, X_test = (
    pixels.reshape(-1, 1, 28, 28) for pixels in (X_train, X_test)
)

In [83]:
# Turn the NumPy feature arrays into float32 tensors.
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()

In [84]:
# One-hot encode the integer labels as float rows of width 10.
y_train_oh = nn.functional.one_hot(
    torch.as_tensor(y_train, dtype=torch.long), num_classes=10
).float()

In [89]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier for 1x28x28 inputs, 10 classes.

    Architecture: conv(5x5) -> ReLU -> maxpool(2) -> dropout(0.25)
                  conv(3x3) -> ReLU -> maxpool(2) -> dropout(0.25)
                  flatten -> linear -> ReLU -> dropout(0.5) -> linear(10)

    ``forward`` returns raw logits. ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so feeding it already-softmaxed outputs normalizes
    twice and flattens the gradients. Use ``predict_proba`` when class
    probabilities are needed; ``argmax`` gives the same class either way.

    Args:
        num_filters1: Output channels of the first convolution.
        num_filters2: Output channels of the second convolution.
        num_hidden_units: Width of the hidden fully connected layer.
    """

    def __init__(self,
                 num_filters1=8,
                 num_filters2=16,
                 num_hidden_units=256):
        super().__init__()

        # Stage 1: padding=2 with a 5x5 kernel keeps 28x28; pooling halves it to 14x14.
        self.conv1 = nn.Conv2d(1, num_filters1, kernel_size=5, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)

        # Stage 2: padding=1 with a 3x3 kernel keeps 14x14; pooling halves it to 7x7.
        self.conv2 = nn.Conv2d(num_filters1, num_filters2,
                               kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.25)

        # Classifier head; the 7 * 7 factor is the spatial size left after
        # two 2x poolings of a 28x28 input.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(num_filters2 * 7 * 7, num_hidden_units)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(num_hidden_units, 10)
        # Kept as an attribute for existing code; used by predict_proba only,
        # never inside forward().
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return unnormalized class scores (logits) of shape (batch, 10).

        Args:
            x: Float tensor of shape (batch, 1, 28, 28).
        """
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.dropout1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.dropout3(x)
        x = self.fc2(x)

        return x

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for ``x``."""
        return self.softmax(self(x))

In [94]:
# Number of training samples (size of the leading axis).
num_samples = len(X_train)

In [95]:
# Hyperparameters for this training run.
num_filters1 = 8
num_filters2 = 16
num_hidden_units = 256
learning_rate = 0.001
num_epochs = 10
batch_size = 250

model = CNN(num_filters1=num_filters1, num_filters2=num_filters2,
            num_hidden_units=num_hidden_units)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Make sure dropout is active while fitting.
model.train()

for epoch in range(num_epochs):
    for start_index in range(0, num_samples, batch_size):
        end_index = start_index + batch_size
        # Slicing clamps at the end of the data, so a trailing partial batch
        # is trained on instead of being skipped.
        batch = X_train[start_index:end_index]
        batch_targets = y_train_oh[start_index:end_index]

        # Forward pass on the mini-batch only. Running the whole training set
        # through the model on every step made each step a full-dataset pass.
        batch_output = model(batch)
        # Targets are one-hot rows, passed to the loss as class probabilities.
        loss = criterion(batch_output, batch_targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

KeyboardInterrupt: 

In [None]:
# Score the training set with dropout disabled (eval mode) and without
# recording an autograd graph. Otherwise predictions are randomly perturbed
# and a graph over the whole dataset is held in memory.
model.eval()
with torch.no_grad():
    y_train_oh_ = model(X_train)
# argmax over the class axis picks the predicted label for each sample.
y_train_ = torch.argmax(y_train_oh_, dim=1).numpy()

accuracy = accuracy_score(y_train, y_train_)

print(f"Training accuracy: {accuracy}")