In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
from google.colab import drive

In [2]:
# Apply matplotlib's "ggplot" stylesheet to every figure created after this cell
plt.style.use("ggplot")

In [3]:
# Mount Google Drive into the Colab filesystem under /content/drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [5]:
# Load the train and test tables from the mounted Drive folder.
# The train table carries a `label` column that is split off in the next cell.
train = pd.read_csv("/content/drive/MyDrive/mnist/train.csv")
test = pd.read_csv("/content/drive/MyDrive/mnist/test.csv")

In [6]:
# Separate the target column from the remaining (feature) columns as NumPy arrays
y_train = train["label"].to_numpy()
X_train = train.drop(columns=["label"]).to_numpy()

In [7]:
# The test table has no label column, so every column is a feature
X_test = test.to_numpy()

In [8]:
# Rescale both feature arrays by 1/255
# (assumes 8-bit pixel intensities, so values land in [0, 1] — TODO confirm)
X_train, X_test = (pixels / 255 for pixels in (X_train, X_test))

In [9]:
# Unflatten every row into a (1, 28, 28) image; -1 lets NumPy infer the sample count
X_train, X_test = (flat.reshape(-1, 1, 28, 28) for flat in (X_train, X_test))

In [10]:
# One-hot encode the integer labels into float32 rows of length 10
y_train_oh = nn.functional.one_hot(
    torch.as_tensor(y_train, dtype=torch.long), num_classes=10
).float()

In [11]:
# Turn the feature arrays and the one-hot targets into float32 tensors
X_test = torch.as_tensor(X_test, dtype=torch.float32)

X_train = torch.as_tensor(X_train, dtype=torch.float32)
y_train_oh = torch.as_tensor(y_train_oh, dtype=torch.float32)

In [12]:
class CNN(nn.Module):
    """Convolutional classifier for 1x28x28 images with 10 output classes.

    Layout: three stages of two 3x3, padding-1 convolutions each
    (1->32->64, 64->128->128, 128->256->256 channels). The first two stages
    end with 2x2 max-pooling (28 -> 14 -> 7 spatially) and every stage ends
    with dropout. The 256x7x7 feature map is flattened and passed through a
    512-unit fully connected layer (ReLU + dropout) and a 10-unit output
    layer.

    ``forward`` returns raw, unnormalized logits. ``nn.CrossEntropyLoss``
    applies log-softmax itself, so applying softmax inside ``forward`` as
    well would squash the loss and weaken the gradients. The ``softmax``
    module is still available for callers that need probabilities:
    ``model.softmax(model(x))``. Taking ``argmax`` of the logits yields the
    same class as taking it of the probabilities.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 -> 64 channels, then 28x28 -> 14x14
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.3)

        # Stage 2: 64 -> 128 -> 128 channels, then 14x14 -> 7x7
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.relu4 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.3)

        # Stage 3: 128 -> 256 -> 256 channels, no pooling (stays 7x7)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.relu5 = nn.ReLU()

        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.relu6 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.35)

        # Classifier head; 256 * 7 * 7 matches the flattened stage-3 output
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256 * 7 * 7, 512)
        self.relu7 = nn.ReLU()
        self.dropout4 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(512, 10)
        # Not applied in forward(); kept for converting logits to probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: float tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding unnormalized logits.
        """
        x = self.conv1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool1(x)
        x = self.dropout1(x)

        x = self.conv3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.relu4(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        x = self.conv5(x)
        x = self.relu5(x)

        x = self.conv6(x)
        x = self.relu6(x)
        x = self.dropout3(x)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu7(x)
        x = self.dropout4(x)

        # Return logits: the loss function performs the (log-)softmax.
        return self.fc2(x)

In [13]:
# Number of training examples (size of the leading dimension)
num_samples = X_train.size(0)

In [14]:
# NOTE(review): the next three values are not read by CNN (its layer sizes are
# hard-coded in the class) nor by the loop below; kept in case other cells use them.
num_filters1 = 8
num_filters2 = 16
num_hidden_units=256
learning_rate = 0.001
num_epochs = 30
batch_size = 110

model = CNN()

# Move the model, input data, and labels to the selected device
model = model.to(device)
X_train = X_train.to(device)
y_train_oh = y_train_oh.to(device)

# CrossEntropyLoss applies log-softmax to the model output internally and
# accepts per-class probability targets, which is what the one-hot rows are.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Read the size from the tensor itself: the global `num_samples` is reassigned
# to the test-set size by the prediction cell, so relying on it would truncate
# training if this cell is re-run afterwards.
num_train_samples = X_train.shape[0]

# Make sure dropout is active even if the model object was left in eval mode.
model.train()

for epoch in range(num_epochs):
    # Accumulate on-device to avoid a host sync on every batch.
    epoch_loss = torch.zeros((), device=device)

    for start_index in range(0, num_train_samples, batch_size):
        # Slicing clamps at the end of the tensor, so the final, shorter batch
        # is trained on as well instead of being skipped.
        end_index = start_index + batch_size
        X_batch = X_train[start_index: end_index]
        y_batch = y_train_oh[start_index: end_index]

        # Forward pass
        y_batch_ = model(X_batch)
        loss = criterion(y_batch_, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch length so the epoch mean is exact with a short last batch.
        epoch_loss += loss.detach() * X_batch.shape[0]

    mean_loss = epoch_loss.item() / max(num_train_samples, 1)
    print(f"epoch {epoch + 1}/{num_epochs} - loss: {mean_loss:.4f}")

In [15]:
# Place the test inputs and the model on the same device before predicting
X_test = X_test.to(device)
model = model.to(device)

In [16]:
# Predict on X_test in fixed-size batches because of memory limitations
batch_size = 1000
num_samples = X_test.shape[0]
num_batches = -(-num_samples // batch_size)  # ceiling division

# Inference mode: disables dropout
model.eval()

# No gradients are needed for prediction. Slices that run past the end are
# clamped, so the last batch may be shorter than batch_size.
with torch.no_grad():
    y_test_oh_ = torch.cat(
        [
            model(X_test[offset: offset + batch_size])
            for offset in range(0, num_samples, batch_size)
        ],
        dim=0,
    )

In [17]:
# Copy the stacked model outputs to host memory as a NumPy array, then pick
# the highest-scoring class in each row as the predicted label
y_test_oh_ = y_test_oh_.cpu().numpy()
y_test_ = y_test_oh_.argmax(axis=1)

In [18]:
# Build the submission column: a "Label" series indexed by "ImageId"
test_predictions = pd.Series(y_test_, name="Label").rename_axis("ImageId")
# kaggle expects one indexed predictions
test_predictions.index = test_predictions.index + 1

In [19]:
# Write the predictions to Drive; the named index is written as the first column
test_predictions.to_csv("/content/drive/MyDrive/mnist/cnn_baseline2.csv")