In [1]:
import torch
import torch.optim as optim
import torch.utils.data
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import transforms, datasets, models
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

In [2]:
#--- hyperparameters ---
N_EPOCHS = 1
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_TEST = 100
LR = 0.005

In [3]:
#--- fixed constants ---
NUM_CLASSES = 24
DATA_DIR = '../data/sign_mnist_%s'

In [4]:
# --- Dataset initialization ---

# We transform image files' contents to tensors
# Plus, we can add random transformations to the training data if we like
# Think on what kind of transformations may be meaningful for this data.
# Eg., horizontal-flip is definitely a bad idea for sign language data.
# You can use another transformation here if you find a better one.

# Grayscale + toTensor + Normalize
train_transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=0.5, std=0.5, inplace=True)])

test_transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=0.5, std=0.5, inplace=True)])

train_set = datasets.ImageFolder(DATA_DIR % 'train', transform=train_transform)
dev_set   = datasets.ImageFolder(DATA_DIR % 'dev',   transform=test_transform)
test_set  = datasets.ImageFolder(DATA_DIR % 'test',  transform=test_transform)


# Create Pytorch data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
dev_loader = torch.utils.data.DataLoader(dataset=dev_set, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=BATCH_SIZE_TEST, shuffle=False)

In [5]:
#--- model ---
class CNN(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(CNN, self).__init__()
        # WRITE CODE HERE
        
        # Sequential 1: Convolution + batch normalization + ReLU + maxpooling
        self.seq1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=100, kernel_size=3, stride=1),
            nn.BatchNorm2d(num_features=100),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        
        # Sequential 2: Convolution + batch normalization + ReLU + maxpooling
        self.seq2 = nn.Sequential(
            nn.Conv2d(in_channels=100, out_channels=80, kernel_size=5, stride=1),
            nn.BatchNorm2d(num_features=80),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=4, padding=0))
        
        # Sequential 3: Linear + ReLU + Linear
        self.seq3 = nn.Sequential(
            nn.Linear(in_features=4*80, out_features=250),
            nn.ReLU(),
            nn.Linear(in_features=250, out_features=NUM_CLASSES))
            
    def forward(self, x):
        # WRITE CODE HERE
  
        # Sequential 1
        x = self.seq1(x)
        
        # Sequential 2
        x = self.seq2(x)
        
        # Reshape
        x = x.view(x.size(0), -1)
        
        # Sequential 3
        x = self.seq3(x)
        
        # log_softmax
        x = F.log_softmax(x, dim=1)
        
        # Return x
        return x

# Print model summary
#print(CNN())

In [6]:
#--- set up ---

# Print Cuda info
print("Cuda is available: {} \n".format(torch.cuda.is_available()))


if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = CNN().to(device)

# WRITE CODE HERE

# Oprimizers
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Loss functions
loss_function = nn.CrossEntropyLoss()


#--- training ---
for epoch in range(N_EPOCHS):
    train_loss = 0
    train_correct = 0
    total = 0
    for batch_num, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Sum of sizes
        total += target.size(0)
        
        # Get prediction values
        outputs = model(data)
        
        # Get prediction labels
        _, predicted = torch.max(outputs, 1)
        
        # Sum of number of correct predictions
        train_correct += (predicted == target).sum().item()
        
        # Calculate loss
        loss=loss_function(outputs, target)
        
        # Backward + optimizer step
        loss.backward()
        optimizer.step()

        # Print statistics
        train_loss+=loss
        print('Training: Epoch %d - Batch %d/%d: Loss: %.4f | Train Acc: %.3f%% (%d/%d)' % 
              (epoch, batch_num, len(train_loader), train_loss / (batch_num + 1), 
               100. * train_correct / total, train_correct, total))
    
    # WRITE CODE HERE
    # Please implement early stopping here.
    # You can try different versions, simplest way is to calculate the dev error and
    # compare this with the previous dev error, stopping if the error has grown.


Cuda is available: False 

Training: Epoch 0 - Batch 0/275: Loss: 3.2102 | Train Acc: 6.000% (6/100)
Training: Epoch 0 - Batch 1/275: Loss: 3.4393 | Train Acc: 6.000% (12/200)
Training: Epoch 0 - Batch 2/275: Loss: 3.3835 | Train Acc: 6.000% (18/300)
Training: Epoch 0 - Batch 3/275: Loss: 3.3244 | Train Acc: 6.750% (27/400)
Training: Epoch 0 - Batch 4/275: Loss: 3.2747 | Train Acc: 7.400% (37/500)
Training: Epoch 0 - Batch 5/275: Loss: 3.2451 | Train Acc: 7.667% (46/600)
Training: Epoch 0 - Batch 6/275: Loss: 3.1987 | Train Acc: 8.571% (60/700)
Training: Epoch 0 - Batch 7/275: Loss: 3.1650 | Train Acc: 10.000% (80/800)
Training: Epoch 0 - Batch 8/275: Loss: 3.1282 | Train Acc: 11.111% (100/900)
Training: Epoch 0 - Batch 9/275: Loss: 3.0773 | Train Acc: 12.900% (129/1000)
Training: Epoch 0 - Batch 10/275: Loss: 3.0397 | Train Acc: 13.909% (153/1100)
Training: Epoch 0 - Batch 11/275: Loss: 2.9978 | Train Acc: 15.083% (181/1200)
Training: Epoch 0 - Batch 12/275: Loss: 2.9588 | Train Acc: 

Training: Epoch 0 - Batch 103/275: Loss: 0.9230 | Train Acc: 72.606% (7551/10400)
Training: Epoch 0 - Batch 104/275: Loss: 0.9156 | Train Acc: 72.819% (7646/10500)
Training: Epoch 0 - Batch 105/275: Loss: 0.9076 | Train Acc: 73.066% (7745/10600)
Training: Epoch 0 - Batch 106/275: Loss: 0.9002 | Train Acc: 73.271% (7840/10700)
Training: Epoch 0 - Batch 107/275: Loss: 0.8928 | Train Acc: 73.500% (7938/10800)
Training: Epoch 0 - Batch 108/275: Loss: 0.8851 | Train Acc: 73.743% (8038/10900)
Training: Epoch 0 - Batch 109/275: Loss: 0.8778 | Train Acc: 73.964% (8136/11000)
Training: Epoch 0 - Batch 110/275: Loss: 0.8706 | Train Acc: 74.171% (8233/11100)
Training: Epoch 0 - Batch 111/275: Loss: 0.8635 | Train Acc: 74.384% (8331/11200)
Training: Epoch 0 - Batch 112/275: Loss: 0.8563 | Train Acc: 74.611% (8431/11300)
Training: Epoch 0 - Batch 113/275: Loss: 0.8493 | Train Acc: 74.825% (8530/11400)
Training: Epoch 0 - Batch 114/275: Loss: 0.8426 | Train Acc: 75.026% (8628/11500)
Training: Epoch 

Training: Epoch 0 - Batch 203/275: Loss: 0.4834 | Train Acc: 85.828% (17509/20400)
Training: Epoch 0 - Batch 204/275: Loss: 0.4811 | Train Acc: 85.898% (17609/20500)
Training: Epoch 0 - Batch 205/275: Loss: 0.4787 | Train Acc: 85.966% (17709/20600)
Training: Epoch 0 - Batch 206/275: Loss: 0.4765 | Train Acc: 86.034% (17809/20700)
Training: Epoch 0 - Batch 207/275: Loss: 0.4742 | Train Acc: 86.101% (17909/20800)
Training: Epoch 0 - Batch 208/275: Loss: 0.4719 | Train Acc: 86.167% (18009/20900)
Training: Epoch 0 - Batch 209/275: Loss: 0.4697 | Train Acc: 86.233% (18109/21000)
Training: Epoch 0 - Batch 210/275: Loss: 0.4675 | Train Acc: 86.299% (18209/21100)
Training: Epoch 0 - Batch 211/275: Loss: 0.4653 | Train Acc: 86.363% (18309/21200)
Training: Epoch 0 - Batch 212/275: Loss: 0.4631 | Train Acc: 86.427% (18409/21300)
Training: Epoch 0 - Batch 213/275: Loss: 0.4610 | Train Acc: 86.491% (18509/21400)
Training: Epoch 0 - Batch 214/275: Loss: 0.4588 | Train Acc: 86.553% (18609/21500)
Trai

In [7]:
#--- test ---
test_loss = 0
test_correct = 0
total = 0

with torch.no_grad():
    for batch_num, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
        # WRITE CODE HERE
        outputs=model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        test_correct += (predicted == target).sum()
        loss=loss_function(outputs, target)
        
        test_loss+=loss
        print('Evaluating: Batch %d/%d: Loss: %.4f | Test Acc: %.3f%% (%d/%d)' % 
              (batch_num, len(test_loader), test_loss / (batch_num + 1), 
               100. * test_correct / total, test_correct, total))


Evaluating: Batch 0/37: Loss: 1.0364 | Test Acc: 66.000% (66/100)
Evaluating: Batch 1/37: Loss: 0.5997 | Test Acc: 80.000% (160/200)
Evaluating: Batch 2/37: Loss: 1.0186 | Test Acc: 72.667% (218/300)
Evaluating: Batch 3/37: Loss: 1.2728 | Test Acc: 70.750% (283/400)
Evaluating: Batch 4/37: Loss: 1.1441 | Test Acc: 73.200% (366/500)
Evaluating: Batch 5/37: Loss: 0.9666 | Test Acc: 77.500% (465/600)
Evaluating: Batch 6/37: Loss: 0.8684 | Test Acc: 79.143% (554/700)
Evaluating: Batch 7/37: Loss: 0.7684 | Test Acc: 81.625% (653/800)
Evaluating: Batch 8/37: Loss: 0.6935 | Test Acc: 83.333% (750/900)
Evaluating: Batch 9/37: Loss: 0.6251 | Test Acc: 85.000% (850/1000)
Evaluating: Batch 10/37: Loss: 0.5781 | Test Acc: 86.182% (948/1100)
Evaluating: Batch 11/37: Loss: 0.6959 | Test Acc: 84.083% (1009/1200)
Evaluating: Batch 12/37: Loss: 0.7952 | Test Acc: 81.846% (1064/1300)
Evaluating: Batch 13/37: Loss: 0.9387 | Test Acc: 77.714% (1088/1400)
Evaluating: Batch 14/37: Loss: 0.9080 | Test Acc: 7