<a href="https://colab.research.google.com/github/tszalama/ml_dice/blob/main/Pytorch_dice_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Flags and parameters

In [None]:
# Name of the .dat file that is held out of training and used only for evaluation.
TEST_FILE = 'dice_09.dat'

# True  -> download a pre-trained model and load it; the training cells are skipped.
# False -> start from a freshly initialized model and train it in this notebook.
LOAD_MODEL = True

Download dataset

In [None]:
!wget 'https://www.dropbox.com/s/9ufpyaghb7fyfq9/dice.zip'  #Dataset
!unzip dice.zip
!rm -rf __MACOSX

Import all necessary libraries

In [None]:
import io
import os
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
import torch
from torch import nn, optim
from torchvision import datasets, transforms

Dataset definition

In [None]:
class DiceDataset(Dataset):
    """Dice pictures and labels read from raw ``.dat`` files.

    Every ``.dat`` file directly inside ``root_path`` is read as a flat stream of
    uint8 values and split into records of 401 bytes: byte 0 is the label and the
    remaining 400 bytes are reshaped into one 1x20x20 picture.

    Args:
        train: If true, load every ``.dat`` file except ``test_file``;
            otherwise load only ``test_file``.
        root_path: Directory that contains the ``.dat`` files (with or without
            a trailing slash).
        test_file: File name (not a path) of the held-out file.

    Attributes:
        pictures: float32 tensor of shape (N, 1, 20, 20) with values in [0, 1].
        labels: int64 tensor of shape (N,).

    Raises:
        ValueError: If no file matches the requested split, or (from the numpy
            reshape) a file's size is not a multiple of 401 bytes.
    """

    # Bytes per record: 1 label byte followed by a 20x20 picture.
    RECORD_SIZE = 401

    def __init__(self, train=True, root_path='dice/data/', test_file=TEST_FILE):
        # The context manager closes the directory iterator promptly.
        # Sorting makes the sample order reproducible (scandir order is arbitrary).
        with os.scandir(root_path) as entries:
            dat_paths = sorted(
                entry.path  # already joined with root_path, trailing slash or not
                for entry in entries
                if entry.is_file()
                and entry.name.endswith('.dat')
                # train -> everything but the test file; not train -> only the test file
                and bool(train) != (entry.name == test_file)
            )

        if not dat_paths:
            raise ValueError(
                'No .dat files found in %r for the %s split (test_file=%r)'
                % (root_path, 'train' if train else 'test', test_file)
            )

        records = np.vstack([
            np.fromfile(path, dtype=np.uint8).reshape(-1, self.RECORD_SIZE)
            for path in dat_paths
        ])

        # Columns 1.. hold the pixels; scale the uint8 range to [0, 1].
        self.pictures = torch.tensor(
            records[:, 1:].reshape(-1, 1, 20, 20), dtype=torch.float32
        ) / 255
        # Column 0 holds the class index; int64 is the index dtype loss functions
        # such as nn.NLLLoss are documented to accept.
        self.labels = torch.tensor(records[:, 0], dtype=torch.long)

    def __getitem__(self, key):
        """Return the ``(picture, label)`` pair(s) selected by ``key``."""
        return self.pictures[key], self.labels[key]

    def __len__(self):
        """Return the number of samples."""
        return len(self.pictures)

Data loader / data set initialization

In [None]:
# One dataset per split: everything except the held-out file, and the held-out file.
train_data, test_data = DiceDataset(train=True), DiceDataset(train=False)

# Mini-batches of 64 samples. Only the training loader shuffles;
# the test loader keeps the samples in the order they were loaded.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

Model definition

In [None]:
# CNN for dice recognition (based on the MNIST CNN example shown in lectures).
# Spatial sizes in the comments assume a 1x20x20 input picture.
model_dice = nn.Sequential(
  # Feature extractor: unpadded 3x3 convolutions, each followed by a ReLU.
  nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),   # 20x20 -> 18x18
  nn.ReLU(),
  nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3),  # 18x18 -> 16x16
  nn.ReLU(),
  nn.MaxPool2d(kernel_size=(2, 2)),                           # 16x16 -> 8x8, keeps the strongest response per 2x2 window
  nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3),  # 8x8 -> 6x6
  nn.ReLU(),
  # Classifier head.
  nn.Flatten(),                       # 32 channels * 6 * 6 = 1152 features per sample
  nn.Linear(1152, 132),               # fully connected (linear) layer
  nn.ReLU(),
  nn.Linear(132, 13),                 # one score per output class (13 classes)
  nn.LogSoftmax(dim=1),               # log-probabilities over the class dimension
)

Model initialization

In [None]:
#Load a pre-trained model from google drive if the LOAD_MODEL is set to true
if LOAD_MODEL:
  LOADFILE = 'dice_model.pt'
  !wget -O dice_model.pt 'https://drive.google.com/uc?export=download&id=14y_3sFo3x-szv8LF-aF15KWA99kcItnY'
  model = torch.load(LOADFILE)
  model.eval()
else:
  #If LOAD_MODEL flag is set to false, initialize a new model
  model = model_dice

Device configuration

In [None]:
# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU
# (training is slower there).
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = 'cpu'
print('GPU State:', device)

# Move the model's parameters to the chosen device. As the last expression of the
# cell, this also displays the model's layer summary.
model.to(device)

GPU State: cuda:0


Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (6): ReLU()
  (7): Flatten(start_dim=1, end_dim=-1)
  (8): Linear(in_features=1152, out_features=132, bias=True)
  (9): ReLU()
  (10): Linear(in_features=132, out_features=13, bias=True)
  (11): LogSoftmax(dim=1)
)

Specify training hyperparameters, loss function and optimizer

In [None]:
# Training configuration; only needed when the model was not loaded.
# `not LOAD_MODEL` mirrors the truthiness test used when the model is chosen,
# so both cells always take the same branch.
if not LOAD_MODEL:
  epochs = 5        # full passes over the training data
  lr = 0.005        # SGD learning rate
  momentum = 0.9    # SGD momentum
  # Negative log-likelihood loss: pairs with the LogSoftmax output layer of the model.
  criterion = nn.NLLLoss()
  # Stochastic gradient descent with momentum over all model parameters.
  optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

Training loop

In [None]:
# Train the model; skipped entirely when a pre-trained model was loaded.
if not LOAD_MODEL:
  #Training loop (based on MNIST example shown in lectures)
  model.train()  # make sure the model is in training mode

  for epoch in range(epochs):
      for batch_idx, batch in enumerate(train_loader):
          inputs = batch[0].to(device)
          # The loss expects class indices; int64 is accepted by every PyTorch version.
          labels = batch[1].to(device, dtype=torch.long)

          # Clear the gradients left over from the previous step, then run the forward pass
          optimizer.zero_grad()
          outputs = model(inputs)

          #Determine loss, backpropagate and update the weights
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          #Print loss and epoch once every 100 batches.
          #loss.item() forces a device sync, so it is only called when printing.
          if batch_idx % 100 == 0:
              print('[Epoch: %d] [Loss: %.3f]' % (epoch+1, loss.item()))

Save model

In [None]:
# Uncomment the next line to save the trained model to 'dice_model.pt'.
#torch.save(model, 'dice_model.pt')

Test accuracy

In [None]:
correct = 0
total = 0

#Test the accuracy of the model (based on MNIST example shown in lectures)
model.eval()  # evaluation mode, also for a model that was just trained in this notebook
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device) #load to device

        outputs = model(inputs)  #log-probabilities, one row per sample
        _, predicted = torch.max(outputs, 1) #index of the highest score in each row = predicted class
        total += labels.size(0) #count the total label amount
        correct += (predicted == labels).sum().item() #count the correct labels (where prediction == expected outcome)

# The value reported is the accuracy (share of correctly classified samples); the
# message text is kept unchanged so it matches the recorded output.
# An empty test set yields NaN instead of a ZeroDivisionError.
accuracy = 100 * correct / total if total else float('nan')
print('Precision for %d test cases is %.2f %%' % (total, accuracy)) #output total and accuracy

Precision for 100000 test cases is 99.97 %
