### Import Packages
- Our test code relies only on the packages below.

In [1]:
import os
import sys
import pickle
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import transforms

from PIL import Image

from model import MyModel
from utils import score, load_checkpoint, reset, count_parameters

### Define data path
- Your code should work well with the `val.npz` file.
- We have our own `test.npz` file for scoring, and it will not be shared with students.

In [2]:
# Evaluation settings: dataset archive path and DataLoader batch size.
data_fp = 'val.npz'
batch_size = 64

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### Define Test Dataset class

In [3]:
class TestDataset(Dataset):
    """Dataset of 28x28 single-channel images stored in an ``.npz`` archive.

    The archive must provide two arrays: ``data`` (one image per entry, each
    reshapeable to 28x28) and ``labels`` (one label per image).

    Args:
        npz_fp: Path to the ``.npz`` file.
        transform: Optional callable applied to each PIL image before it is
            returned.

    Raises:
        ValueError: If ``data`` and ``labels`` do not have the same length.
    """

    def __init__(self, npz_fp, transform=None):
        # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
        # which can execute arbitrary code. Only load archives from a trusted
        # source; consider allow_pickle=False if the arrays are plain numeric.
        with np.load(npz_fp, allow_pickle=True) as data:
            # Indexing materializes the arrays, so they stay valid after the
            # archive is closed.
            self.data = data["data"]
            self.labels = data["labels"]

        # Fail fast on an inconsistent archive instead of hitting an
        # IndexError partway through evaluation.
        if len(self.data) != len(self.labels):
            raise ValueError(
                "data and labels must have the same length, got {} and {}".format(
                    len(self.data), len(self.labels)
                )
            )

        self.transform = transform

    def __len__(self) -> int:
        """Return the number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, idx: int):
        """Return ``(image, label)`` for the sample at ``idx``.

        The image is a PIL image built from the 28x28 uint8 array, or whatever
        ``transform`` returns when a transform was supplied. The label is a
        Python ``int``.
        """
        img_data = self.data[idx].astype("uint8").reshape((28, 28))
        img_label = int(self.labels[idx])

        img_data = Image.fromarray(img_data)

        if self.transform is not None:
            img_data = self.transform(img_data)

        return img_data, img_label

### Define default transform for test
- We will use `test_transforms` for testing your code.

In [4]:
# Default evaluation preprocessing: convert the PIL image to a tensor, then
# normalize its single channel.
# NOTE(review): mean/std are presumably the training-set statistics — confirm.
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.286], std=[0.353]),
    ]
)

test_dataset = TestDataset(data_fp, transform=test_transforms)

# Use at most 8 loader workers, but never more than the machine has CPUs:
# requesting more workers than cores makes PyTorch warn and can slow down or
# stall loading. Results are unaffected because shuffle=False.
num_workers = min(8, os.cpu_count() or 1)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

### Define test function

In [5]:
def test(model, sample):
    model.eval()

    with torch.no_grad():
        input, label = sample[0].to(device), sample[1].to(device)
        pred = model(input)
        num_correct = torch.sum(torch.argmax(pred, dim=-1) == label)

    return num_correct.item()

### Do the testing and calculate the score

In [6]:
# NOTE(review): reset(0) presumably fixes the random seeds — confirm in utils.reset.
reset(0)

model = load_checkpoint('./check/checkpoint.pth', device)

# Enforce the model-size limit before spending time on evaluation.
max_params = 1_000_000
num_params = count_parameters(model)
if num_params > max_params:
    raise ValueError("Cannot have more than 1 million parameters!")

# Sum the integer per-batch correct counts and divide once at the end. This
# gives the exact accuracy instead of adding up rounded per-batch fractions,
# and avoids recomputing the dataset length on every iteration.
num_samples = len(test_dataset)
total_correct = 0
for sample in test_loader:
    te_correct = test(model, sample)
    total_correct += te_correct

# Fraction of correct predictions in [0, 1]; an empty dataset scores 0.
avg_te_correct = total_correct / num_samples if num_samples else 0.0

print('Your accuracy: {:.02f}%'.format(avg_te_correct*100))
print('Your score: {:.02f} out of 100'.format(score(avg_te_correct*100)))

Your accuracy: 84.60%
Your score: 79.90 out of 100
