# Creating a Leaderboard Submission File

In this notebook, we will:
- define a DataLoader that reads from the `test_unlabeled` directory
- load your model weights
- make predictions
- save the predictions to a CSV file which you can upload to the Leaderboard submission

## <font color="red"> TODO</font>
In the following cell, fill in:
1. `MODEL_NAME` with your model filename, under the `models` folder (for example, if you named your model `my_model.pth`, just write `MODEL_NAME = 'my_model.pth'`, **not** the full path such as `./models/my_model.pth`)

2. `CSV_NAME` with the filename you would like to save your predictions to (must be a CSV file, for example `my_predictions.csv`)

In [None]:
# Filename (not a path) of the trained weights; it is joined onto ./models
# when the weights are loaded later in this notebook.
MODEL_NAME = 'your_model_name.pth'

# Filename the predictions are written to by the final cell.
CSV_NAME = 'predictions.csv' # feel free to replace with desired CSV filename

In [None]:
%load_ext autoreload
%autoreload 1
%matplotlib inline
%aimport student

import os
from skimage import io
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets

# Names of the 16 animal classes.
# NOTE(review): presumably a name's position in this list is the integer
# `Category` the model predicts -- confirm against the training code; the list
# is not referenced by any other cell visible in this notebook.
classes = [
     'bighorn-sheep',
     'bison',
     'black-stork',
     'brown-bear',
     'bullfrog',
     'camel',
     'gazelle',
     'golden-retriever',
     'goldfish',
     'ladybug',
     'lion',
     'orangutan',
     'penguin',
     'persian-cat',
     'pig',
     'puma'
]

In [None]:
class AnimalDatasetTest(Dataset):
    """Dataset over an unlabeled test set; each item is an ``(image, ID)`` pair.

    The ID is the integer parsed from the image filename (``<ID>.JPEG``), so
    every discovered file is expected to have a purely numeric name.
    """

    # Only files whose name ends with this (case-sensitive) suffix are loaded.
    IMAGE_EXTENSION = '.JPEG'

    def __init__(self, root_dir, transform=None):
        """
        Collect the paths of all test images found under ``root_dir``.

        Args:
            root_dir (string):
                Directory with all the images (searched recursively).
                Of the form:
                    <XXXX>.JPEG
                    <XXXX>.JPEG
                        ...
                where <XXXX>.JPEG are the images. Must be .JPEG extension.
                A directory that does not exist yields an empty dataset,
                because os.walk ignores listing errors by default.

            transform (callable, optional): Optional transform to be applied
                on a sample. When None, the image is returned as loaded.

        """
        self.root_dir = root_dir
        self.transform = transform

        # os.walk yields entries in filesystem-dependent order; sorting makes
        # the dataset ordering reproducible across machines and runs.
        # endswith (rather than a substring test) skips names that merely
        # contain the extension, e.g. "0001.JPEG.bak".
        self.raw_data = sorted(
            os.path.join(root, file)
            for root, _, files in os.walk(root_dir)
            for file in files
            if file.endswith(self.IMAGE_EXTENSION)
        )

    def __len__(self):
        """Return the number of images discovered under ``root_dir``."""
        return len(self.raw_data)

    def __getitem__(self, idx):
        """
        Load the image at position ``idx``.

        Args:
            idx (int or torch.Tensor): Position in the dataset; a one-element
                tensor is converted to a plain int.

        Returns:
            tuple: ``(image, ID)`` where ``image`` is the (optionally
            transformed) image and ``ID`` is the integer taken from the
            filename.

        Raises:
            ValueError: If the filename (without extension) is not an integer.
        """
        if isinstance(idx, torch.Tensor):
            idx = idx.item()

        image_path = self.raw_data[idx]
        image = io.imread(image_path)
        # transform is optional: only apply it when one was supplied.
        if self.transform is not None:
            image = self.transform(image)

        stem = os.path.splitext(os.path.basename(image_path))[0]
        try:
            ID = int(stem)
        except ValueError:
            raise ValueError(
                "Test image filename must be an integer ID followed by "
                "{}: {}".format(self.IMAGE_EXTENSION, image_path)
            ) from None

        return image, ID

#### Loading the Test Data
Before running the following cell, please check that you have downloaded the test data from Gradescope and placed it in the `data/test_unlabeled` folder. You can download the test data from the **Leaderboard** submission entry on Gradescope.

In [None]:
# Per-channel statistics given on the 0-255 scale, divided by 255 so they are
# expressed as fractions of the full intensity range.
dataset_means = [channel / 255. for channel in (123., 116., 97.)]
dataset_stds = [channel / 255. for channel in (54., 53., 52.)]

# One image per batch.
batch_size = 1

# Preprocessing applied to every loaded image: convert to PIL, convert to a
# tensor, then normalize each channel with the statistics above.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(dataset_means, dataset_stds),
])

# shuffle=False keeps the dataset's own ordering while iterating.
test_dataset = AnimalDatasetTest(
    os.path.join('data', 'test_unlabeled'),
    transform=transform,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
print(len(test_dataset))

In [None]:
%aimport student

# Build the student's network, then restore its trained parameters from
# ./models/<MODEL_NAME>, mapping all tensors onto the CPU.
net = student.AnimalStudentNet()

weights_path = os.path.join('.', 'models', MODEL_NAME)
# NOTE(review): torch.load unpickles the file, so only load weight files you
# trust.
state_dict = torch.load(weights_path, map_location="cpu")
net.load_state_dict(state_dict)

In [None]:
# Build the Leaderboard submission file: one (Id, Category) row per test image.
net.eval()  # switch layers such as dropout / batch norm to evaluation behaviour

image_ids = []
predictions = []
with torch.no_grad():  # no gradients are needed for inference
    for images, ids in test_dataloader:
        scores = net(images)
        # Prediction is the class with the highest score. Keeping the whole
        # batch (not just element 0) makes this correct for any batch_size.
        predictions.extend(scores.argmax(dim=1).tolist())
        image_ids.extend(ids.tolist())

# Assemble all rows at once and order them by Id, instead of using each Id as
# a positional row index (which only works when the Ids are exactly 0..N-1).
df_submission = (
    pd.DataFrame({'Id': image_ids, 'Category': predictions})
    .astype({'Id': int, 'Category': int})
    .sort_values('Id')
    .reset_index(drop=True)
)

# Create csv with predictions to upload to Leaderboard
df_submission.to_csv(CSV_NAME, index=False)
