In [6]:
import pandas as pd
import numpy as np
from tqdm import tqdm
# from openslide import OpenSlide
from pathlib import Path
import torch
import torchvision
from torch import nn
from torchvision import transforms
from PIL import Image

from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

# Seed value for reproducibility.
# NOTE(review): nothing in the visible cells passes SEED to numpy/torch — confirm whether seeding is still wanted.
SEED = 42
# Prefix that is string-concatenated with each slide's relative path when an
# image is opened, so it must end with a path separator.
dataset_base_path = "./datasets/"

In [7]:
# Mapping table for the holdout set; later cells read its `slide_id`,
# `biopsy_id` and `downsampled_path` columns.
holdout_mapping = pd.read_csv('./csv_dir/holdout_mapping.csv')

In [8]:
# Lookup table for the holdout slides.
#   key:   slide_id
#   value: (biopsy_id, slide_path, label) -- label is the placeholder -1,
#          no label column is read from the mapping CSV.
# Built column-wise with zip() rather than DataFrame.iterrows(): iterrows()
# creates a Series for every row (slow on large tables) and can upcast values
# when the columns have mixed dtypes.
# If a slide_id appears more than once, the last row wins.
holdout_slide_map = {
    slide_id: (biopsy_id, slide_path, -1)
    for slide_id, biopsy_id, slide_path in zip(
        holdout_mapping['slide_id'],
        holdout_mapping['biopsy_id'],
        holdout_mapping['downsampled_path'],
    )
}

In [9]:
# Sanity check: row count of the holdout mapping table.
len(holdout_mapping)

14466

In [10]:
# Deterministic test-time preprocessing: resize to 224, take the central
# 224x224 crop, convert to a tensor and normalise each of the three channels.
# NOTE(review): mean/std are the ImageNet statistics quoted in the torchvision
# docs — confirm the checkpoint was trained with the same normalisation.
transform_aug_test = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [11]:
class ImageDataset(torch.utils.data.Dataset):
    """Map-style dataset that loads one slide image per item.

    Args:
        slide_map: dict whose values are ``(biopsy_id, path, label)`` tuples;
            ``path`` is appended to the module-level ``dataset_base_path``.
        mode: free-form tag (e.g. 'train'/'test'); stored on the instance but
            not consulted by this class.
        transform: optional callable applied to the RGB PIL image. When it is
            ``None`` the PIL image itself is returned.
    """

    def __init__(self, slide_map, mode='train', transform=None):
        self.slide_map = slide_map
        # Snapshot the values so positional indexing stays stable even if the
        # caller mutates slide_map afterwards.
        self.data = list(slide_map.values())

        self.mode = mode  # train/test
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label, biopsy_id)`` for the item at ``index``."""
        biopsy_id, path, label = self.data[index]
        # Image.open is lazy and keeps the file open; convert() forces the pixel
        # data to load so the context manager can release the handle. Converting
        # to RGB also guarantees the three channels the transform pipeline and
        # the network expect (grayscale / RGBA / palette files would otherwise
        # fail downstream).
        with Image.open(dataset_base_path + path) as raw_image:
            x_pil = raw_image.convert('RGB')
        x_tensor = x_pil if self.transform is None else self.transform(x_pil)
        return x_tensor, label, biopsy_id

    def __len__(self):
        # Must follow self.data (what __getitem__ indexes), not the mapping,
        # which may have changed since construction.
        return len(self.data)

In [12]:
# --- Compute target: first CUDA device when one is visible, otherwise CPU ---
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# --- Data loading ---
batch_size = 128

# --- Optimisation settings ---
# NOTE(review): epochs and momentum are not referenced in the visible cells.
epochs = 50
learning_rate = 1e-3
momentum = 0.9
weight_decay = 0  # 1e-8

# --- Classifier head ---
num_classes = 5  # labels [0,1,2,3,4]
out_dim = num_classes
hidden_dim = 512  # ResNet50: 2048, ResNet18: 512

In [13]:
# Holdout slides are served in mapping order (no shuffling) through the
# deterministic test-time transform.
holdout_dataset = ImageDataset(
    slide_map=holdout_slide_map,
    mode='test',
    transform=transform_aug_test,
)
holdout_loader = torch.utils.data.DataLoader(
    dataset=holdout_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [14]:
# One-hot encoder fitted once on the full label range 0..num_classes-1.
label_binarizer = LabelBinarizer().fit(np.arange(num_classes))

def get_score(y_true, y_pred):
    """Macro-averaged one-vs-rest ROC AUC over all classes.

    Args:
        y_true: sequence of discrete class labels, e.g. [0, 1, 2, 3, 4, 3, 2, ...].
        y_pred: per-class scores of shape (n_samples, num_classes), where
            num_classes = 5 for this task.

    Returns:
        The value produced by sklearn's ``roc_auc_score`` for the one-hot
        encoded targets.
    """
    onehot_targets = label_binarizer.transform(y_true)
    return roc_auc_score(
        onehot_targets,
        y_pred,
        average="macro",
        multi_class="ovr",
    )

In [15]:
# ResNet-18 backbone built with torchvision's default arguments; the trained
# weights are loaded from a checkpoint in the following cell.
model = torchvision.models.resnet18()
# NOTE(review): leftover load of a ResNet-50 checkpoint; it does not match the
# resnet18 architecture above — presumably from an earlier experiment.
# model.load_state_dict(torch.load('./checkpoints/resnet50-11ad3fa6.pth'), strict=True)

In [16]:
# Replace the stock classification layer with a single linear layer mapping
# the hidden_dim pooled features to out_dim classes; the trailing Softmax makes
# the model emit per-class probabilities directly.
# Alternative two-layer head tried previously (kept for reference):
#     nn.Linear(hidden_dim, hidden_dim//16), nn.GELU(), nn.Linear(hidden_dim//16, out_dim)
model.fc = nn.Sequential(
    nn.Linear(hidden_dim, out_dim),
    nn.Softmax(dim=1)
)

# map_location remaps tensors that were saved from a GPU onto the device chosen
# for this session; without it the load fails on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('checkpoints/model_resnet18_0415.ckpt', map_location=device),
    strict=True,
)

model = model.to(device)
# NOTE(review): the optimizer is not used by the inference cells visible here;
# it is kept so the module-level name stays defined.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [17]:
def val_epoch(model, dataloader):
    """Run inference over ``dataloader`` and aggregate predictions per biopsy.

    Args:
        model: torch module whose output for a batch is a 2-D tensor with one
            row of class scores per sample.
        dataloader: iterable yielding ``(batch_x, labels, batch_biopsy_id)``
            triples; ``labels`` are ignored.

    Returns:
        dict mapping biopsy_id -> ``(preds, stage)`` where ``preds`` is the
        element-wise mean (np.ndarray) of the outputs of all slides sharing
        that biopsy_id and ``stage`` is the index of its largest entry.
    """
    # Send inputs to the device the model actually lives on; fall back to the
    # module-level ``device`` only for parameter-free models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    y_pred = {}  # key: biopsy_id, value: List[per-slide output vector]
    model.eval()
    with torch.no_grad():
        for batch_x, _, batch_biopsy_id in dataloader:
            batch_x = batch_x.float().to(target_device)
            output = model(batch_x).detach().cpu().numpy().tolist()

            for biopsy_id, slide_pred in zip(batch_biopsy_id, output):
                # Numeric ids are collated into a tensor, and tensors hash by
                # identity: without this conversion every slide would become
                # its own dict key and nothing would be averaged.
                if torch.is_tensor(biopsy_id):
                    biopsy_id = biopsy_id.item()
                y_pred.setdefault(biopsy_id, []).append(slide_pred)

    submit_result_dict = {}
    for biopsy_id, slide_preds in y_pred.items():
        preds = np.array(slide_preds).mean(axis=0)
        stage = np.argmax(preds)
        submit_result_dict[biopsy_id] = (preds, stage)
    return submit_result_dict

In [18]:
# Run inference on the holdout loader; the result maps
# biopsy_id -> (mean per-class model output, index of the largest entry).
submit_result_dict = val_epoch(model, holdout_loader)

In [19]:
# Flatten the per-biopsy results into three parallel lists (dict order is
# preserved, so index i refers to the same biopsy in all three).
biopsy_id_list = list(submit_result_dict)
probability_stage_list = [result[0] for result in submit_result_dict.values()]
stage_list = [result[1] for result in submit_result_dict.values()]

In [32]:
import csv

# Stack the per-biopsy probability vectors into an (n_biopsies, n_classes) array.
probability_stage_ndarray = np.array(probability_stage_list)

# The csv module requires files to be opened with newline=""; otherwise
# platforms that translate line endings emit a blank line after every row.
with open("submit.csv", "w", newline="") as outfile:
    writer = csv.writer(outfile)
    # No header row is written; each row is: biopsy_id, one probability per
    # class, predicted stage. Iterating row-wise avoids hard-coding the number
    # of classes and copes with an empty result set.
    for biopsy_id, class_probs, stage in zip(
        biopsy_id_list, probability_stage_ndarray.tolist(), stage_list
    ):
        writer.writerow([biopsy_id, *class_probs, stage])