In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm
# from openslide import OpenSlide

import torch
from torch import nn
from torch.utils.data import (
    ConcatDataset,
    DataLoader,
    Dataset,
    Subset,
    SubsetRandomSampler,
    TensorDataset,
    random_split,
)

import torchvision
from torchvision import transforms
from PIL import Image

# import einops

# from eval_metrics import print_metrics_regression
from sklearn import metrics as sklearn_metrics

In [11]:
# NOTE(review): pd.read_pickle unpickles arbitrary Python objects, so only
# load this file if it comes from a trusted source.
holdout = pd.read_pickle("./datasets/holdout.pkl")
holdout_id = holdout["id"]

# Stack the per-sample tensors into one batch tensor that lives on the CPU and
# carries no autograd history. The result is used directly: round-tripping it
# through NumPy and torch.tensor() would only create a second copy of the data.
holdout_x = torch.stack(holdout["x"]).detach().cpu()
holdout_y = torch.tensor(holdout["y"])

In [12]:
# Bounds of the label range used as defaults when undoing min-max scaling.
min_label = 0.
max_label = 4.


def reverse_min_max_norm(x, min_label=min_label, max_label=max_label):
    """Map a min-max normalised value back onto ``[min_label, max_label]``.

    Args:
        x: Normalised value (scalar or array-like supporting ``*`` and ``+``).
        min_label: Value that a normalised 0 maps to.
        max_label: Value that a normalised 1 maps to.

    Returns:
        ``x`` rescaled by the label span and shifted by ``min_label``.
    """
    label_span = max_label - min_label
    return x * label_span + min_label

In [13]:
class ImageDataset(Dataset):
    """Dataset over three parallel, index-aligned sequences.

    Each item is the triple ``(x[i], y[i], biopsy_id[i])``.
    """

    def __init__(self, x, y, biopsy_id):
        """Store the image tensors, their labels and the owning biopsy ids."""
        self.x, self.y, self.biopsy_id = x, y, biopsy_id

    def __len__(self):
        """Number of samples, taken from the length of ``x``."""
        return len(self.x)

    def __getitem__(self, index):
        """Return the ``(image, label, biopsy_id)`` triple stored at ``index``."""
        image = self.x[index]
        label = self.y[index]
        owner = self.biopsy_id[index]
        return image, label, owner

In [14]:
# Number of samples per DataLoader batch.
batch_size = 256

# Optimiser-style hyperparameters; none of the visible cells reference them.
epochs = 50
learning_rate = 1e-4
momentum = 0.9
weight_decay = 0 # 1e-8

# Prefer the second GPU, but only when it actually exists: "cuda:1" is not a
# valid device on a single-GPU machine, so fall back to "cuda:0" there and to
# the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

In [15]:
# Wrap the hold-out tensors and ids in a dataset and iterate over it in fixed
# (unshuffled) order.
holdout_dataset = ImageDataset(x=holdout_x, y=holdout_y, biopsy_id=holdout_id)
holdout_loader = DataLoader(
    dataset=holdout_dataset,
    batch_size=batch_size,
)

In [16]:
# ResNet-18 backbone; its stock classifier is replaced below by a small MLP.
model = torchvision.models.resnet18(num_classes=1)
hidden_dim = model.fc.in_features
out_dim = 1

# Regression head: the final Sigmoid keeps the output in (0, 1), i.e. on the
# normalised scale that reverse_min_max_norm later maps back to label units.
model.fc = nn.Sequential(
    nn.Linear(hidden_dim, hidden_dim//16),
    nn.GELU(),
    nn.Linear(hidden_dim//16, out_dim),
    nn.Sigmoid()
)

# map_location="cpu" makes the checkpoint loadable even when the GPU it was
# saved from is not present on this machine; model.to(device) below moves the
# weights to the target device afterwards.
state_dict = torch.load('./checkpoints/model_resnet18.ckpt', map_location="cpu")

# strict=False tolerates extra keys in the checkpoint, but it also silently
# skips parameters the checkpoint does not provide. Those would stay randomly
# initialised and produce meaningless predictions, so fail loudly instead.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys:
    raise RuntimeError(
        "Checkpoint does not provide weights for: "
        + ", ".join(load_result.missing_keys)
    )
if load_result.unexpected_keys:
    print("Ignored unexpected checkpoint keys:", load_result.unexpected_keys)

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [17]:
def test_epoch(model, dataloader):
    """Run inference over ``dataloader`` and average the predictions per biopsy.

    Args:
        model: ``nn.Module`` whose output has a size-1 second dimension (it is
            squeezed on ``dim=1`` to give one prediction per sample).
        dataloader: Iterable yielding ``(batch_x, batch_y, batch_biopsy_id)``
            triples. The labels are not needed for inference and are ignored.

    Returns:
        dict mapping each biopsy id to the mean of its per-sample predictions,
        passed through ``reverse_min_max_norm``.
    """
    # Feed inputs to whatever device the model itself lives on, so the function
    # cannot get out of sync with a module-level device variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    y_pred = {} # key: biopsy_id, value: List[slice_stage_pred]
    model.eval()
    with torch.no_grad():
        for batch_x, _, batch_biopsy_id in dataloader:
            output = model(batch_x.float().to(model_device))
            output = torch.squeeze(output, dim=1).cpu().tolist()

            # Numeric ids are collated into a tensor. Tensor elements hash by
            # object identity, so using them as dict keys would never group
            # samples of the same biopsy; convert them to plain Python scalars.
            if torch.is_tensor(batch_biopsy_id):
                batch_biopsy_id = batch_biopsy_id.tolist()

            for biopsy_id, pred in zip(batch_biopsy_id, output):
                y_pred.setdefault(biopsy_id, []).append(pred)

    # Collapse each biopsy's predictions to their mean and undo the label
    # normalisation.
    return {
        biopsy_id: reverse_min_max_norm(np.array(preds).mean())
        for biopsy_id, preds in y_pred.items()
    }

In [18]:
# Per-biopsy predictions for the hold-out set, keyed by biopsy id.
submit_result_dict = test_epoch(model=model, dataloader=holdout_loader)

In [19]:
# Split the id -> prediction mapping into two index-aligned lists, keeping the
# dictionary's iteration order.
biopsy_id_list = list(submit_result_dict.keys())
biopsy_stage_list = [submit_result_dict[key] for key in biopsy_id_list]

In [20]:
import csv

# newline="" lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write end up with an extra "\r" per row.
with open("submit.csv", "w", newline="") as outfile:
    writer = csv.writer(outfile)
    # No header row is written: each row is (biopsy_id, predicted_stage).
    writer.writerows(zip(biopsy_id_list, biopsy_stage_list))