In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm
# from openslide import OpenSlide

import torch
from torch import nn
from torch.utils.data import (
    ConcatDataset,
    DataLoader,
    Dataset,
    Subset,
    SubsetRandomSampler,
    TensorDataset,
    random_split,
)

import torchvision
from torchvision import transforms
from PIL import Image

# import einops

# from eval_metrics import print_metrics_regression
from sklearn import metrics as sklearn_metrics

In [2]:
train = pd.read_pickle("./datasets/train.pkl")
train_x = train["x"]
train_y = train["y"]
train_id = train["id"]
train_x = torch.tensor(torch.stack(train_x).detach().cpu().numpy())
train_y = torch.tensor(train_y)

test = pd.read_pickle("./datasets/test.pkl")
test_x = test["x"]
test_y = test["y"]
test_id = test["id"]
test_x = torch.tensor(torch.stack(test_x).detach().cpu().numpy())
test_y = torch.tensor(test_y)

In [3]:
min_label = train_y.min().item()
max_label = train_y.max().item()
train_y = (train_y-min_label)/(max_label-min_label)
test_y = (test_y-min_label)/(max_label-min_label)

min_label, max_label

(0.0, 4.0)

In [4]:
def min_max_norm(x, min_label=min_label, max_label=max_label):
    return (x-min_label)/(max_label-min_label)

def reverse_min_max_norm(x, min_label=min_label, max_label=max_label):
    return x*(max_label-min_label)+min_label

In [5]:
train_x.shape, train_y.shape, len(train_id), test_x.shape, test_y.shape, len(test_id)

(torch.Size([10206, 3, 224, 224]),
 torch.Size([10206]),
 10206,
 torch.Size([10205, 3, 224, 224]),
 torch.Size([10205]),
 10205)

In [6]:
class ImageDataset(Dataset):
    def __init__(self, x, y, biopsy_id):
        self.x = x # img_tensor_list
        self.y = y # label
        self.biopsy_id = biopsy_id

    def __getitem__(self, index):
        return self.x[index], self.y[index], self.biopsy_id[index]

    def __len__(self):
        return len(self.x)

In [7]:
batch_size = 256

epochs = 50
learning_rate = 2e-4
momentum = 0.9
weight_decay = 0 # 1e-8

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [8]:
train_dataset = ImageDataset(train_x, train_y, train_id)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_dataset = ImageDataset(test_x, test_y, test_id)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [9]:
# for data in train_dataset:
#     x, y, biopsy_id = data
#     print(x.shape, y, biopsy_id)

In [10]:
def mse_loss(y_pred, y_true):
    loss_fn = nn.MSELoss()
    return loss_fn(y_pred, y_true)

def focal_mse_loss(inputs, targets, activate='sigmoid', beta=.2, gamma=1):
    loss = (inputs - targets) ** 2
    loss *= (torch.tanh(beta * torch.abs(inputs - targets))) ** gamma if activate == 'tanh' else \
        (2 * torch.sigmoid(beta * torch.abs(inputs - targets)) - 1) ** gamma
    loss = torch.mean(loss)
    return loss

def huber_loss(inputs, targets, beta=1.):
    l1_loss = torch.abs(inputs - targets)
    cond = l1_loss < beta
    loss = torch.where(cond, 0.5 * l1_loss ** 2 / beta, l1_loss - 0.5 * beta)
    loss = torch.mean(loss)
    return loss

criterion = mse_loss

In [11]:
def train_epoch(model, dataloader, loss_fn, optimizer, scheduler):
    train_loss = []
    model.train()
    for step, data in enumerate(dataloader):
        batch_x, batch_y, batch_biopsy_id = data
        batch_x, batch_y = (
            batch_x.float().to(device),
            batch_y.float().to(device),
        )
        optimizer.zero_grad()
        # print(batch_x.device, batch_x.shape)
        # print(next(model.parameters()).is_cuda)
        output = model(batch_x)
        output = torch.squeeze(output, dim=1)
        # print(output.shape, batch_y.shape)
        
        loss = loss_fn(output, batch_y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
    metric_train_loss = np.array(train_loss).mean()
    scheduler.step(metric_train_loss)
    return metric_train_loss

def val_epoch(model, dataloader):
    y_pred = {} # key: biopsy_id, value: List[slice_stage_pred]
    y_true = {} # key: biopsy_id, value: List[slice_stage_pred]
    model.eval()
    with torch.no_grad():
        for step, data in enumerate(dataloader):
            # print(step)
            batch_x, batch_y, batch_biopsy_id = data
            batch_x, batch_y = (
                batch_x.float().to(device),
                batch_y.float().to(device),
            )
            output = model(batch_x)
            output = torch.squeeze(output, dim=1)
            output = output.detach().cpu().numpy().tolist()
            batch_y = batch_y.detach().cpu().numpy().tolist()

            for i in range(len(batch_biopsy_id)):
                biopsy_id = batch_biopsy_id[i]
                if biopsy_id not in y_pred:
                    y_pred[biopsy_id] = []
                    y_true[biopsy_id] = []
                y_pred[biopsy_id].append(output[i])
                y_true[biopsy_id].append(batch_y[i])
    
    prediction_list = []
    ground_truth_list = []
    for biopsy_id in y_pred:
        preds = np.array(y_pred[biopsy_id])
        truths = np.array(y_true[biopsy_id])
        prediction_list.append(preds.mean())
        ground_truth_list.append(truths.mean())
    prediction_list = np.array(prediction_list)
    ground_truth_list = np.array(ground_truth_list)
    prediction_list = reverse_min_max_norm(prediction_list)
    ground_truth_list = reverse_min_max_norm(ground_truth_list)

    mse = sklearn_metrics.mean_squared_error(ground_truth_list, prediction_list)
    return mse

In [12]:
# model = torchvision.models.resnet18(num_classes=1)
model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
hidden_dim = model.fc.in_features
out_dim = 1

model.fc = nn.Sequential(
    nn.Linear(hidden_dim, hidden_dim//16),
    nn.GELU(),
    nn.Linear(hidden_dim//16, out_dim),
    nn.Sigmoid()
)

# model.load_state_dict(torch.load('./checkpoints/resnet18-f37072fd.pth'), strict=False)
model.load_state_dict(torch.load('./checkpoints/resnet50-11ad3fa6.pth'), strict=False)

model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
best_score = 1e8
for epoch in range(epochs):
    # print(f'Running epoch {epoch} ...')
    train_loss = train_epoch(
        model,
        train_loader,
        criterion,
        optimizer,
        scheduler
    )
    print(f"Epoch {epoch}: Loss = {train_loss}")
    if epoch % 1 == 0:
        metric_valid = val_epoch(model, test_loader)
        print("Val Score:", metric_valid)
        if metric_valid < best_score:
            best_score = metric_valid
            print("Saving best model ...")
            torch.save(
                model.state_dict(),
                f"./checkpoints/model_resnet50.ckpt",
            )
    

Epoch 0: Loss = 0.053634231071919204
Val Score: 0.692208026844931
Saving best model ...
Epoch 1: Loss = 0.03878230648115277
Val Score: 0.6546129105283747
Saving best model ...
Epoch 2: Loss = 0.02708146444056183
Val Score: 0.6483528409185323
Saving best model ...
Epoch 3: Loss = 0.018828779296018183
Val Score: 0.6791260418825155
Epoch 4: Loss = 0.017115258367266507
Val Score: 0.8715568246490456
Epoch 5: Loss = 0.01586588758509606
Val Score: 0.7695032445996475
Epoch 6: Loss = 0.01316613903036341
Val Score: 0.7646366392928919
Epoch 7: Loss = 0.01137981122592464
Val Score: 0.6718654984570094
Epoch 8: Loss = 0.010101343021960928
Val Score: 0.6938697026242245
Epoch 9: Loss = 0.00862907776609063
Val Score: 0.7457029309545187
Epoch 10: Loss = 0.007600214006379247
Val Score: 0.8658422557676317
Epoch 11: Loss = 0.006624160031788051
Val Score: 0.6813749360389835
Epoch 12: Loss = 0.00578711973503232
Val Score: 0.7335255319291989
Epoch 13: Loss = 0.004475378541974351
Val Score: 0.694105713798105
E

In [14]:
best_score

0.615047228315182

: 