In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm
# from openslide import OpenSlide

import torch
from torch import nn
from torch.utils.data import (
    ConcatDataset,
    DataLoader,
    Dataset,
    Subset,
    SubsetRandomSampler,
    TensorDataset,
    random_split,
)

import torchvision
from torchvision import transforms
from PIL import Image

# import einops

# from eval_metrics import print_metrics_regression
from sklearn import metrics as sklearn_metrics

In [2]:
data_dir = "./datasets/v2/"

In [3]:
train = pd.read_pickle(data_dir+"train.pkl")
train_x = train["x"]
train_y = train["y"]
train_id = train["id"]
train_x = torch.tensor(torch.stack(train_x).detach().cpu().numpy())
train_y = torch.tensor(train_y)

test = pd.read_pickle(data_dir+"test.pkl")
test_x = test["x"]
test_y = test["y"]
test_id = test["id"]
test_x = torch.tensor(torch.stack(test_x).detach().cpu().numpy())
test_y = torch.tensor(test_y)

In [4]:
min_label = train_y.min().item()
max_label = train_y.max().item()
train_y = (train_y-min_label)/(max_label-min_label)
test_y = (test_y-min_label)/(max_label-min_label)

min_label, max_label

(0.0, 4.0)

In [5]:
def min_max_norm(x, min_label=min_label, max_label=max_label):
    return (x-min_label)/(max_label-min_label)

def reverse_min_max_norm(x, min_label=min_label, max_label=max_label):
    return x*(max_label-min_label)+min_label

In [6]:
train_x.shape, train_y.shape, len(train_id), test_x.shape, test_y.shape, len(test_id)

(torch.Size([10206, 3, 224, 224]),
 torch.Size([10206]),
 10206,
 torch.Size([10205, 3, 224, 224]),
 torch.Size([10205]),
 10205)

In [7]:
class ImageDataset(Dataset):
    def __init__(self, x, y, biopsy_id):
        self.x = x # img_tensor_list
        self.y = y # label
        self.biopsy_id = biopsy_id

    def __getitem__(self, index):
        return self.x[index], self.y[index], self.biopsy_id[index]

    def __len__(self):
        return len(self.x)

In [8]:
batch_size = 64

epochs = 50
learning_rate = 1e-3
momentum = 0.9
weight_decay = 0 # 1e-8

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [9]:
train_dataset = ImageDataset(train_x, train_y, train_id)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_dataset = ImageDataset(test_x, test_y, test_id)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [10]:
# for data in train_dataset:
#     x, y, biopsy_id = data
#     print(x.shape, y, biopsy_id)

In [11]:
def mse_loss(y_pred, y_true):
    loss_fn = nn.MSELoss()
    return loss_fn(y_pred, y_true)

def focal_mse_loss(inputs, targets, activate='sigmoid', beta=.2, gamma=1):
    loss = (inputs - targets) ** 2
    loss *= (torch.tanh(beta * torch.abs(inputs - targets))) ** gamma if activate == 'tanh' else \
        (2 * torch.sigmoid(beta * torch.abs(inputs - targets)) - 1) ** gamma
    loss = torch.mean(loss)
    return loss

def huber_loss(inputs, targets, beta=1.):
    l1_loss = torch.abs(inputs - targets)
    cond = l1_loss < beta
    loss = torch.where(cond, 0.5 * l1_loss ** 2 / beta, l1_loss - 0.5 * beta)
    loss = torch.mean(loss)
    return loss

criterion = mse_loss

In [12]:
def train_epoch(model, dataloader, loss_fn, optimizer, scheduler):
    train_loss = []
    model.train()
    for step, data in enumerate(dataloader):
        batch_x, batch_y, batch_biopsy_id = data
        batch_x, batch_y = (
            batch_x.float().to(device),
            batch_y.float().to(device),
        )
        optimizer.zero_grad()
        # print(batch_x.device, batch_x.shape)
        # print(next(model.parameters()).is_cuda)
        output = model(batch_x)
        output = torch.squeeze(output, dim=1)
        # print(output.shape, batch_y.shape)
        
        loss = loss_fn(output, batch_y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
    metric_train_loss = np.array(train_loss).mean()
    scheduler.step(metric_train_loss)
    return metric_train_loss

def val_epoch(model, dataloader):
    y_pred = {} # key: biopsy_id, value: List[slice_stage_pred]
    y_true = {} # key: biopsy_id, value: List[slice_stage_pred]
    model.eval()
    with torch.no_grad():
        for step, data in enumerate(dataloader):
            # print(step)
            batch_x, batch_y, batch_biopsy_id = data
            batch_x, batch_y = (
                batch_x.float().to(device),
                batch_y.float().to(device),
            )
            output = model(batch_x)
            output = torch.squeeze(output, dim=1)
            output = output.detach().cpu().numpy().tolist()
            batch_y = batch_y.detach().cpu().numpy().tolist()

            for i in range(len(batch_biopsy_id)):
                biopsy_id = batch_biopsy_id[i]
                if biopsy_id not in y_pred:
                    y_pred[biopsy_id] = []
                    y_true[biopsy_id] = []
                y_pred[biopsy_id].append(output[i])
                y_true[biopsy_id].append(batch_y[i])
    
    prediction_list = []
    ground_truth_list = []
    for biopsy_id in y_pred:
        preds = np.array(y_pred[biopsy_id])
        truths = np.array(y_true[biopsy_id])
        prediction_list.append(preds.mean())
        ground_truth_list.append(truths.mean())
    prediction_list = np.array(prediction_list)
    ground_truth_list = np.array(ground_truth_list)
    prediction_list = reverse_min_max_norm(prediction_list)
    ground_truth_list = reverse_min_max_norm(ground_truth_list)

    mse = sklearn_metrics.mean_squared_error(ground_truth_list, prediction_list)
    return mse

In [13]:
# ConvNext

model = torchvision.models.convnext_small()
model.load_state_dict(torch.load('./checkpoints/convnext_small-0c510722.pth'), strict=True)
# print(model)

hidden_dim = 768
out_dim = 1

model.classifier = nn.Sequential(
    nn.Flatten(1),
    # nn.Linear(hidden_dim, 48),
    # nn.GELU(),
    # # nn.Dropout(0.1),
    # nn.Linear(48, out_dim),
    n'n
    nn.Sigmoid()
)

# model.load_state_dict(torch.load('./checkpoints/resnet18-f37072fd.pth'), strict=False)
# model.load_state_dict(torch.load('./checkpoints/resnet50-11ad3fa6.pth'), strict=False)


model.to(device)

ConvNeXt(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
    )
    (1): Sequential(
      (0): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=96, out_features=384, bias=True)
          (4): GELU(approximate=none)
          (5): Linear(in_features=384, out_features=96, bias=True)
          (6): Permute()
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=96

In [14]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

In [15]:
best_score = 0.615047228315182
for epoch in range(epochs):
    # print(f'Running epoch {epoch} ...')
    train_loss = train_epoch(
        model,
        train_loader,
        criterion,
        optimizer,
        scheduler
    )
    print(f"Epoch {epoch}: Loss = {train_loss}")
    if epoch % 1 == 0:
        metric_valid = val_epoch(model, test_loader)
        print("Val Score:", metric_valid)
        if metric_valid < best_score:
            best_score = metric_valid
            print("Saving best model ...")
            torch.save(
                model.state_dict(),
                f"./checkpoints/model_convnext_1110.ckpt",
            )
    

Epoch 0: Loss = 0.052852544613415375
Val Score: 0.7978317029201518
Epoch 1: Loss = 0.04687917011142417
Val Score: 0.7789906333540144
Epoch 2: Loss = 0.04618243996374076
Val Score: 0.780803423608625
Epoch 3: Loss = 0.04604303332962445
Val Score: 0.7797163413951642
Epoch 4: Loss = 0.04594555248477263
Val Score: 0.777574789808019
Epoch 5: Loss = 0.045949965598993
Val Score: 0.7812420026301151
Epoch 6: Loss = 0.045862963308172765
Val Score: 0.787270633126852
Epoch 7: Loss = 0.06079500530686346
Val Score: 0.8320277945347391
Epoch 8: Loss = 0.04958115146291675
Val Score: 0.831435617487219
Epoch 9: Loss = 0.04841344957440015


KeyboardInterrupt: 

In [None]:
best_score