In [2]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm
# from openslide import OpenSlide

import torch
from torch import nn
from torch.utils.data import (
    ConcatDataset,
    DataLoader,
    Dataset,
    Subset,
    SubsetRandomSampler,
    TensorDataset,
    random_split,
)

import torchvision
from torchvision import transforms
from PIL import Image

# import einops

from eval_metrics import print_metrics_binary
# from sklearn import metrics as sklearn_metrics


In [8]:
data_dir = "./datasets/v2/"

In [9]:
train = pd.read_pickle(data_dir+"train.pkl")
train_x = train["x"]
train_y = train["y"]
train_id = train["id"]
train_x = torch.tensor(torch.stack(train_x).detach().cpu().numpy())
train_y = torch.tensor(train_y)

test = pd.read_pickle(data_dir+"test.pkl")
test_x = test["x"]
test_y = test["y"]
test_id = test["id"]
test_x = torch.tensor(torch.stack(test_x).detach().cpu().numpy())
test_y = torch.tensor(test_y)

In [10]:
min_label = train_y.min().item()
max_label = train_y.max().item()
# train_y = (train_y-min_label)/(max_label-min_label)
# test_y = (test_y-min_label)/(max_label-min_label)

min_label, max_label

(0.0, 4.0)

In [11]:
train_y[train_y!=1] = 0
test_y[test_y!=1] = 0

In [12]:
def min_max_norm(x, min_label=min_label, max_label=max_label):
    return (x-min_label)/(max_label-min_label)

def reverse_min_max_norm(x, min_label=min_label, max_label=max_label):
    return x*(max_label-min_label)+min_label

In [13]:
train_x.shape, train_y.shape, len(train_id), test_x.shape, test_y.shape, len(test_id)

(torch.Size([10206, 3, 224, 224]),
 torch.Size([10206]),
 10206,
 torch.Size([10205, 3, 224, 224]),
 torch.Size([10205]),
 10205)

In [14]:
class ImageDataset(Dataset):
    def __init__(self, x, y, biopsy_id):
        self.x = x # img_tensor_list
        self.y = y # label
        self.biopsy_id = biopsy_id

    def __getitem__(self, index):
        return self.x[index], self.y[index], self.biopsy_id[index]

    def __len__(self):
        return len(self.x)

In [15]:
batch_size = 256

epochs = 50
learning_rate = 2e-4
momentum = 0.9
weight_decay = 0 # 1e-8

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [16]:
train_dataset = ImageDataset(train_x, train_y, train_id)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_dataset = ImageDataset(test_x, test_y, test_id)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [17]:
# for data in train_dataset:
#     x, y, biopsy_id = data
#     print(x.shape, y, biopsy_id)

In [18]:
def mse_loss(y_pred, y_true):
    loss_fn = nn.MSELoss()
    return loss_fn(y_pred, y_true)

def focal_mse_loss(inputs, targets, activate='sigmoid', beta=.2, gamma=1):
    loss = (inputs - targets) ** 2
    loss *= (torch.tanh(beta * torch.abs(inputs - targets))) ** gamma if activate == 'tanh' else \
        (2 * torch.sigmoid(beta * torch.abs(inputs - targets)) - 1) ** gamma
    loss = torch.mean(loss)
    return loss

def huber_loss(inputs, targets, beta=1.):
    l1_loss = torch.abs(inputs - targets)
    cond = l1_loss < beta
    loss = torch.where(cond, 0.5 * l1_loss ** 2 / beta, l1_loss - 0.5 * beta)
    loss = torch.mean(loss)
    return loss


In [19]:
def bce_loss(y_pred, y_true):
    loss_fn = nn.BCELoss()
    return loss_fn(y_pred, y_true)
criterion = bce_loss

In [24]:
def train_epoch(model, dataloader, loss_fn, optimizer, scheduler):
    train_loss = []
    score = []
    model.train()
    for step, data in enumerate(dataloader):
        # if step == 2: break
        batch_x, batch_y, _ = data
        batch_x, batch_y = (
            batch_x.float().to(device),
            batch_y.float().to(device),
        )
        optimizer.zero_grad()
        output = model(batch_x)
        output = torch.squeeze(output)
        # print(batch_y, output)
        # batch_y = torch.unsqueeze(batch_y, dim=-1)
        # print(batch_y.shape, output.shape, output, batch_y)
        # print(output.shape, batch_y.shape)
        loss = loss_fn(output, batch_y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
    scheduler.step(np.array(train_loss).mean())
    return np.array(train_loss).mean()

In [25]:
def val_epoch(model, dataloader):
    y_pred = []
    y_true = []
    len_list = []
    model.eval()
    with torch.no_grad():
        for _, data in enumerate(dataloader):
            batch_x, batch_y, _ = data
            batch_x, batch_y = (
                batch_x.float().to(device),
                batch_y.float().to(device)
            )
            output = model(batch_x)
            output = torch.squeeze(output, dim=0)
            # print(type(output), output.shape, output)
            y_pred.extend(output.tolist())
            y_true.extend(batch_y.tolist())
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_pred = np.stack([1-y_pred, y_pred], axis=1)
    eval_metric = print_metrics_binary(y_true, y_pred, verbose=0)
    # print("accuracy = {:6.5f}".format(eval_metric['acc']))
    # print("aruoc = {:6.5f}".format(eval_metric['auroc']))
    # print("auprc = {:6.5f}".format(eval_metric['auprc']))
    return eval_metric["acc"]

In [3]:
# model = torchvision.models.resnet18(num_classes=1)
model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
hidden_dim = 1280
out_dim = 1

model.classifier = nn.Sequential(
    nn.Linear(hidden_dim, hidden_dim//16),
    nn.GELU(),
    nn.Linear(hidden_dim//16, out_dim),
    nn.Sigmoid()
)

# model.load_state_dict(torch.load('./checkpoints/resnet18-f37072fd.pth'), strict=False)
model.load_state_dict(torch.load('./checkpoints/resnet50-11ad3fa6.pth'), strict=False)

model.to(device)

NameError: name 'learning_rate' is not defined

In [26]:
best_score = 1e8
for epoch in range(epochs):
    # print(f'Running epoch {epoch} ...')
    train_loss = train_epoch(
        model,
        train_loader,
        criterion,
        optimizer,
        scheduler
    )
    print(f"Epoch {epoch}: Loss = {train_loss}")
    if epoch % 1 == 0:
        metric_valid = val_epoch(model, test_loader)
        print("Val Score:", metric_valid)
        if metric_valid < best_score:
            best_score = metric_valid
            # print("Saving best model ...")
            # torch.save(
            #     model.state_dict(),
            #     f"./checkpoints/model_resnet50.ckpt",
            # )
    

Epoch 0: Loss = 0.6916041433811188
Val Score: 0.5157276
Epoch 1: Loss = 0.6873723775148392
Val Score: 0.5363057
Epoch 2: Loss = 0.6707751169800759
Val Score: 0.52170503
Epoch 3: Loss = 0.630525815486908
Val Score: 0.52493876
Epoch 4: Loss = 0.611129391938448
Val Score: 0.53855956
Epoch 5: Loss = 0.5188729338347912
Val Score: 0.54140127
Epoch 6: Loss = 0.45672066435217856
Val Score: 0.5393435
Epoch 7: Loss = 0.3926995176821947
Val Score: 0.5364037
Epoch 8: Loss = 0.31119989417493343
Val Score: 0.5363057
Epoch 9: Loss = 0.24947825372219085
Val Score: 0.5247428
Epoch 10: Loss = 0.19931699894368649
Val Score: 0.53317004
Epoch 11: Loss = 0.162342637591064
Val Score: 0.5346399
Epoch 12: Loss = 0.13619974721223116
Val Score: 0.5358158
Epoch 13: Loss = 0.11838107155635953
Val Score: 0.53787357
Epoch 14: Loss = 0.09776650238782167
Val Score: 0.53718764
Epoch 15: Loss = 0.07880826322361827
Val Score: 0.53512985
Epoch 16: Loss = 0.06243135710246861
Val Score: 0.5383636
Epoch 17: Loss = 0.04782906

In [None]:
best_score

0.615047228315182

: 