In [1]:
import os
import json
import glob
import random
import collections

import numpy as np
import pandas as pd
import pydicom as dicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Put the bundled EfficientNet-PyTorch source directory on the module search
# path; presumably this is what lets `import efficientnet_pytorch` below resolve
# without installing the package -- verify against the input dataset layout.
package_path = "../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/"
import sys 
sys.path.append(package_path)

import time

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import efficientnet_pytorch

#from sklearn.model_selection import StratifiedKFold

## Support functions

In [3]:
def load_dicom(path):
    """Read one DICOM file and return it as a 256x256 ``uint8`` image.

    The pixel data is min-max normalised to the full 0..255 range before
    being resized, so every non-constant image uses the whole intensity scale.

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(256, 256)``. A constant image (max == min)
        comes back as all zeros.
    """
    # `dcmread` is the current pydicom entry point; `read_file` is its
    # deprecated alias.
    image = dicom.dcmread(path)
    data = image.pixel_array
    data = data - np.min(data)
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    # Scale by 255, not 256: a normalised value of 1.0 times 256 does not fit
    # in uint8 and wraps around to 0, turning the brightest pixels black.
    data = (data * 255).astype(np.uint8)
    data = cv2.resize(data, (256, 256))
    return data

In [4]:
def is_valid_image(path, threshold=10):
    """Tell whether the image at ``path`` is bright enough to keep.

    The image is loaded with ``load_dicom`` and rejected (``False``) when its
    mean pixel value is strictly below ``threshold``; otherwise ``True``.
    """
    mean_intensity = np.mean(load_dicom(path))
    return not (mean_intensity < threshold)

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Covers the ``PYTHONHASHSEED`` environment variable, Python's ``random``,
    NumPy's global generator and PyTorch (CPU, plus all CUDA devices when a
    GPU is available, in which case cuDNN is also put in deterministic mode).
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

## Visualize the label distribution

In [None]:
# Read the table (its first CSV column is the index) and plot how many rows
# carry each label value.
df = pd.read_csv("../input/dffull/filexin1.csv", index_col=0)
sns.countplot(x=df["label"], data=df);

In [None]:
# Keep only the rows whose scan type is T2w and preview the first three.
is_t2w = df["scan_type"] == "T2w"
df_t2w = df[is_t2w]
display(df_t2w.head(3))

In [None]:
class DataCustomer(torch_data.Dataset):
    """Dataset that yields one DICOM image and its label per item.

    Parameters
    ----------
    paths : sequence of str
        DICOM file paths, indexable by integer position.
    labels : sequence of numbers
        Label for each path, in the same order as ``paths``.
    """

    def __init__(self, paths, labels):
        self.paths = paths
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``{"X": float tensor (3, H, W), "y": float scalar tensor}``."""
        image = load_dicom(self.paths[index])
        # The single-channel image is repeated three times to form the channel
        # axis. Stacking in NumPy first avoids `torch.tensor` on a Python list
        # of arrays, which is a slow element-wise conversion and makes PyTorch
        # emit a performance warning.
        X = torch.from_numpy(np.stack([image, image, image])).float()
        Y = torch.tensor(self.labels[index]).float()
        return {"X": X, "y": Y}

In [None]:
class Model(nn.Module):
    """EfficientNet-B0 whose final fully connected layer outputs one value.

    The backbone is built by name, initialised from the checkpoint file on
    disk, and its ``_fc`` layer is then replaced by a fresh single-output
    linear layer with the same number of input features.
    """

    def __init__(self):
        super().__init__()
        backbone = efficientnet_pytorch.EfficientNet.from_name("efficientnet-b0")
        weights = torch.load("../input/efficientnet-pytorch/efficientnet-b0-08094119.pth")
        backbone.load_state_dict(weights)
        backbone._fc = nn.Linear(
            in_features=backbone._fc.in_features,
            out_features=1,
            bias=True,
        )
        self.net = backbone

    def forward(self, x):
        """Run ``x`` through the network and return its raw output."""
        return self.net(x)

In [None]:
class LossMeter:
    """Running mean of the scalar values passed to ``update``.

    ``avg`` holds the current mean and ``n`` the number of values seen.
    """

    def __init__(self):
        self.n = 0
        self.avg = 0

    def update(self, val):
        """Fold one more value into the running mean."""
        count = self.n + 1
        # New mean = share of the new value + re-weighted previous mean.
        self.avg = val / count + (count - 1) / count * self.avg
        self.n = count

        
class AccMeter:
    """Running accuracy over batches of targets and raw model outputs.

    An output counts as a positive prediction when it is ``>= 0``. ``avg``
    holds the accuracy over all samples seen so far and ``n`` their count.
    """

    def __init__(self):
        self.n = 0
        self.avg = 0

    def update(self, y_true, y_pred):
        """Fold one batch (two tensors of equal length) into the accuracy."""
        labels = y_true.cpu().numpy().astype(int)
        predicted_positive = y_pred.cpu().numpy() >= 0
        seen_before = self.n
        self.n = seen_before + len(labels)
        hits = np.sum(labels == predicted_positive)
        # Batch hits over the new total, plus the old accuracy re-weighted by
        # the fraction of samples it was computed on.
        self.avg = hits / self.n + seen_before / self.n * self.avg

In [None]:
class Trainer:
    """Training/validation loop with best-checkpoint saving and early stopping.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise; its output is squeezed on dimension 1 before the
        loss is computed.
    device : torch.device or str
        Device every batch is moved to.
    optimizer : torch.optim.Optimizer
        Optimiser stepped once per training batch.
    criterion : callable
        ``criterion(outputs, targets)`` returning a scalar loss tensor.
    loss_meter, score_meter : callable
        Zero-argument factories (e.g. classes) called at the start of every
        epoch. The loss meter needs ``update(value)``, the score meter
        ``update(targets, outputs)``; both expose the running value as ``avg``.
    """

    def __init__(
        self,
        model,
        device,
        optimizer,
        criterion,
        loss_meter,
        score_meter
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.loss_meter = loss_meter
        self.score_meter = score_meter

        self.best_valid_score = -np.inf
        self.n_patience = 0

        self.messages = {
            "epoch": "[Epoch {}: {}] loss: {:.5f}, score: {:.5f}, time: {} s",
            "checkpoint": "The score improved from {:.5f} to {:.5f}. Save model to '{}'",
            "patience": "\nValid score didn't improve last {} epochs."
        }

    def fit(self, epochs, train_loader, valid_loader, save_path, patience):
        """Train for up to ``epochs`` epochs, keeping the best checkpoint.

        After each epoch the validation score is compared with the best one
        seen so far. On improvement the model is saved to ``save_path``;
        otherwise a patience counter is incremented, and training stops once
        it reaches ``patience``.

        Returns
        -------
        list of dict
            One entry per completed epoch with the keys ``epoch``,
            ``train_loss``, ``train_score``, ``valid_loss``, ``valid_score``.
        """
        history = []
        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}", n_epoch)

            train_loss, train_score, train_time = self.train_epoch(train_loader)
            valid_loss, valid_score, valid_time = self.valid_epoch(valid_loader)

            # The template reads "[Epoch <number>: <phase>]", so the epoch
            # number must be passed before the phase name.
            self.info_message(
                self.messages["epoch"], n_epoch, "Train", train_loss, train_score, train_time
            )
            self.info_message(
                self.messages["epoch"], n_epoch, "Valid", valid_loss, valid_score, valid_time
            )

            history.append(
                {
                    "epoch": n_epoch,
                    "train_loss": train_loss,
                    "train_score": train_score,
                    "valid_loss": valid_loss,
                    "valid_score": valid_score,
                }
            )

            # Checkpoint only on a real improvement. An unconditional save
            # would overwrite the best model with later, possibly worse ones
            # and would make the patience branch below unreachable.
            if self.best_valid_score < valid_score:
                self.info_message(
                    self.messages["checkpoint"], self.best_valid_score, valid_score, save_path
                )
                self.best_valid_score = valid_score
                self.save_model(n_epoch, save_path)
                self.n_patience = 0
            else:
                self.n_patience += 1

            if self.n_patience >= patience:
                self.info_message(self.messages["patience"], patience)
                break

        return history

    def train_epoch(self, train_loader):
        """Run one optimisation pass over ``train_loader``.

        Returns ``(mean loss, mean score, elapsed whole seconds)``.
        """
        self.model.train()
        t = time.time()
        train_loss = self.loss_meter()
        train_score = self.score_meter()

        for step, batch in enumerate(train_loader, 1):
            X = batch["X"].to(self.device)
            targets = batch["y"].to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(X).squeeze(1)

            loss = self.criterion(outputs, targets)
            loss.backward()

            # Meters get detached values so they never hold on to the graph.
            train_loss.update(loss.detach().item())
            train_score.update(targets, outputs.detach())

            self.optimizer.step()

            _loss, _score = train_loss.avg, train_score.avg
            message = 'Train Step {}/{}, train_loss: {:.5f}, train_score: {:.5f}'
            # "\r" rewrites the same console line on every step.
            self.info_message(message, step, len(train_loader), _loss, _score, end="\r")

        return train_loss.avg, train_score.avg, int(time.time() - t)

    def valid_epoch(self, valid_loader):
        """Evaluate the model on ``valid_loader`` without computing gradients.

        Returns ``(mean loss, mean score, elapsed whole seconds)``.
        """
        self.model.eval()
        t = time.time()
        valid_loss = self.loss_meter()
        valid_score = self.score_meter()

        for step, batch in enumerate(valid_loader, 1):
            with torch.no_grad():
                X = batch["X"].to(self.device)
                targets = batch["y"].to(self.device)

                outputs = self.model(X).squeeze(1)
                loss = self.criterion(outputs, targets)

                valid_loss.update(loss.detach().item())
                valid_score.update(targets, outputs)

            _loss, _score = valid_loss.avg, valid_score.avg
            message = 'Valid Step {}/{}, valid_loss: {:.5f}, valid_score: {:.5f}'
            self.info_message(message, step, len(valid_loader), _loss, _score, end="\r")

        return valid_loss.avg, valid_score.avg, int(time.time() - t)

    def save_model(self, n_epoch, save_path):
        """Write model/optimiser state, best score and epoch to ``save_path``."""
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                # Stored as a plain Python float so the checkpoint holds no
                # NumPy scalar objects.
                "best_valid_score": float(self.best_valid_score),
                "n_epoch": n_epoch,
            },
            save_path,
        )

    @staticmethod
    def info_message(message, *args, end="\n"):
        """Print ``message.format(*args)`` terminated by ``end``."""
        print(message.format(*args), end=end)

In [None]:
# Train on the GPU when one is present, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stratified 80/20 split of the T2w rows so both parts keep the label ratio.
df_train, df_valid = sk_model_selection.train_test_split(
    df_t2w,
    test_size=0.2,
    random_state=42,
    stratify=df_t2w["label"],
)

train_data_retriever = DataCustomer(
    paths=df_train["file_paths"].values,
    labels=df_train["label"].values,
)
valid_data_retriever = DataCustomer(
    paths=df_valid["file_paths"].values,
    labels=df_valid["label"].values,
)

# Only the training batches are shuffled.
train_loader = torch_data.DataLoader(
    dataset=train_data_retriever, batch_size=32, shuffle=True, num_workers=8
)
valid_loader = torch_data.DataLoader(
    dataset=valid_data_retriever, batch_size=32, shuffle=False, num_workers=8
)

model = Model()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch_functional.binary_cross_entropy_with_logits

# The meter classes themselves are handed over, not instances.
trainer = Trainer(
    model=model,
    device=device,
    optimizer=optimizer,
    criterion=criterion,
    loss_meter=LossMeter,
    score_meter=AccMeter,
)

history = trainer.fit(
    epochs=4,
    train_loader=train_loader,
    valid_loader=valid_loader,
    save_path="best-model-0.pth",
    patience=100,
)


# CREATE DATAFRAME FOR TEST SET

In [5]:
# Root directory that is walked below to collect the test-set image files.
IMG_PATH_TEST = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"

In [6]:
# Collect the path of every file found anywhere below the test directory.
f = []
for (dirpath, dirnames, filenames) in os.walk(IMG_PATH_TEST):
    f.extend(os.path.join(dirpath, x) for x in filenames)

test_file_paths_df = pd.DataFrame({'file_paths': f})
test_file_paths_df['directory'] = IMG_PATH_TEST

# Split every path on "/" once and reuse the result for all four columns,
# instead of repeating the identical split for each of them. Components 3-6
# are the dataset folder, patient id, scan type and file name.
path_parts = test_file_paths_df['file_paths'].str.split("/", n = 7, expand = True)
test_file_paths_df['dataset'] = path_parts[3]
test_file_paths_df['patient_id'] = path_parts[4]
test_file_paths_df['scan_type'] = path_parts[5]
test_file_paths_df['file'] = path_parts[6]

display(test_file_paths_df.head(2))
test_file_paths_df.shape[0]

Unnamed: 0,file_paths,directory,dataset,patient_id,scan_type,file
0,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,test,114,T2w,Image-4.dcm
1,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,test,114,T2w,Image-2.dcm


51473

# FILTER TEST DATA WITH THRESHOLD = 10 AND USE ONLY THE T2w SCAN TYPE

In [32]:
# Restrict the test files to the T2w scans.
is_t2w_scan = test_file_paths_df["scan_type"] == "T2w"
test_df = test_file_paths_df[is_t2w_scan]

# Drop images whose mean intensity is below the default threshold. Every file
# is read once here.
valid_path = test_df["file_paths"].apply(is_valid_image)
test_df = test_df[valid_path]

display(test_df.shape)
test_df.to_csv("test_df.csv", index=False)
test_df.head(2)

(7278, 6)

Unnamed: 0,file_paths,directory,dataset,patient_id,scan_type,file
0,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,test,114,T2w,Image-4.dcm
2,../input/rsna-miccai-brain-tumor-radiogenomic-...,../input/rsna-miccai-brain-tumor-radiogenomic-...,test,114,T2w,Image-3.dcm


In [None]:
# Rebuild the network(s) and restore the trained weights for inference.
models = []
for i in range(1):
    model = Model()
    model.to(device)

    # Use the loop index in the file name (a literal 0 would load the same
    # checkpoint on every iteration). `map_location` lets a checkpoint that
    # was saved on a GPU be restored on a CPU-only machine.
    checkpoint = torch.load(f"best-model-{i}.pth", map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    models.append(model)

In [33]:
# Integer patient id for each row of test_df, in row order.
_id = [int(patient) for patient in test_df['patient_id']]

In [None]:
class TestDataCustomer(torch_data.Dataset):
    """Inference dataset that yields one DICOM image and its id per item.

    Parameters
    ----------
    paths : sequence of str
        DICOM file paths, indexable by integer position.
    ids : sequence, optional
        Identifier for each path, in the same order as ``paths``. When left
        as ``None`` the module-level ``_id`` list is used, which is how the
        class behaved before this argument existed.
    """

    def __init__(self, paths, ids=None):
        self.paths = paths
        self.ids = ids

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        """Return ``{"X": float tensor (3, H, W), "id": identifier}``."""
        image = load_dicom(self.paths[index])
        # The single-channel image is repeated three times to form the channel
        # axis. Stacking in NumPy first avoids `torch.tensor` on a Python list
        # of arrays, which is a slow element-wise conversion and makes PyTorch
        # emit a performance warning.
        X = torch.from_numpy(np.stack([image, image, image])).float()

        sample_id = _id[index] if self.ids is None else self.ids[index]
        return {"X": X, "id": sample_id}

In [None]:
submission = pd.read_csv(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv"
)

# Batches are served in file order (no shuffling).
test_data_retriever = TestDataCustomer(paths=test_df["file_paths"].values)
test_loader = torch_data.DataLoader(
    dataset=test_data_retriever, batch_size=32, shuffle=False, num_workers=8
)

In [None]:
# Per-image predictions and the matching ids, filled batch by batch.
y_pred = []
ids = []

for e, batch in enumerate(test_loader):
    print(f"{e}/{len(test_loader)}", end="\r")
    with torch.no_grad():
        # Move the batch to the device once rather than once per model.
        inputs = batch["X"].to(device)
        tmp_pred = np.zeros((inputs.shape[0], ))
        for model in models:
            # reshape(-1) always yields a 1-D array, whereas squeeze() would
            # collapse a batch of a single image to a 0-d value.
            tmp_res = torch.sigmoid(model(inputs)).cpu().numpy().reshape(-1)
            tmp_pred += tmp_res
        # Average over the models so the value stays a probability; the sum
        # alone exceeds 1 as soon as more than one model is used.
        tmp_pred /= len(models)
        y_pred.extend(tmp_pred)
        ids.extend(batch["id"].numpy().tolist())

In [None]:
# One row per BraTS21ID: the median of all its per-image predictions.
resfile = (
    pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    .groupby(["BraTS21ID"], as_index=False)
    .median()
)
resfile
#submission.to_csv("submission.csv", index=False)

In [None]:
# submission = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
# submission.to_csv("submission.csv", index=False)

In [None]:
# Use the aggregated `resfile` table as the submission (this rebinds the name
# that earlier held the sample submission) and write it out without the index.
submission = resfile
submission.to_csv("submission.csv", index=False)

In [None]:
# Histogram of the submitted MGMT_value column on a 5x5 inch figure.
fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(submission["MGMT_value"]);

In [None]:
# Show the final submission table as the cell output.
submission

## WORK IN PROGRESS...

In [None]:
# class DataRetriever(torch_data.Dataset):
#     def __init__(self, paths):
#         self.paths = paths
          
#     def __len__(self):
#         return len(self.paths)
    
#     def __getitem__(self, index):
#         _id = self.paths[index]
#         patient_path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{str(_id).zfill(5)}/"
#         channels = []
#         for t in ("FLAIR", "T1w", "T1wCE"): # "T2w"
#             t_paths = sorted(
#                 glob.glob(os.path.join(patient_path, t, "*")), 
#                 key=lambda x: int(x[:-4].split("-")[-1]),
#             )
#             # start, end = int(len(t_paths) * 0.475), int(len(t_paths) * 0.525)
#             x = len(t_paths)
#             if x < 10:
#                 r = range(x)
#             else:
#                 d = x // 10
#                 r = range(d, x - d, d)
                
#             channel = []
#             # for i in range(start, end + 1):
#             for i in r:
#                 channel.append(cv2.resize(load_dicom(t_paths[i]), (256, 256)) / 255)
#             channel = np.mean(channel, axis=0)
#             channels.append(channel)
        
#         return {"X": torch.tensor(channels).float(), "id": _id}

In [None]:
# submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

# test_data_retriever = TestDataCustomer(
#     submission["BraTS21ID"].values, 
# )

# test_loader = torch_data.DataLoader(
#     test_data_retriever,
#     batch_size=8,
#     shuffle=False,
#     num_workers=8,
# )

In [None]:
def is_valid_image(path, threshold=10):
    """Report whether an image's mean pixel value reaches ``threshold``.

    NOTE(review): this repeats the definition given near the top of the
    notebook; running this cell rebinds the name to an equivalent function.

    Returns ``False`` when the mean of ``load_dicom(path)`` is strictly below
    ``threshold`` and ``True`` otherwise.
    """
    pixels = load_dicom(path)
    too_dark = np.mean(pixels) < threshold
    return False if too_dark else True