In [1]:

import gc
import glob
import os
import re
import sys
sys.path.append("../")
sys.path.append("../../")
from src.make_data import effnet_data
import utils
from src.model import custom_metric
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import torch
import torchvision as tv
from sklearn.model_selection import GroupKFold
from torch.cuda.amp import GradScaler, autocast
from torchvision.models.feature_extraction import create_feature_extractor
from tqdm import tqdm
import warnings
import joblib
import wandb
from pathlib import Path

warnings.simplefilter('ignore')
# SET CONFIG Effnet

cfg = utils.load_yaml(Path("../../config/config.yaml"))
#DATA PATH
RSNA_2022_PATH = cfg["data"]["RSNA_2022_PATH"]
TRAIN_IMAGES_PATH = f'{RSNA_2022_PATH}/train_images'
TEST_IMAGES_PATH = f'{RSNA_2022_PATH}/test_images'
EFFNET_CHECKPOINTS_PATH = cfg["data"]["EFFNET_CHECKPOINTS_PATH"]
METADATA_PATH = cfg["data"]["METADATA_PATH"]

#PARAMETER OF EFFNET
EFFNET_MAX_TRAIN_BATCHES = int(cfg["model"]["EFFNET_MAX_TRAIN_BATCHES"])
EFFNET_MAX_EVAL_BATCHES = int(cfg["model"]["EFFNET_MAX_EVAL_BATCHES"])
ONE_CYCLE_MAX_LR = float(cfg["model"]["ONE_CYCLE_MAX_LR"])
ONE_CYCLE_PCT_START = float(cfg["model"]["ONE_CYCLE_PCT_START"])
SAVE_CHECKPOINT_EVERY_STEP = int(cfg["model"]["SAVE_CHECKPOINT_EVERY_STEP"])
FRAC_LOSS_WEIGHT = float(cfg["model"]["FRAC_LOSS_WEIGHT"])
PREDICT_MAX_BATCHES = float(cfg["model"]["PREDICT_MAX_BATCHES"])
N_FOLDS = int(cfg["model"]["N_FOLDS"])
ONE_CYCLE_EPOCH = int(cfg["model"]["ONE_CYCLE_EPOCH"])
SEED = int(cfg["model"]["SEED"])
WEIGHTS = tv.models.efficientnet.EfficientNet_V2_S_Weights.DEFAULT

# Common
PROJECT_NAME = cfg["base"]["PROJECT_NAME"]
MODEL_NAME = cfg["base"]["MODEL_NAME"]


DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if DEVICE == 'cuda':
    BATCH_SIZE = cfg["model"]["BATCH_SIZE"]
else:
    BATCH_SIZE = 2

#Read csv data for slicing
df_train = pd.read_csv(f'{RSNA_2022_PATH}/train.csv')
df_train_slices = pd.read_csv(f'{METADATA_PATH}/train_segmented.csv')
df_test = pd.read_csv(f'{RSNA_2022_PATH}/test.csv')

#PreProcess and Effnetdata
df_train,df_train_slices,df_test,df_test_slices = effnet_data.preprocess(df_train = df_train,df_train_slices=df_train_slices,df_test=df_test,TEST_IMAGES_PATH=TEST_IMAGES_PATH,N_FOLDS=N_FOLDS)
data_path = Path("/home/jumpei.uchida/develop/kaggle_1080ti_1_2/rsna-2022-cervical-spine-fracture-detection/fold0")

In [2]:

def patient_prediction(df,frac_cols,vert_cols):
    c1c7 = np.average(df[frac_cols].values, axis=0, weights=df[vert_cols].values)
    pred_patient_overall = 1 - np.prod(1 - c1c7)
    return np.concatenate([[pred_patient_overall], c1c7])

def evaluate(df_eval_pred,df_train):
    target_cols = ['patient_overall'] + [f'C{i}_fracture' for i in range(1, 8)]
    frac_cols = [f'C{i}_effnet_frac' for i in range(1, 8)]
    vert_cols = [f'C{i}_effnet_vert' for i in range(1, 8)]

    df_patient_eval_pred = df_eval_pred.groupby('StudyInstanceUID').apply(lambda df: patient_prediction(df,frac_cols=frac_cols,vert_cols=vert_cols)).to_frame('pred').join(df_eval_pred.groupby('StudyInstanceUID')[target_cols].max())


    fold_data = df_train[["StudyInstanceUID","split"]].drop_duplicates().reset_index(drop = True)
    df_patient_eval_pred = df_patient_eval_pred.merge(fold_data,on  = "StudyInstanceUID",how = "left")
    valid_list = []
    for fold in range(N_FOLDS):
        df_temp  = df_patient_eval_pred.query("split == @fold")
        eval_targets = df_temp[target_cols].values
        eval_predictions = np.stack(df_temp.pred.values.tolist())
        valid_score = custom_metric.weighted_loss(torch.logit(torch.as_tensor(eval_predictions)).to(DEVICE).to(torch.float), torch.as_tensor(eval_targets).to(DEVICE).to(torch.float))
        valid_list.append(valid_score.cpu())
        print(f'Valid_CV score Fold_{fold}:', valid_score)
    
    
    print(f'Valid_CV score :',np.mean(np.array(valid_list)))

In [3]:

def resize_depth(images: np.ndarray, depth, depth_range, enable_depth_resized_with_cv2):
    assert images.ndim >= 3  # (..., depth, h/w, w/h)

    if depth_range is not None:
        assert len(depth_range) == 2
        start_idx, end_idx = np.quantile(np.arange(images.shape[-3]), depth_range).astype(int)
        images = images[..., start_idx:end_idx, :, :]

    if depth is None:
        return images

    #if images.shape[-3] < depth:
    #    warnings.warn("images.shape[-3] < given depth", UserWarning)

    if enable_depth_resized_with_cv2:
        images = images.swapaxes(-3, -2)
        *left_shapes, images_height, images_depth, images_width = images.shape
        images = images.reshape((-1, images_depth, images_width))
        images = np.stack([
            cv2.resize(image, (images_width, depth), interpolation=cv2.INTER_AREA)
            for image in images
        ], axis=0)
        images = images.reshape((*left_shapes, images_height, depth, images_width))
        images = images.swapaxes(-3, -2)
        return images
    else:
        indices = np.quantile(
            np.arange(images.shape[-3]), np.linspace(0, 1, depth)
        ).astype(int)
        return images[..., indices, :, :]

In [4]:
def transforms(temp,custom = True):
    assert temp.ndim == 4
    c_list = []
    for c in range(7):
        temp_list = []
        for c1 in temp[c]:
            temp_list.append(np.sum(c1).astype("float64"))
        temp_list /= max(temp_list)
        c_list.append(temp_list)
    c_array = np.array(c_list)
    if custom:
        for num_i,sum_num in enumerate(np.sum(c_array,axis = 0)):
            if sum_num > 1.0:
                c_array[:,num_i] /= sum_num
    
    return c_array

In [5]:
df_eval_pred = pd.read_csv("/home/jumpei.uchida/develop/kaggle_1080ti_1_2/rsna-2022-cervical-spine-fracture-detection/effnet/src/saved_model/effnet/temp_eval_prediction.csv")
uid_to_slice_map = df_eval_pred.groupby("StudyInstanceUID")["Slice"].max().to_dict()
vert_cols = [f'C{i}_effnet_vert' for i in range(1, 8)]

In [6]:
def resize_origin(uid,i,flag_df):
    vert_cols = [f'C{i}_effnet_vert' for i in range(1, 8)]
    slice = uid_to_slice_map[uid]
    temp = np.load(data_path / f"{uid}.npz",allow_pickle=True)["arr_0"]
    temp = resize_depth(temp,depth = slice,depth_range = None,enable_depth_resized_with_cv2=True)
    temp = transforms(temp)
    temp = np.nan_to_num(temp)
    temp = pd.DataFrame(temp.T,columns = vert_cols)
    temp["StudyInstanceUID"] = uid
    if flag_df.query("StudyInstanceUID == @uid")["is_reversed"].values[0] == 0:
        temp["Slice"] = [i for i in range(1,slice+1)]
    else:
        temp["Slice"] = list(reversed([i for i in range(1,slice+1)]))
    return temp,i

def get_dicom_paths(dicom_dir_path: Path):
    dicom_paths = sorted(
        dicom_dir_path.glob("*"),
        key=lambda p: int(p.name.split(".")[0])
    )
    if (
        dicom.dcmread(dicom_paths[0]).get("ImagePositionPatient")[2]
        >
        dicom.dcmread(dicom_paths[-1]).get("ImagePositionPatient")[2]
    ):
        return dicom_paths[::-1]
    return dicom_paths

In [7]:

def make_path_list(path,i):
    flag = 1
    temp = get_dicom_paths(path)
    if temp[0].parts[-1] == "1.dcm":
        flag =0
    uid = temp[0].parts[-2]
    return [uid,flag],i


paths = Path("/home/jumpei.uchida/develop/data/rsna/train_images")
path_list = joblib.Parallel(n_jobs=-1)([
    joblib.delayed(make_path_list)(path,i)
    for i,path in tqdm(enumerate(list(paths.iterdir())))])
path_list.sort(key=lambda x: x[1])
path_list = [t[0] for t in path_list]
flag_df = pd.DataFrame(path_list,columns = ["StudyInstanceUID","is_reversed"])

2019it [00:00, 225676.89it/s]


In [8]:
images = joblib.Parallel(n_jobs=-1)([
    joblib.delayed(resize_origin)(uid,i,flag_df)
    for i,uid in tqdm(enumerate(list(uid_to_slice_map.keys())))])

2018it [00:00, 119581.60it/s]


In [9]:
images.sort(key=lambda x: x[1])
images = [t[0] for t in images]

In [10]:

df_pred = pd.concat(images)
df_pred[vert_cols] += 0.000000000000000001

In [11]:
df_pred

Unnamed: 0,C1_effnet_vert,C2_effnet_vert,C3_effnet_vert,C4_effnet_vert,C5_effnet_vert,C6_effnet_vert,C7_effnet_vert,StudyInstanceUID,Slice
0,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.10001,268
1,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.10001,267
2,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.10001,266
3,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.10001,265
4,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.10001,264
...,...,...,...,...,...,...,...,...,...
250,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.9997,5
251,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.9997,4
252,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.9997,3
253,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.000000e-18,1.2.826.0.1.3680043.9997,2


In [12]:

df_eval_pred = df_eval_pred.drop(vert_cols,axis = 1)
df_eval_pred = df_eval_pred.merge(df_pred,on = ["StudyInstanceUID","Slice"],how = "left")

In [13]:
evaluate(df_eval_pred,df_train)

Valid_CV score Fold_0: tensor(0.5155, device='cuda:0')
Valid_CV score Fold_1: tensor(0.4857, device='cuda:0')
Valid_CV score Fold_2: tensor(0.4917, device='cuda:0')
Valid_CV score : 0.4976275
