In [1]:
import gc
import glob
import os
import re
import sys
sys.path.append("../")
sys.path.append("../../")
from src.make_data import effnet_data as  effnet_data
import utils
from src.model import custom_metric
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import torch
import torchvision as tv
from sklearn.model_selection import GroupKFold
from torch.cuda.amp import GradScaler, autocast
from torchvision.models.feature_extraction import create_feature_extractor
from tqdm import tqdm
import warnings
import joblib
import wandb
from pathlib import Path

warnings.simplefilter('ignore')
# SET CONFIG Effnet

cfg = utils.load_yaml(Path("../../config/config.yaml"))
#DATA PATH
RSNA_2022_PATH = cfg["data"]["RSNA_2022_PATH"]
TRAIN_IMAGES_PATH = f'{RSNA_2022_PATH}/train_images'
TEST_IMAGES_PATH = f'{RSNA_2022_PATH}/test_images'
EFFNET_CHECKPOINTS_PATH = cfg["data"]["EFFNET_CHECKPOINTS_PATH"]
METADATA_PATH = cfg["data"]["METADATA_PATH"]

WEIGHTS = tv.models.efficientnet.EfficientNet_V2_S_Weights.DEFAULT
N_FOLDS = 3
#Read csv data for slicing
df_train = pd.read_csv(f'{RSNA_2022_PATH}/train.csv')
df_train_slices = pd.read_csv(f'{METADATA_PATH}/train_segmented.csv')
df_test = pd.read_csv(f'{RSNA_2022_PATH}/test.csv')
df_train_box = pd.read_csv(f"{RSNA_2022_PATH}/cropped_2d_labels.csv")

#df_train, df_train_slices, df_test, df_test_slices = effnet_data.preprocess( df_train=df_train,
#    df_train_slices=df_train_slices,
#    df_train_box=df_train_box,
#    df_test=df_test,
#    TEST_IMAGES_PATH=TEST_IMAGES_PATH,
#    N_FOLDS=N_FOLDS,
#)
df_train,df_train_slices,df_test,df_test_slices = effnet_data.preprocess(df_train = df_train,df_train_slices=df_train_slices,df_test=df_test,TEST_IMAGES_PATH=TEST_IMAGES_PATH,N_FOLDS=N_FOLDS)
#PreProcess and Effnetdata
data_path = Path("/home/jumpei.uchida/develop/kaggle_1080ti_1_2/rsna-2022-cervical-spine-fracture-detection/fold0")

In [2]:
pred_path = Path("/home/jumpei.uchida/develop/kaggle_1080ti_2_1/rsna-2022-cervical-spine-fracture-detection-1/effnet/src/saved_model/effnet/effnet_weight2_eval_prediction.csv")
df = pd.read_csv(pred_path)

In [3]:
df.tail()

Unnamed: 0.1,Unnamed: 0,StudyInstanceUID,Slice,new_slice,patient_overall,split,pred
163612,54459,1.2.826.0.1.3680043.9940,255,255,0.0,2.0,0.002424
163613,54460,1.2.826.0.1.3680043.9940,256,256,0.0,2.0,0.001661
163614,54461,1.2.826.0.1.3680043.9940,257,257,0.0,2.0,0.0017
163615,54462,1.2.826.0.1.3680043.9940,258,258,0.0,2.0,0.00214
163616,54463,1.2.826.0.1.3680043.9940,259,259,0.0,2.0,0.002693


In [4]:

def resize_depth(images: np.ndarray, depth, depth_range, enable_depth_resized_with_cv2):
    assert images.ndim >= 3  # (..., depth, h/w, w/h)

    if depth_range is not None:
        assert len(depth_range) == 2
        start_idx, end_idx = np.quantile(np.arange(images.shape[-3]), depth_range).astype(int)
        images = images[..., start_idx:end_idx, :, :]

    if depth is None:
        return images

    #if images.shape[-3] < depth:
    #    warnings.warn("images.shape[-3] < given depth", UserWarning)

    if enable_depth_resized_with_cv2:
        images = images.swapaxes(-3, -2)
        *left_shapes, images_height, images_depth, images_width = images.shape
        images = images.reshape((-1, images_depth, images_width))
        images = np.stack([
            cv2.resize(image, (images_width, depth), interpolation=cv2.INTER_AREA)
            for image in images
        ], axis=0)
        images = images.reshape((*left_shapes, images_height, depth, images_width))
        images = images.swapaxes(-3, -2)
        return images
    else:
        indices = np.quantile(
            np.arange(images.shape[-3]), np.linspace(0, 1, depth)
        ).astype(int)
        return images[..., indices, :, :]

def transforms(temp,custom = True):
    assert temp.ndim == 4
    c_list = []
    for c in range(7):
        temp_list = []
        for c1 in temp[c]:
            temp_list.append(np.sum(c1).astype("float64"))
        temp_list /= max(temp_list)
        c_list.append(temp_list)
    c_array = np.array(c_list)
    if custom:
        for num_i,sum_num in enumerate(np.sum(c_array,axis = 0)):
            if sum_num > 1.0:
                c_array[:,num_i] /= sum_num
    
    return c_array

def resize_origin(uid,i,flag_df):
    vert_cols = [f'C{i}_effnet_vert' for i in range(1, 8)]
    slice = uid_to_slice_map[uid]
    temp = np.load(data_path / f"{uid}.npz",allow_pickle=True)["arr_0"]
    temp = resize_depth(temp,depth = slice,depth_range = None,enable_depth_resized_with_cv2=True)
    temp = transforms(temp)
    temp = np.nan_to_num(temp)
    temp = pd.DataFrame(temp.T,columns = vert_cols)
    temp["StudyInstanceUID"] = uid
    if flag_df.query("StudyInstanceUID == @uid")["is_reversed"].values[0] == 0:
        temp["Slice"] = [i for i in range(1,slice+1)]
    else:
        temp["Slice"] = list(reversed([i for i in range(1,slice+1)]))
    return temp,i

def get_dicom_paths(dicom_dir_path: Path):
    dicom_paths = sorted(
        dicom_dir_path.glob("*"),
        key=lambda p: int(p.name.split(".")[0])
    )
    if (
        dicom.dcmread(dicom_paths[0]).get("ImagePositionPatient")[2]
        >
        dicom.dcmread(dicom_paths[-1]).get("ImagePositionPatient")[2]
    ):
        return dicom_paths[::-1]
    return dicom_paths
def make_path_list(path,i):
    flag = 1
    temp = get_dicom_paths(path)
    if temp[0].parts[-1] == "1.dcm":
        flag =0
    uid = temp[0].parts[-2]
    return [uid,flag],i


df_eval_pred = df.copy()
uid_to_slice_map = df_eval_pred.groupby("StudyInstanceUID")["Slice"].mean().to_dict()
vert_cols = [f'C{i}_effnet_vert' for i in range(1, 8)]

paths = Path("/home/jumpei.uchida/develop/data/rsna/train_images")
path_list = joblib.Parallel(n_jobs=-1)([
    joblib.delayed(make_path_list)(path,i)
    for i,path in tqdm(enumerate(list(paths.iterdir())))])
path_list.sort(key=lambda x: x[1])
path_list = [t[0] for t in path_list]
flag_df = pd.DataFrame(path_list,columns = ["StudyInstanceUID","is_reversed"])

2019it [00:00, 247850.26it/s]


In [5]:

images = joblib.Parallel(n_jobs=-1)([
    joblib.delayed(resize_origin)(uid,i,flag_df)
    for i,uid in tqdm(enumerate(list(uid_to_slice_map.keys())))])

images.sort(key=lambda x: x[1])
images = [t[0] for t in images]

470it [00:00, 193570.59it/s]


error: OpenCV(4.6.0) :-1: error: (-5:Bad argument) in function 'resize'
> Overload resolution failed:
>  - Can't parse 'dsize'. Sequence item with index 1 has a wrong type
>  - Can't parse 'dsize'. Sequence item with index 1 has a wrong type


In [None]:
images

[     C1_effnet_vert  C2_effnet_vert  C3_effnet_vert  C4_effnet_vert  \
 0               0.0             0.0             0.0             0.0   
 1               0.0             0.0             0.0             0.0   
 2               0.0             0.0             0.0             0.0   
 3               0.0             0.0             0.0             0.0   
 4               0.0             0.0             0.0             0.0   
 ..              ...             ...             ...             ...   
 316             0.0             0.0             0.0             0.0   
 317             0.0             0.0             0.0             0.0   
 318             0.0             0.0             0.0             0.0   
 319             0.0             0.0             0.0             0.0   
 320             0.0             0.0             0.0             0.0   
 
      C5_effnet_vert  C6_effnet_vert  C7_effnet_vert  \
 0               0.0             0.0             0.0   
 1               0.0  

In [None]:
df_pred = pd.concat(images)

In [None]:
df_pred.head()

Unnamed: 0,C1_effnet_vert,C2_effnet_vert,C3_effnet_vert,C4_effnet_vert,C5_effnet_vert,C6_effnet_vert,C7_effnet_vert,StudyInstanceUID,Slice
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2.826.0.1.3680043.10032,321
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2.826.0.1.3680043.10032,320
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2.826.0.1.3680043.10032,319
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2.826.0.1.3680043.10032,318
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2.826.0.1.3680043.10032,317


In [None]:
df_predict = df_eval_pred.merge(df_pred,on = ["StudyInstanceUID","Slice"],how ="left")
df_predict[vert_cols] += 0.000000000000000001
for cols in vert_cols:
    df_predict[cols] = df_predict[cols].values * df_predict["pred"].values
df_predict = df_predict.drop("patient_overall",axis = 1).groupby(["StudyInstanceUID","split"]).max().reset_index()
df_predict = df_predict[['StudyInstanceUID', 'split','pred',
       'C1_effnet_vert', 'C2_effnet_vert', 'C3_effnet_vert', 'C4_effnet_vert',
       'C5_effnet_vert', 'C6_effnet_vert', 'C7_effnet_vert']]


In [None]:

df_train  = df_train.drop_duplicates("StudyInstanceUID")

In [None]:
df_predict = df_predict.merge(df_train.drop("split",axis = 1),on = ["StudyInstanceUID"],how ="left")

In [None]:
df_predict

Unnamed: 0,StudyInstanceUID,split,pred,C1_effnet_vert,C2_effnet_vert,C3_effnet_vert,C4_effnet_vert,C5_effnet_vert,C6_effnet_vert,C7_effnet_vert,...,C6,C7,patient_overall,C1_fracture,C2_fracture,C3_fracture,C4_fracture,C5_fracture,C6_fracture,C7_fracture
0,1.2.826.0.1.3680043.10032,0.0,0.3826,0.074046,0.248826,0.257828,0.354916,0.233401,0.197521,0.134526,...,0,0,0,0,0,0,0,0,0,0
1,1.2.826.0.1.3680043.10051,1.0,0.3562,0.184733,0.304713,0.205876,0.130446,0.061770,0.092181,0.183107,...,0,0,1,0,0,0,1,0,0,0
2,1.2.826.0.1.3680043.10062,2.0,0.2593,0.221064,0.185417,0.151000,0.040572,0.046252,0.029320,0.029248,...,0,0,0,0,0,0,0,0,0,0
3,1.2.826.0.1.3680043.10136,2.0,0.3237,0.208844,0.221565,0.234205,0.290045,0.145810,0.038849,0.048022,...,0,0,0,0,0,0,0,0,0,0
4,1.2.826.0.1.3680043.10579,2.0,0.3540,0.244490,0.284416,0.314121,0.308343,0.346481,0.247679,0.186609,...,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,1.2.826.0.1.3680043.9443,1.0,0.3474,0.201115,0.181963,0.203182,0.137253,0.058273,0.025441,0.025339,...,0,0,0,0,0,0,0,0,0,0
466,1.2.826.0.1.3680043.9447,1.0,0.2527,0.039995,0.079040,0.164659,0.205515,0.201350,0.075498,0.102147,...,0,0,1,0,1,0,0,0,0,0
467,1.2.826.0.1.3680043.9809,1.0,0.2844,0.191249,0.177526,0.164502,0.104577,0.014936,0.021473,0.016364,...,0,0,0,0,0,0,0,0,0,0
468,1.2.826.0.1.3680043.9926,2.0,0.3398,0.179099,0.206012,0.141978,0.209774,0.082135,0.219502,0.201343,...,0,0,1,0,1,0,0,0,0,0


In [None]:
target_cols = ["patient_overall"]+[f"C{i}_fracture" for i in range(1,7+1)]
pred_cols = ["pred"]+[f"C{i}_effnet_vert" for i in range(1,7+1)]
valid_list = []
DEVICE ="cuda"
for fold in range(N_FOLDS):
    df_temp  = df_predict.query("split == @fold")
    eval_targets = df_temp[target_cols].values
    eval_predictions = np.stack(df_temp[pred_cols].values.tolist())
    valid_score = custom_metric.weighted_loss(torch.logit(torch.as_tensor(eval_predictions)).to(DEVICE).to(torch.float), torch.as_tensor(eval_targets).to(DEVICE).to(torch.float))
    valid_list.append(valid_score.cpu())
    print(f'Valid_CV score Fold_{fold}:', valid_score)

Valid_CV score Fold_0: tensor(0.6545, device='cuda:0')
Valid_CV score Fold_1: tensor(0.7427, device='cuda:0')
Valid_CV score Fold_2: tensor(0.6757, device='cuda:0')


In [None]:
np.mean(valid_list)

0.69094545

In [None]:
df_predictss = df_predict[['StudyInstanceUID', 'split', 'pred', 'C1_effnet_vert', 'C2_effnet_vert',
       'C3_effnet_vert', 'C4_effnet_vert', 'C5_effnet_vert', 'C6_effnet_vert',
       'C7_effnet_vert']].rename(columns ={f"C{i}_effnet_vert":f"C{i}" for i in range(1,7+1)})
df_predictss = df_predictss.rename(columns = {"pred":"overall","split":"fold"})

In [None]:
df_predictss.to_csv("binary_effnetv2_unet_pred.csv",index = False)