In [58]:
import gc
import glob
import os
import re
import sys
sys.path.append("../")
sys.path.append("../../")
from src.make_data import effnet_binary_data as effnet_data
from src.model import effnetv2_1
import utils
from src.model import custom_metric
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import torch
from torch.cuda.amp import GradScaler, autocast
from torchmetrics import AUROC
import torchvision as tv
from torchvision.models.feature_extraction import create_feature_extractor
from tqdm import tqdm
import warnings
import joblib
import wandb
from pathlib import Path

cfg = utils.load_yaml(Path("../../config/config.yaml"))
#DATA PATH
RSNA_2022_PATH = cfg["data"]["RSNA_2022_PATH"]
TRAIN_IMAGES_PATH = f'{RSNA_2022_PATH}/train_images'
TEST_IMAGES_PATH = f'{RSNA_2022_PATH}/test_images'
EFFNET_CHECKPOINTS_PATH = cfg["data"]["EFFNET_CHECKPOINTS_PATH"]
METADATA_PATH = cfg["data"]["METADATA_PATH"]

WEIGHTS = tv.models.efficientnet.EfficientNet_V2_S_Weights.DEFAULT
N_FOLDS = 3
#Read csv data for slicing
df_train = pd.read_csv(f'{RSNA_2022_PATH}/train.csv')
df_train_slices = pd.read_csv(f'{METADATA_PATH}/train_segmented.csv')
df_test = pd.read_csv(f'{RSNA_2022_PATH}/test.csv')
df_train_box = pd.read_csv(f"{RSNA_2022_PATH}/cropped_2d_labels.csv")

#df_train, df_train_slices, df_test, df_test_slices = effnet_data.preprocess( df_train=df_train,
#    df_train_slices=df_train_slices,
#    df_train_box=df_train_box,
#    df_test=df_test,
#    TEST_IMAGES_PATH=TEST_IMAGES_PATH,
#    N_FOLDS=N_FOLDS,
#)
#PreProcess and Effnetdata
data_path = Path("/home/jumpei.uchida/develop/kaggle_1080ti_1_2/rsna-2022-cervical-spine-fracture-detection/fold0")
effnet_models = []
check_points ="/home/jumpei.uchida/develop/kaggle_1080ti_1_2/rsna-2022-cervical-spine-fracture-detection/effnet/src/saved_model/effnet"

In [59]:

# detect reversed slice uid
uid_list = list(df_train_box["StudyInstanceUID"].unique())
df_train = df_train.query("StudyInstanceUID != @uid_list & patient_overall == 1")[
    ["StudyInstanceUID", "patient_overall"]
].reset_index(drop=True)

train_uid = list(df_train["StudyInstanceUID"].unique())
df_train = (
    df_train_slices[["StudyInstanceUID", "Slice"]]
    .query("StudyInstanceUID == @train_uid")
    .merge(df_train, on="StudyInstanceUID", how="left")
)
# reject bug StudyInstanceUID
df_train = df_train.query(
    'StudyInstanceUID != "1.2.826.0.1.3680043.20574"'
).reset_index(drop=True)

In [60]:
def evaluate_effnet(model: effnetv2_1.EffnetModel, ds, max_batches=1000000000000000, shuffle=False,DEVICE ="cuda"):
    torch.manual_seed(42)
    model = model.to(DEVICE)
    dl_test = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=shuffle, num_workers=os.cpu_count(),
                                          collate_fn=utils.filter_nones)
    pred_frac = []
    valid_list = []
    auroc = AUROC(pos_label=1)
    with torch.no_grad():
        model.eval()
        frac_losses = []
        with tqdm(dl_test, desc='Eval', miniters=1) as progress:
            for i, (X, y_frac) in enumerate(progress):
                with autocast():
                    y_frac_pred= model.forward(X.to(DEVICE))
                    #Binary Cross Entoropy
                    frac_loss = torch.nn.functional.binary_cross_entropy_with_logits(y_frac_pred.to(DEVICE),y_frac.to(DEVICE),reduction='none')
                    valid_score = auroc(torch.sigmoid(y_frac_pred).to(DEVICE),y_frac.to(DEVICE).to(torch.int64))
                    valid_list.append(valid_score.cpu())
                    pred_frac.append(torch.sigmoid(y_frac_pred))
                    frac_losses.append(torch.mean(frac_loss).cpu())
                if i >= max_batches:
                    break
        return np.mean(frac_losses), torch.concat(pred_frac).cpu().numpy(),np.mean(valid_list)

def gen_effnet_predictions(effnet_models, df_train,EFFNET_CHECKPOINTS_PATH = EFFNET_CHECKPOINTS_PATH):
    if os.path.exists(os.path.join(EFFNET_CHECKPOINTS_PATH, 'train_{PROJECT_NAME}_{MODEL_NAME}_predictions.csv')):
        print('Found cached version of train_predictions.csv')
        df_eval_effnet_pred = pd.read_csv(os.path.join(EFFNET_CHECKPOINTS_PATH, 'eval_{PROJECT_NAME}_{MODEL_NAME}_predictions.csv'))
    else:
        df_eval_predictions = []
        with tqdm(enumerate(effnet_models), total=len(effnet_models), desc='Folds') as progress:
            for fold, effnet_model in progress:
                ds_eval = effnet_data.EffnetDataSet(df_train, TRAIN_IMAGES_PATH, WEIGHTS.transforms())

                #valid_prediction
                eval_frac_loss, eval_effnet_pred_frac,eval_valid_score = evaluate_effnet(effnet_model, ds_eval, 32)
                progress.set_description(f'Fold loss:{eval_frac_loss:.02f}, Fold score:{eval_valid_score:.02f}')
                df_eval_effnet_pred = pd.DataFrame(data=eval_effnet_pred_frac,
                                              columns=["pred"])

                df_eval = pd.concat(
                    [df_train.head(len(df_eval_effnet_pred)).reset_index(drop=True), df_eval_effnet_pred],
                    axis=1
                ).sort_values(['StudyInstanceUID', 'Slice'])

                df_eval.to_csv(f'{EFFNET_CHECKPOINTS_PATH}/FOLD{fold}_remained_eval_prediction.csv')

        #df_train_predictions,
    return df_eval_predictions

In [61]:

effnet_models = [utils.load_model(effnetv2_1.EffnetModel(), f'effnet_second-f{i}', check_points) for i in range(3)]

In [62]:
gen_effnet_predictions(effnet_models=effnet_models,df_train = df_train,EFFNET_CHECKPOINTS_PATH=check_points)

Eval:   0%|          | 32/7597 [00:13<51:27,  2.45it/s]
Eval:   0%|          | 32/7597 [00:12<50:43,  2.49it/s]00:13<00:26, 13.15s/it]
Eval:   0%|          | 32/7597 [00:12<51:13,  2.46it/s]00:26<00:13, 13.04s/it]
Fold loss:4.60, Fold score:0.00: 100%|██████████| 3/3 [00:39<00:00, 13.07s/it]


[]