In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix
from sklearn.utils import resample
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import torch 
from argparse import Namespace
from tqdm import tqdm
import pickle 
import glob 
import ast

# Add the path to the directory containing the sybil module
sys.path.append('/workspace/home/tengyuezhang/sybil_cect/code/Sybil/')
from sybil.utils.metrics import concordance_index, get_survival_metrics
from sybil import Sybil, Serie


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
num_threads = os.cpu_count() // 5

In [7]:
lndb_with_labels_path = '/workspace/home/tengyuezhang/sybil_cect/data/lndb_labels.csv'
output_path = '/workspace/home/tengyuezhang/sybil_cect/results/lndb_risk_scores.csv'
vis_dir_path = "/workspace/home/tengyuezhang/sybil_cect/visualizations/lndb_attention_maps"
save_atten_maps = False
if not os.path.exists(vis_dir_path):
    os.makedirs(vis_dir_path)

In [4]:
# Initialize the Sybil model
model = Sybil("sybil_ensemble")
num_years = 6



In [5]:
# Load the CSV file
all_cases = pd.read_csv(lndb_with_labels_path)
df = all_cases

In [8]:
for i in range(num_years):
    df[f'pred_risk_year_{i}'] = np.nan

for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing cases"):
    
    png_dir = row['png_dir']
    event = row['event']
    years_to_event = 1
    pid = row['LNDbID']
    spacings_str = row['spacings']
    spacings_tuple = ast.literal_eval(spacings_str)
    spacings = list(spacings_tuple)
    png_list = glob.glob(png_dir + '/*')
    png_list.sort() 
    
    serie = Serie(png_list, file_type='png', voxel_spacing=spacings, label=event, censor_time=years_to_event)
    
    results = model.predict([serie], return_attentions=True, threads=num_threads)
        
    # Update the risk scores columns for the current row
    for i in range(num_years):
        df.at[index, f'pred_risk_year_{i}'] = results.scores[0][i]
        
    # Save the updated DataFrame to the output CSV file at each iteration
    df.to_csv(output_path, index=False)
    
    # Save attention maps 
    if save_atten_maps: 
        attentions = results.attentions

        series_with_attention = visualize_attentions(
            serie,
            attentions = attentions,
            pid = pid, 
            save_directory = vis_dir_path,
            gain = 1, 
        )

Processing cases: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 231/231 [17:25<00:00,  4.53s/it]


In [9]:
scores

[]