In [2]:
!pip install pyradiomics SimpleITK nibabel scikit-learn xgboost lightgbm timm einops

import os, gc, pickle, warnings, numpy as np, pandas as pd, time
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import timm
import SimpleITK as sitk
import nibabel as nib
from PIL import Image
from radiomics import featureextractor
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import xgboost as xgb
import lightgbm as lgb
# Suppress every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# Report the PyTorch build and whether a CUDA device is usable.
print(
    f"PyTorch Version: {torch.__version__}",
    f"CUDA Available: {torch.cuda.is_available()}",
    sep="\n",
)


Collecting pyradiomics
  Downloading pyradiomics-3.1.0.tar.gz (34.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Discarding [4;34mhttps://files.pythonhosted.org/packages/03/c1/20fc2c50ab1e3304da36d866042a1905a2b05a1431ece35448ab6b4578f2/pyradiomics-3.1.0.tar.gz (from https://pypi.org/simple/pyradiomics/)[0m: [33mRequested pyradiomics from https://files.pythonhosted.org/packages/03/c1/20fc2c50ab1e3304da36d866042a1905a2b05a1431ece35448ab6b4578f2/pyradiomics-3.1.0.tar.gz has inconsistent version: expected '3.1.0', but metadata has '3.0.1a1'[0m
  Downloading pyradiomics-3.0.1.tar.gz (34.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m00:01

In [16]:
import pandas as pd

# Read the two CSV tables shipped with the 224x224 PNG dataset:
#   df -> series_index_mapping.csv
#   tf -> train_localizers_with_relative.csv
df, tf = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/rsna-2025-intracranial-aneurysm-png-224x224/series_index_mapping.csv",
        "/kaggle/input/rsna-2025-intracranial-aneurysm-png-224x224/train_localizers_with_relative.csv",
    )
)

# Preview the first rows of the localizer table.
tf.head()


Unnamed: 0,SeriesInstanceUID,SOPInstanceUID,coordinates,location,relative_index,relative_x,relative_y
0,1.2.826.0.1.3680043.8.498.10005158603912009425...,1.2.826.0.1.3680043.8.498.10775329348174902199...,"{'x': 258.3621186176837, 'y': 261.359900373599}",Other Posterior Circulation,162,113.033427,114.344956
1,1.2.826.0.1.3680043.8.498.10022796280698534221...,1.2.826.0.1.3680043.8.498.53868409774237283281...,"{'x': 194.87253141831238, 'y': 178.32675044883...",Right Middle Cerebral Artery,453,85.256732,78.017953
2,1.2.826.0.1.3680043.8.498.10023411164590664678...,1.2.826.0.1.3680043.8.498.24186535344744886473...,"{'x': 189.23979878597123, 'y': 209.19184886465...",Right Middle Cerebral Artery,112,82.792412,91.521434
3,1.2.826.0.1.3680043.8.498.10030095840917973694...,1.2.826.0.1.3680043.8.498.75217084841854214544...,"{'x': 208.2805049088359, 'y': 229.78962131837307}",Right Infraclinoid Internal Carotid Artery,177,91.122721,100.532959
4,1.2.826.0.1.3680043.8.498.10034081836061566510...,1.2.826.0.1.3680043.8.498.71237104731452368587...,"{'x': 249.86745590416498, 'y': 220.623044646393}",Anterior Communicating Artery,46,109.317012,96.522582


In [18]:

# Root of the pre-converted 224x224 PNG dataset; the three paths below live inside it.
DATA_DIR = "/kaggle/input/rsna-2025-intracranial-aneurysm-png-224x224"
CVT_PNG_DIR, SERIES_MAPPING_PATH, LOCALIZERS_PATH = (
    os.path.join(DATA_DIR, leaf)
    for leaf in (
        "cvt_png",
        "series_index_mapping.csv",
        "train_localizers_with_relative.csv",
    )
)

# train.csv comes from a separate input dataset.
TRAIN_CSV_PATH = "/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv"

In [22]:
# Ordered label names: thirteen location names followed by 'Aneurysm Present'.
# Left/right counterparts share a line; the order of the entries is significant.
# NOTE(review): presumably these match label columns of train.csv — confirm.
TARGET_COLS = [
    'Left Infraclinoid Internal Carotid Artery', 'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery', 'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery', 'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery', 'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery', 'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation',
    'Aneurysm Present',
]

In [21]:
print("Loading data...")

# Read the three tables in one pass, in the same order as the names on the left.
train_df, series_mapping_df, localizers_df = map(
    pd.read_csv,
    (TRAIN_CSV_PATH, SERIES_MAPPING_PATH, LOCALIZERS_PATH),
)

# Preview one table; swap in train_df or localizers_df to inspect the others.
series_mapping_df.head()

Loading data...


Unnamed: 0,SeriesInstanceUID,SOPInstanceUID,dicom_filename,relative_index,Modality
0,1.2.826.0.1.3680043.8.498.10004044428023505108...,1.2.826.0.1.3680043.8.498.56949904638593632206...,/kaggle/input/rsna-intracranial-aneurysm-detec...,0,MRA
1,1.2.826.0.1.3680043.8.498.10004044428023505108...,1.2.826.0.1.3680043.8.498.12396711188070994245...,/kaggle/input/rsna-intracranial-aneurysm-detec...,1,MRA
2,1.2.826.0.1.3680043.8.498.10004044428023505108...,1.2.826.0.1.3680043.8.498.27571397853195038984...,/kaggle/input/rsna-intracranial-aneurysm-detec...,2,MRA
3,1.2.826.0.1.3680043.8.498.10004044428023505108...,1.2.826.0.1.3680043.8.498.60143101667068651693...,/kaggle/input/rsna-intracranial-aneurysm-detec...,3,MRA
4,1.2.826.0.1.3680043.8.498.10004044428023505108...,1.2.826.0.1.3680043.8.498.45662927574100362473...,/kaggle/input/rsna-intracranial-aneurysm-detec...,4,MRA


In [None]:
import os
import glob
import numpy as np
from PIL import Image

def _natural_sort_key(path):
    """Sort key that orders digit runs in a file name by numeric value.

    With this key ``2.png`` sorts before ``10.png``; zero-padded names keep the
    same order they have under plain string sorting.
    """
    import re  # local import so this cell does not depend on the notebook's import cells

    name = os.path.basename(path)
    # re.split with a capturing group alternates text / digit tokens: even positions are
    # text, odd positions are digit runs, so tokens at equal positions are always comparable.
    tokens = [int(tok) if i % 2 else tok for i, tok in enumerate(re.split(r"(\d+)", name))]
    # The raw name breaks ties (e.g. "01.png" vs "1.png") deterministically.
    return tokens, name


def stack_png_series(series_dir, target_size=(224,224)):
    """Load every ``*.png`` in ``series_dir`` and stack them into one 3D volume.

    Args:
        series_dir: Directory containing the PNG slices of one series.
        target_size: ``(width, height)`` passed to ``Image.resize`` for every slice.

    Returns:
        ``np.ndarray`` of dtype float32 with shape ``(num_slices, height, width)``.
        Each slice is converted to 8-bit grayscale and divided by 255, so values
        lie in ``[0, 1]``. Slices are ordered by file name using a natural
        (numeric-aware) sort.

    Raises:
        AssertionError: If ``series_dir`` contains no PNG files.
    """
    # Escape glob metacharacters so a directory name containing e.g. "[" is matched literally.
    pattern = os.path.join(glob.escape(os.fspath(series_dir)), "*.png")
    files = sorted(glob.glob(pattern), key=_natural_sort_key)
    # Raised explicitly (not via `assert`) so the check still runs under `python -O`;
    # the exception type is kept for backward compatibility.
    if not files:
        raise AssertionError(f"No PNG files found in {series_dir}")

    slices = []
    for file in files:
        # The context manager releases the underlying file handle promptly.
        with Image.open(file) as img:
            gray = img.convert("L").resize(target_size)
            slices.append(np.asarray(gray, dtype=np.float32) / 255.0)  # normalize to [0,1]
    return np.stack(slices, axis=0)  # shape: (num_slices, H, W)

# Example: walk <root_dir>/<artery>/<series>/ and stack every series folder into a 3D volume.
# os.listdir returns entries in arbitrary order, so both listings are sorted to make the
# printed log reproducible from run to run.
root_dir = "/kaggle/input/rsna-2025-intracranial-aneurysm-png-224x224/cvt_png"
arteries = sorted(os.listdir(root_dir))
for artery in arteries:
    artery_dir = os.path.join(root_dir, artery)
    if not os.path.isdir(artery_dir):
        continue  # ignore stray files next to the artery folders
    series_folders = sorted(os.listdir(artery_dir))
    for series in series_folders:
        series_dir = os.path.join(artery_dir, series)
        if not os.path.isdir(series_dir):
            continue  # ignore stray files next to the series folders
        vol = stack_png_series(series_dir)
        print(f"Loaded {artery}/{series} 3D volume shape: {vol.shape}")
        # Optional: load segmentation with same SeriesInstanceUID
        # seg_path = f"/kaggle/input/rsna-intracranial-aneurysm-detection/segmentations/{series}.nii"


Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.10607580708371334840797048741181101985 3D volume shape: (38, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.12663099737884495675525119454913855379 3D volume shape: (27, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.74320263516357081096176883639948081235 3D volume shape: (232, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317 3D volume shape: (276, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.82641698422464356104108563099150990855 3D volume shape: (867, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.11019101980573889157112037207769236902 3D volume shape: (127, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.88470921398186621059437334583794632704 3D volume shape: (364, 224, 224)
Loaded Other Posterior Circulation/1.2.826.0.1.3680043.8.498.3194554449

(83, 224, 224)
