# CCTA Dataset Creation: Ostia Intensity Analysis

---

## 1. Create dataframes collecting the coronary ostia's world coordinates

In [None]:
%cd ../

/home/marco/contrast-gan-3D


In [None]:
from pathlib import Path
from pprint import pprint
from typing import Dict, Iterable, Optional, Tuple, Union

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from monai.visualize.utils import matshow3d
from sklearn.mixture import GaussianMixture as GMM
from sklearn.model_selection import GridSearchCV

from contrast_gan_3D.constants import VMAX, VMIN
from contrast_gan_3D.data import utils as dset_utils
from contrast_gan_3D.utils import geometry, io_utils, logging_utils
from contrast_gan_3D.utils import visualization as viz

# Set the level of the project's loggers to INFO (the cell output lists each logger's change).
logging_utils.set_project_loggers_level(level="INFO")

# NOTE(review): the generator is created without a seed, so anything drawn from `rng`
# differs between runs -- pass a seed here if reproducibility matters.
rng = np.random.default_rng()

contrast_gan_3D.utils.geometry: INFO -> INFO
contrast_gan_3D.utils.io_utils: INFO -> INFO
contrast_gan_3D.data.utils: INFO -> INFO
contrast_gan_3D.utils.visualization: INFO -> INFO


Adjust the variables in the following cell to match your setup. Run this code multiple times to process different datasets.

In [None]:
# Image folders of the datasets to process, one entry per dataset.
DATASET_DIRS = list(
    map(
        Path("/home/marco/data").joinpath,
        ("ASOCA_Philips/images", "MMWHS/ct_train", "MMWHS/ct_test"),
    )
)

# Folder holding the extracted centerlines of each dataset.
CENTERLINES_TOP_DIRS = [
    dataset_dir / "auto_centerlines" for dataset_dir in DATASET_DIRS
]

# NOTE set to `None` if you don't want to write the dataframe to disk
OSTIA_DF_SAVENAMES = [
    dataset_dir / "ostia_world_coords.xlsx" for dataset_dir in DATASET_DIRS
]

# Echo the configuration so it can be checked before running the next cells.
pprint(DATASET_DIRS)
pprint(CENTERLINES_TOP_DIRS)
pprint(OSTIA_DF_SAVENAMES)

[PosixPath('/home/marco/data/ASOCA_Philips/images'),
 PosixPath('/home/marco/data/MMWHS/ct_train'),
 PosixPath('/home/marco/data/MMWHS/ct_test')]
[PosixPath('/home/marco/data/ASOCA_Philips/images/auto_centerlines'),
 PosixPath('/home/marco/data/MMWHS/ct_train/auto_centerlines'),
 PosixPath('/home/marco/data/MMWHS/ct_test/auto_centerlines')]
[PosixPath('/home/marco/data/ASOCA_Philips/images/ostia_world_coords.xlsx'),
 PosixPath('/home/marco/data/MMWHS/ct_train/ostia_world_coords.xlsx'),
 PosixPath('/home/marco/data/MMWHS/ct_test/ostia_world_coords.xlsx')]


**The following cell might overwrite existing files!**

In [None]:
# Flip the guard to `True` to (re)build the ostia spreadsheet of every dataset.
if False:
    for centerlines_dir, ostia_df_savename in zip(
        CENTERLINES_TOP_DIRS, OSTIA_DF_SAVENAMES
    ):
        print(f"Centerlines: {str(centerlines_dir)!r} Ostias: {str(ostia_df_savename)!r}")

        # `ostia.xml` files are looked up one folder level below the centerlines folder
        ostia_paths = sorted(centerlines_dir.glob("*/ostia.xml"))

        ostia_dataframe = dset_utils.create_ostia_dataframe(
            ostia_paths,
            ostia_sheet_savename=ostia_df_savename,
        )
        display(ostia_dataframe.head())

---

## 2. Divide the datapoints into optimal and suboptimal HU contrast values

CCTA scans are divided into *optimal* and *suboptimal* sets based on the contrast medium attenuation values at the
aortic root. We fit a Mixture of Gaussians model on a 3D patch centered around the aortic root, and take as its
attenuation value the mean of the component with the highest mean HU value, considering only the components whose
standard deviation does not exceed 150 HU.

This data processing approach is described in 
"[Generative Adversarial Networks for Coronary CT Angiography Acquisition Protocol Correction with Explicit Attenuation Constraints (Van Herten, 2023)](https://openreview.net/pdf?id=MIHF_buWGUQ)"; the following code implementation is adapted from the first author's original codebase.

### 2.1. Create the ostia dataset

Adjust the variables in the following cell to match your setup:

In [None]:
# Spreadsheet gathering the ostia of all the datasets.
OSTIA_DATASET_PATH = Path("/home/marco/data/ostia_world_coords.xlsx")
# Short dataset tags, paired by position with `OSTIA_DF_SAVENAMES`.
DATASET_NAMES = ["ASOCA", "MMWHS_train", "MMWHS_test"]

pprint([*zip(DATASET_NAMES, OSTIA_DF_SAVENAMES)])
print(OSTIA_DATASET_PATH)

[('ASOCA',
  PosixPath('/home/marco/data/ASOCA_Philips/images/ostia_world_coords.xlsx')),
 ('MMWHS_train',
  PosixPath('/home/marco/data/MMWHS/ct_train/ostia_world_coords.xlsx')),
 ('MMWHS_test',
  PosixPath('/home/marco/data/MMWHS/ct_test/ostia_world_coords.xlsx'))]
/home/marco/data/ostia_world_coords.xlsx


In [None]:
# Flip the guard to `True` to rebuild the merged spreadsheet from the per-dataset ones;
# otherwise the previously saved spreadsheet is loaded.
if False:
    tagged_frames = []
    for path, dataset_name in zip(OSTIA_DF_SAVENAMES, DATASET_NAMES):
        # tag every row with the dataset it belongs to
        tagged_frames.append(pd.read_excel(path).assign(dataset=dataset_name))
    ostia_df = pd.concat(tagged_frames).reset_index(drop=True)
    ostia_df.to_excel(OSTIA_DATASET_PATH, index=False)
    print("Saved ostia dataset")
else:
    ostia_df = pd.read_excel(OSTIA_DATASET_PATH)
    print("Loaded ostia dataset")

ostia_df

Loaded ostia dataset


Unnamed: 0,ID,x,y,z,dataset
0,ASOCA-000,212.899704,211.107513,-103.750,ASOCA
1,ASOCA-000,219.139938,240.228622,-93.125,ASOCA
2,ASOCA-001,177.677750,260.138702,-95.750,ASOCA
3,ASOCA-001,189.806671,287.226593,-83.250,ASOCA
4,ASOCA-002,189.720184,229.135803,-110.125,ASOCA
...,...,...,...,...,...
235,ct_test_2038_image,14.879633,-50.867233,-174.875,MMWHS_test
236,ct_test_2039_image,-4.075738,-47.589798,-115.250,MMWHS_test
237,ct_test_2039_image,8.885210,-15.496021,-105.250,MMWHS_test
238,ct_test_2040_image,16.621979,4.101688,-126.500,MMWHS_test


In [None]:
# **** convert world coordinates dataframe to image ones (convenient for checking manually) ****

OSTIA_DATASET_IMG_COORDS_PATH = OSTIA_DATASET_PATH.with_name("ostia_img_coords.xlsx")

# Flip the guard to `True` to recompute the image coordinates from the scans' metadata.
if False:
    ostia_df_img_coords = ostia_df.copy()
    coord_columns = list("xyz")
    for dataset_folder in DATASET_DIRS:
        h5s = sorted(dataset_folder.glob("*.h5"))
        for path in h5s:
            # only the metadata is used here, so the file is closed right away
            _, meta, f = io_utils.load_h5_image(path)
            f.close()
            name = io_utils.stem(path)
            # rows holding the ostia of the current scan
            scan_rows = ostia_df_img_coords["ID"] == name
            coords = ostia_df_img_coords.loc[scan_rows, coord_columns].values
            img_coords = geometry.world_to_image_coords(
                coords, meta["offset"], meta["spacing"]
            )
            ostia_df_img_coords.loc[scan_rows, coord_columns] = img_coords
    ostia_df_img_coords[coord_columns] = ostia_df_img_coords[coord_columns].astype(
        np.int16
    )
    ostia_df_img_coords.to_excel(OSTIA_DATASET_IMG_COORDS_PATH, index=False)
    print(f"Saved ostia image coordinates dataset: {str(OSTIA_DATASET_IMG_COORDS_PATH)!r}")
else:
    ostia_df_img_coords = pd.read_excel(OSTIA_DATASET_IMG_COORDS_PATH)
    print(f"Loaded ostia image coordinates dataset: {str(OSTIA_DATASET_IMG_COORDS_PATH)!r}")
ostia_df_img_coords

Loaded ostia image coordinates dataset: '/home/marco/data/ostia_img_coords.xlsx'


Unnamed: 0,ID,x,y,z,dataset
0,ASOCA-000,231,219,149,ASOCA
1,ASOCA-000,246,289,166,ASOCA
2,ASOCA-001,181,265,128,ASOCA
3,ASOCA-001,211,332,148,ASOCA
4,ASOCA-002,249,283,174,ASOCA
...,...,...,...,...,...
235,ct_test_2038_image,214,186,189,MMWHS_test
236,ct_test_2039_image,189,199,112,MMWHS_test
237,ct_test_2039_image,231,303,128,MMWHS_test
238,ct_test_2040_image,226,263,184,MMWHS_test


---

### 2.2. Extract 3D patches around the ostia and plot some results

These patches are flattened and fitted with a Gaussian Mixture Model to identify the peak corresponding to the contrast
agent's HU intensity. First let's plot some of them to see what they look like.

In [None]:
# Folder with a handful of scans used for the visual checks below.
SAMPLES_DIR = Path("/home/marco/data/dataset_samples/")
samples = list(SAMPLES_DIR.glob("*.h5"))
samples.sort()

pprint(samples)

[PosixPath('/home/marco/data/dataset_samples/ASOCA-003.h5'),
 PosixPath('/home/marco/data/dataset_samples/ASOCA-015.h5'),
 PosixPath('/home/marco/data/dataset_samples/ct_test_2010_image.h5'),
 PosixPath('/home/marco/data/dataset_samples/ct_test_2030_image.h5'),
 PosixPath('/home/marco/data/dataset_samples/ct_train_1006_image.h5'),
 PosixPath('/home/marco/data/dataset_samples/ct_train_1014_image.h5')]


In [None]:
# SAVEDIR = None
SAVEDIR = Path("/home/marco/contrast-gan-3D/assets/ostia-patches")
if SAVEDIR is not None:
    SAVEDIR.mkdir(parents=True, exist_ok=True)

# Flip to `True` to also display a slice-by-slice overview of every full scan.
SHOW_FULL_VOLUME = False

for s in samples:
    print(s)

    img, meta, f = io_utils.load_h5_image(s)
    try:
        img: np.ndarray = img[::]  # needed for fast_trilinear fancy indexing
    finally:
        # release the file handle even when reading the volume fails
        f.close()

    title = io_utils.stem(s)

    if SHOW_FULL_VOLUME:
        matshow3d(
            img.swapaxes(-1, 0),
            every_n=20,
            cmap="gray",
            vmin=VMIN,
            vmax=VMAX,
            title=title,
            figsize=(12, 7),
        )
        plt.tight_layout()
        plt.show()
        plt.close()

    ostia_patches, ostia_world_coords = geometry.extract_ostia_patch_3D(
        img, meta, title, ostia_df
    )

    # image-space coordinates are only used to label the figures;
    # the unpacking below expects exactly two ostia per scan
    ostia_img_coords = geometry.world_to_image_coords(
        ostia_world_coords, meta["offset"], meta["spacing"]
    )
    title_coords0, title_coords1 = list(map(tuple, ostia_img_coords))

    # --- figure 1: one row of three panels per ostium patch ---
    fig, axes = plt.subplots(2, 3, figsize=(8, 5))
    for ostium, axes_row in zip(ostia_patches, axes):
        # do not rebind `axes` here: it is the grid being iterated over
        viz.plot_ostium_patch(ostium, axes=axes_row)
    fig.suptitle(
        f"{title} coords: {title_coords0} {title_coords1}",
    )
    plt.tight_layout()
    if SAVEDIR is None:
        plt.show()
    else:
        plt.savefig(SAVEDIR / f"{title}_ostia_patches.png")
    plt.close(fig)

    # --- figure 2: intensity histograms of the two flattened patches ---
    axes = viz.plot_image_histogram(
        (ostia_patches[0].ravel(), f"{title} {title_coords0}"),
        (ostia_patches[1].ravel(), f"{title} {title_coords1}"),
        tight=True,
        figsize=(7, 3),
    )
    plt.tight_layout()
    if SAVEDIR is None:
        plt.show()
    else:
        plt.savefig(SAVEDIR / f"{title}_ostia_patches_hist.png")
    plt.close(axes[0, 0].get_figure())

/home/marco/data/dataset_samples/ASOCA-003.h5


/home/marco/data/dataset_samples/ASOCA-015.h5
/home/marco/data/dataset_samples/ct_test_2010_image.h5
/home/marco/data/dataset_samples/ct_test_2030_image.h5
/home/marco/data/dataset_samples/ct_train_1006_image.h5
/home/marco/data/dataset_samples/ct_train_1014_image.h5


---

### 2.3. Create a dataset of ostia mean HU values and standard deviation

In [None]:
def GMM_grid_search(
    ostium_patch_1D: np.ndarray,
    max_n_components: int,
    max_iter: int = 1000,
    random_state: Optional[int] = None,
) -> GridSearchCV:
    """Fit Gaussian mixtures of increasing size and rank them by BIC.

    Args:
        ostium_patch_1D: samples handed unchanged to `GridSearchCV.fit`.
        max_n_components: largest number of mixture components tried; every
            value from 1 up to and including this one is evaluated.
        max_iter: `max_iter` given to each candidate mixture.
        random_state: `random_state` given to each candidate mixture.

    Returns:
        The fitted `GridSearchCV` object.
    """

    def negative_bic(estimator, X):
        # the grid search keeps the highest score, while a lower BIC is better
        return -estimator.bic(X)

    mixture = GMM(covariance_type="full", max_iter=max_iter, random_state=random_state)
    candidate_sizes = {"n_components": range(1, max_n_components + 1)}
    search = GridSearchCV(mixture, param_grid=candidate_sizes, scoring=negative_bic)
    return search.fit(ostium_patch_1D)


def pick_GMM_component(
    gmm: GMM, max_std: float = 150
) -> Tuple[Dict[str, float], pd.DataFrame]:
    """Pick the mixture component with the highest mean among the narrow-enough ones.

    Args:
        gmm: fitted mixture exposing `means_`, `covariances_` and `weights_`.
            The mixture is expected to model a single feature, i.e. one mean and
            one variance per component.
        max_std: components whose standard deviation exceeds this value are
            not eligible for selection.

    Returns:
        A `{"mu": ..., "std": ...}` dictionary describing the selected component,
        and a dataframe with columns `mu`, `std` and `weights` listing all the
        components sorted by decreasing mean.

    Raises:
        IndexError: if no component has a standard deviation of at most `max_std`.
    """
    # `reshape(-1)` always yields 1D arrays. `squeeze()` collapses the parameters
    # of a one-component mixture to 0-d arrays, which cannot be combined with
    # the 1D `weights_`.
    mu = np.asarray(gmm.means_).reshape(-1)
    std = np.sqrt(np.asarray(gmm.covariances_).reshape(-1))
    weights = np.asarray(gmm.weights_).reshape(-1)

    df = pd.DataFrame({"mu": mu, "std": std, "weights": weights}).sort_values(
        by="mu", ascending=False
    )

    candidates = df.loc[df["std"] <= max_std, ["mu", "std"]]
    if candidates.empty:
        raise IndexError(
            f"no GMM component has std <= {max_std} (found std values: {std.tolist()})"
        )
    return candidates.iloc[0].to_dict(), df


def compute_ostia_HU_value(
    h5_scan_path: Union[Path, str],
    ostia_df: pd.DataFrame,
    savedir: Optional[Union[Path, str]],
    gmm_max_components: int,
    seed: Optional[int] = None,
    show: bool = True,
) -> Tuple[
    Tuple[Dict[str, Union[float, str]], ...], Tuple[pd.DataFrame, ...], np.ndarray
]:
    """Estimate the HU peak at the two ostia of a scan and plot the fitted mixtures.

    For each of the two ostium patches, a Gaussian mixture is selected with
    `GMM_grid_search` on the flattened intensities, and `pick_GMM_component`
    chooses the component reported as the HU peak.

    Args:
        h5_scan_path: path of the scan, loaded with `io_utils.load_h5_image`.
        ostia_df: dataframe passed to `geometry.extract_ostia_patch_3D` to locate
            the ostia of the scan.
        savedir: folder where the figure is written as `<scan name>_ostia_HU.png`;
            it is created if missing. `None` disables saving.
        gmm_max_components: largest number of mixture components tried.
        seed: random state of the mixtures.
        show: whether to display the figure.

    Returns:
        A 3-tuple `(hu_peaks, component_tables, axes)`: a tuple with one
        `{"ID", "mu", "std"}` dictionary per ostium, a tuple with one dataframe of
        mixture components per ostium, and the 2x5 array of matplotlib axes (their
        figure has already been closed).
    """
    if savedir is not None:
        savedir = Path(savedir)
        savedir.mkdir(parents=True, exist_ok=True)

    img, meta, f = io_utils.load_h5_image(h5_scan_path)
    try:
        img = img[::]
    finally:
        # release the file handle even when reading the volume fails
        f.close()
    name = io_utils.stem(h5_scan_path)
    # 1. Sample the ostia patches
    ostia_patches, ostia_world_coords = geometry.extract_ostia_patch_3D(
        img, meta, name, ostia_df
    )
    res = []
    # each of the two patches is flattened to a column of samples with one feature
    for ostium in ostia_patches.reshape(2, -1, 1):
        # 2. Fit a GMM with a hyperparams search on the flat patches' intensities
        grid_search = GMM_grid_search(ostium, gmm_max_components, random_state=seed)
        gmm = grid_search.best_estimator_
        print("Optimal # components:", gmm.n_components)
        # 3. Select the Gaussian component with reasonable std and highest mean HU values
        hu_peak, df = pick_GMM_component(gmm)
        res.append(({"ID": name} | hu_peak, df))
        print(res[-1][0])

    # *** Plotting ***
    ostia_img_coords = geometry.world_to_image_coords(
        ostia_world_coords, meta["offset"], meta["spacing"]
    )
    title_coords0, title_coords1 = list(map(tuple, ostia_img_coords))

    # one row per ostium: histogram in the first column, GMM plots in the other four
    fig, axes = plt.subplots(2, 5, figsize=(16, 6))
    viz.plot_image_histogram(
        (ostia_patches[0].ravel(), str(title_coords0)),
        (ostia_patches[1].ravel(), str(title_coords1)),
        figsize=(6, 4),
        axes=axes[:, 0],
    )
    for ostium_patch, axes_row, (hu_peak, df) in zip(ostia_patches, axes, res):
        viz.plot_gmm_fitted_ostium_patch(
            ostium_patch,
            # the columns of `df` (mu, std, weights), each as a column vector
            *df.values.T[..., None],
            gmm_max_components,
            axes=axes_row[1:],
            title=f"mu: {hu_peak['mu']:.2f} std: {hu_peak['std']:.2f}",
        )
    fig.suptitle(f"{name}, columns 1-4: Z middle, 5: full 3D patch")
    plt.tight_layout()
    if savedir is not None:
        plt.savefig(savedir / f"{name}_ostia_HU.png")
    if show:
        plt.show()
    plt.close(fig)

    # transpose the (hu_peak, df) pairs into (all hu_peaks, all dfs)
    return *zip(*res), axes

Adjust the variables in the following cell to match your setup:

In [None]:
from itertools import chain

from contrast_gan_3D.config import DEFAULT_SEED

# Folders containing the `.h5` scans to analyse.
DATA_DIRS = [
    Path(f"/home/marco/data/{d}")
    for d in ("ASOCA_Philips/images", "MMWHS/ct_train", "MMWHS/ct_test")
]

# Spreadsheet where the HU peak (mean and std) of every ostium is stored.
HU_DF_FNAME = Path("/home/marco/data/ostia_HU.xlsx")

# Folder receiving one GMM diagnostic figure per scan.
PLOTS_SAVEDIR = "/home/marco/contrast-gan-3D/assets/ostia_HU_GMM/"

SEED = DEFAULT_SEED

# Largest number of mixture components tried for each patch.
GMM_N_MIXTURES = 6

# Whether the diagnostic figures are also displayed.
SHOW = False

# All the scans of all the folders, sorted by path.
scans = sorted(chain.from_iterable(d.glob("*.h5") for d in DATA_DIRS))

print(len(scans), HU_DF_FNAME, PLOTS_SAVEDIR, GMM_N_MIXTURES, SHOW, sep="\n")

ostia_df

120
/home/marco/data/ostia_HU.xlsx
/home/marco/contrast-gan-3D/assets/ostia_HU_GMM/
6
False


Unnamed: 0,ID,x,y,z,dataset
0,ASOCA-000,212.899704,211.107513,-103.750,ASOCA
1,ASOCA-000,219.139938,240.228622,-93.125,ASOCA
2,ASOCA-001,177.677750,260.138702,-95.750,ASOCA
3,ASOCA-001,189.806671,287.226593,-83.250,ASOCA
4,ASOCA-002,189.720184,229.135803,-110.125,ASOCA
...,...,...,...,...,...
235,ct_test_2038_image,14.879633,-50.867233,-174.875,MMWHS_test
236,ct_test_2039_image,-4.075738,-47.589798,-115.250,MMWHS_test
237,ct_test_2039_image,8.885210,-15.496021,-105.250,MMWHS_test
238,ct_test_2040_image,16.621979,4.101688,-126.500,MMWHS_test


**The following cell might overwrite existing files!**

In [None]:
# %%time

# Flip the guard to `True` to recompute the HU peaks of all the scans;
# otherwise the previously saved spreadsheet is loaded.
if False:
    hu_records = []
    for s in scans:
        print(s)
        hu_peaks, *_ = compute_ostia_HU_value(
            s, ostia_df, PLOTS_SAVEDIR, GMM_N_MIXTURES, seed=SEED, show=SHOW
        )
        print("---")
        # one record per ostium of the scan
        hu_records.extend(hu_peaks)

    ostia_HU_df = pd.DataFrame(hu_records)
    ostia_HU_df.to_excel(HU_DF_FNAME, index=False)
    print(f"Saved HU dataframe to '{HU_DF_FNAME}'")
else:
    ostia_HU_df = pd.read_excel(HU_DF_FNAME)
    print("Loaded ostia HU mean and std")

ostia_HU_df

Loaded ostia HU mean and std


Unnamed: 0,ID,mu,std
0,ASOCA-000,411.801545,38.914087
1,ASOCA-000,417.637377,39.911285
2,ASOCA-001,471.351123,38.684994
3,ASOCA-001,482.516019,29.968211
4,ASOCA-002,544.765628,29.733012
...,...,...,...
235,ct_train_1018_image,566.599794,110.788253
236,ct_train_1019_image,262.567222,39.186438
237,ct_train_1019_image,274.989892,76.035294
238,ct_train_1020_image,473.588226,108.706747


---

## 3. Create a final dataframe with all the extracted information and datapoints labels

In [None]:
def join_dfs(*dfs) -> pd.DataFrame:
    """Place dataframes side by side after sorting each of them by `ID`.

    Rows are matched by position, not by key: every dataframe is sorted by its
    `ID` column, re-indexed from 0 and concatenated column-wise. Columns whose
    name already appeared in an earlier dataframe are dropped.

    Args:
        *dfs: dataframes with an `ID` column. They are expected to contain the
            same IDs, each repeated the same number of times, with rows sharing
            an ID listed in the same relative order.

    Returns:
        A new dataframe with the columns of all the inputs.
    """
    out = pd.concat(
        [
            # A stable sort keeps the original relative order of the rows sharing
            # an ID (e.g. the two ostia of a scan), so they are paired
            # consistently across dataframes; the default sort gives no such guarantee.
            df.sort_values(by="ID", kind="stable").reset_index(drop=True)
            for df in dfs
        ],
        axis=1,
    )
    return out.loc[:, ~out.columns.duplicated()].copy()


def add_paths(df: pd.DataFrame, paths: Iterable[Union[Path, str]]) -> pd.DataFrame:
    """Return a copy of `df` with a `path` column holding `paths`.

    Args:
        df: dataframe with an `ID` column; it is not modified.
        paths: one path per row of `df`, in row order. Any iterable is accepted,
            including generators.

    Returns:
        A copy of `df` with the additional `path` column.

    Raises:
        AssertionError: if the number of paths differs from the number of rows, or
            if the stem of a path (as given by `io_utils.stem`) differs from the
            `ID` of the row at the same position.
    """
    # Materialise once: `paths` is measured, iterated and then stored, which a
    # one-shot iterator could not support.
    paths = list(paths)
    df = df.copy()
    assert len(df) == len(paths), f"got {len(paths)} paths for {len(df)} rows"
    for i, p in enumerate(paths):
        assert df.iloc[i]["ID"] == io_utils.stem(p), f"row {i} does not match {p}"
    df["path"] = paths
    return df


# Final spreadsheet with world coordinates, and its image-coordinates twin.
FINAL_DF_FNAME = Path("/home/marco/data/ostia_final.xlsx")
FINAL_DF_IMG_FNAME = FINAL_DF_FNAME.parent / (
    io_utils.stem(FINAL_DF_FNAME) + "_img.xlsx"
)

# Flip the guard to `True` to rebuild and save both final dataframes.
# NOTE: only `final_df_img` is available when the guard is `False`.
if False:
    final_df = dset_utils.label_ccta_scan(join_dfs(ostia_df, ostia_HU_df))
    final_df = add_paths(final_df, scans)
    final_df.to_excel(FINAL_DF_FNAME, index=False)
    print(f"Saved final ostia dataframe to {str(FINAL_DF_FNAME)!r}")

    final_df_img = dset_utils.label_ccta_scan(
        join_dfs(ostia_df_img_coords, ostia_HU_df)
    )
    final_df_img = add_paths(final_df_img, scans)
    final_df_img.to_excel(FINAL_DF_IMG_FNAME, index=False)
else:
    final_df_img = pd.read_excel(FINAL_DF_IMG_FNAME)
    # report the file that was actually read
    print(f"Loaded final ostia dataframe from {str(FINAL_DF_IMG_FNAME)!r}")

# display(final_df_img)

# Share of each label over the whole dataframe.
print()
for lab in sorted(final_df_img["label"].unique()):
    lab_cases = len(final_df_img.loc[final_df_img["label"] == lab])
    lab_fraction = lab_cases / len(final_df_img)
    print(f"Label {lab}: {lab_fraction:.3f} ({lab_cases} ostia)")

# Number of rows (and a preview of at most ten) per dataset and label.
print("\nBreakdown by dataset:")
for dataset in final_df_img["dataset"].unique():
    for lab in sorted(final_df_img["label"].unique()):
        subs = final_df_img.loc[
            (final_df_img["dataset"] == dataset) & (final_df_img["label"] == lab), :
        ]
        print(dataset, "label", lab, "len", len(subs))
        display(subs[:10])
# final_df_img[["ID", "dataset", "label"]].groupby(["dataset", "label"]).count()

Loaded final ostia dataframe from '/home/marco/data/ostia_final.xlsx'

Label -1: 0.083 (10 ostia)
Label 0: 0.567 (68 ostia)
Label 1: 0.350 (42 ostia)

Breakdown by dataset:
ASOCA label -1 len 6


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
29,ASOCA-029,244,272,192,ASOCA,273.977105,30.461825,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-02...
30,ASOCA-030,219,276,177,ASOCA,285.105477,27.802981,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-03...
34,ASOCA-034,225,352,185,ASOCA,285.912147,25.864995,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-03...
47,ASOCA-047,235,255,168,ASOCA,280.895817,24.402901,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-04...
52,ASOCA-052,189,229,156,ASOCA,278.077412,39.680528,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-05...
59,ASOCA-059,240,215,141,ASOCA,290.697866,51.0318,-1,/home/marco/data/ASOCA_Philips/images/ASOCA-05...


ASOCA label 0 len 31


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
0,ASOCA-000,231,219,149,ASOCA,411.801545,38.914087,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
1,ASOCA-001,211,332,148,ASOCA,482.516019,29.968211,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
5,ASOCA-005,257,285,177,ASOCA,499.879015,35.677081,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
7,ASOCA-007,265,240,188,ASOCA,433.869466,35.707495,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
8,ASOCA-008,214,225,187,ASOCA,460.968413,33.036155,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
9,ASOCA-009,221,265,179,ASOCA,431.659979,33.071831,0,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
14,ASOCA-014,253,281,172,ASOCA,494.713668,27.925255,0,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
18,ASOCA-018,245,303,202,ASOCA,354.409929,28.734544,0,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
19,ASOCA-019,267,270,200,ASOCA,493.164363,44.935987,0,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
20,ASOCA-020,249,311,172,ASOCA,321.612526,53.507392,0,/home/marco/data/ASOCA_Philips/images/ASOCA-02...


ASOCA label 1 len 23


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
2,ASOCA-002,249,283,174,ASOCA,544.765628,29.733012,1,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
3,ASOCA-003,245,258,169,ASOCA,507.543314,38.234905,1,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
4,ASOCA-004,220,240,140,ASOCA,565.964696,32.100598,1,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
6,ASOCA-006,218,293,160,ASOCA,604.620757,31.469407,1,/home/marco/data/ASOCA_Philips/images/ASOCA-00...
10,ASOCA-010,278,298,181,ASOCA,523.501385,21.935283,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
11,ASOCA-011,242,282,179,ASOCA,548.395856,27.786608,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
12,ASOCA-012,238,271,164,ASOCA,676.465494,21.337146,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
13,ASOCA-013,245,283,188,ASOCA,584.202409,37.658119,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
15,ASOCA-015,258,261,170,ASOCA,549.299168,31.638944,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...
16,ASOCA-016,263,277,176,ASOCA,574.329456,40.853517,1,/home/marco/data/ASOCA_Philips/images/ASOCA-01...


MMWHS_test label -1 len 3


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
62,ct_test_2003_image,224,271,157,MMWHS_test,242.056546,24.606822,-1,/home/marco/data/MMWHS/ct_test/ct_test_2003_im...
78,ct_test_2019_image,260,254,226,MMWHS_test,215.530402,27.383022,-1,/home/marco/data/MMWHS/ct_test/ct_test_2019_im...
83,ct_test_2024_image,172,234,167,MMWHS_test,270.417599,33.5941,-1,/home/marco/data/MMWHS/ct_test/ct_test_2024_im...


MMWHS_test label 0 len 25


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
60,ct_test_2001_image,247,180,152,MMWHS_test,397.849436,32.040146,0,/home/marco/data/MMWHS/ct_test/ct_test_2001_im...
63,ct_test_2004_image,216,260,148,MMWHS_test,481.654128,46.400977,0,/home/marco/data/MMWHS/ct_test/ct_test_2004_im...
64,ct_test_2005_image,206,176,169,MMWHS_test,459.142245,21.426214,0,/home/marco/data/MMWHS/ct_test/ct_test_2005_im...
65,ct_test_2006_image,207,169,161,MMWHS_test,415.729334,15.872078,0,/home/marco/data/MMWHS/ct_test/ct_test_2006_im...
67,ct_test_2008_image,237,239,196,MMWHS_test,426.543889,30.666097,0,/home/marco/data/MMWHS/ct_test/ct_test_2008_im...
68,ct_test_2009_image,174,198,207,MMWHS_test,423.607625,19.778494,0,/home/marco/data/MMWHS/ct_test/ct_test_2009_im...
69,ct_test_2010_image,221,260,188,MMWHS_test,445.199074,38.582628,0,/home/marco/data/MMWHS/ct_test/ct_test_2010_im...
70,ct_test_2011_image,235,184,132,MMWHS_test,475.119318,56.745715,0,/home/marco/data/MMWHS/ct_test/ct_test_2011_im...
71,ct_test_2012_image,229,248,178,MMWHS_test,493.479325,23.277013,0,/home/marco/data/MMWHS/ct_test/ct_test_2012_im...
73,ct_test_2014_image,289,218,169,MMWHS_test,406.137187,29.509502,0,/home/marco/data/MMWHS/ct_test/ct_test_2014_im...


MMWHS_test label 1 len 12


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
61,ct_test_2002_image,235,226,146,MMWHS_test,532.553771,22.615296,1,/home/marco/data/MMWHS/ct_test/ct_test_2002_im...
66,ct_test_2007_image,241,204,159,MMWHS_test,647.453215,24.534468,1,/home/marco/data/MMWHS/ct_test/ct_test_2007_im...
72,ct_test_2013_image,257,236,172,MMWHS_test,550.865266,27.731804,1,/home/marco/data/MMWHS/ct_test/ct_test_2013_im...
74,ct_test_2015_image,256,256,191,MMWHS_test,667.763922,25.467303,1,/home/marco/data/MMWHS/ct_test/ct_test_2015_im...
75,ct_test_2016_image,233,214,194,MMWHS_test,509.209842,45.554644,1,/home/marco/data/MMWHS/ct_test/ct_test_2016_im...
80,ct_test_2021_image,272,255,132,MMWHS_test,535.438507,31.298023,1,/home/marco/data/MMWHS/ct_test/ct_test_2021_im...
81,ct_test_2022_image,202,195,167,MMWHS_test,526.472954,12.836139,1,/home/marco/data/MMWHS/ct_test/ct_test_2022_im...
82,ct_test_2023_image,228,165,130,MMWHS_test,504.810607,34.2483,1,/home/marco/data/MMWHS/ct_test/ct_test_2023_im...
86,ct_test_2027_image,222,199,141,MMWHS_test,515.768858,27.512826,1,/home/marco/data/MMWHS/ct_test/ct_test_2027_im...
92,ct_test_2033_image,236,236,155,MMWHS_test,529.875195,31.065493,1,/home/marco/data/MMWHS/ct_test/ct_test_2033_im...


MMWHS_train label -1 len 1


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
118,ct_train_1019_image,226,195,207,MMWHS_train,262.567222,39.186438,-1,/home/marco/data/MMWHS/ct_train/ct_train_1019_...


MMWHS_train label 0 len 12


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
102,ct_train_1003_image,219,282,208,MMWHS_train,458.838156,57.623098,0,/home/marco/data/MMWHS/ct_train/ct_train_1003_...
104,ct_train_1005_image,244,252,130,MMWHS_train,430.494541,28.581185,0,/home/marco/data/MMWHS/ct_train/ct_train_1005_...
105,ct_train_1006_image,250,262,165,MMWHS_train,448.704421,22.785063,0,/home/marco/data/MMWHS/ct_train/ct_train_1006_...
106,ct_train_1007_image,276,186,180,MMWHS_train,447.038414,35.125838,0,/home/marco/data/MMWHS/ct_train/ct_train_1007_...
108,ct_train_1009_image,234,236,207,MMWHS_train,457.412214,26.166861,0,/home/marco/data/MMWHS/ct_train/ct_train_1009_...
109,ct_train_1010_image,222,267,191,MMWHS_train,440.820135,35.950214,0,/home/marco/data/MMWHS/ct_train/ct_train_1010_...
111,ct_train_1012_image,232,265,135,MMWHS_train,407.253263,39.433211,0,/home/marco/data/MMWHS/ct_train/ct_train_1012_...
112,ct_train_1013_image,244,244,162,MMWHS_train,383.440028,29.355579,0,/home/marco/data/MMWHS/ct_train/ct_train_1013_...
113,ct_train_1014_image,286,304,243,MMWHS_train,432.866969,45.033791,0,/home/marco/data/MMWHS/ct_train/ct_train_1014_...
114,ct_train_1015_image,206,229,219,MMWHS_train,460.250775,26.607164,0,/home/marco/data/MMWHS/ct_train/ct_train_1015_...


MMWHS_train label 1 len 7


Unnamed: 0,ID,x,y,z,dataset,mu,std,label,path
100,ct_train_1001_image,252,220,241,MMWHS_train,521.879836,51.027915,1,/home/marco/data/MMWHS/ct_train/ct_train_1001_...
101,ct_train_1002_image,211,185,159,MMWHS_train,534.442577,17.958058,1,/home/marco/data/MMWHS/ct_train/ct_train_1002_...
103,ct_train_1004_image,225,235,154,MMWHS_train,558.179969,26.217872,1,/home/marco/data/MMWHS/ct_train/ct_train_1004_...
107,ct_train_1008_image,243,222,152,MMWHS_train,556.268791,38.442475,1,/home/marco/data/MMWHS/ct_train/ct_train_1008_...
110,ct_train_1011_image,263,266,150,MMWHS_train,514.191496,24.591434,1,/home/marco/data/MMWHS/ct_train/ct_train_1011_...
117,ct_train_1018_image,258,261,142,MMWHS_train,550.968999,44.522588,1,/home/marco/data/MMWHS/ct_train/ct_train_1018_...
119,ct_train_1020_image,234,139,208,MMWHS_train,528.550507,65.28935,1,/home/marco/data/MMWHS/ct_train/ct_train_1020_...


In [None]:
# Label the ostia of the CAD-RADS dataset and summarise the label distribution.
cadrads_df = pd.read_excel("/home/marco/data/IDR_CADRADS/ostia.xlsx")

HU_df_cadrads_labs = dset_utils.label_ccta_scan(cadrads_df, is_cadrads=True)
display(HU_df_cadrads_labs)


cadrads_labels = HU_df_cadrads_labs["label"]
for lab in sorted(cadrads_labels.unique()):
    # number of rows carrying this label, and their share of the dataframe
    lab_cases = int((cadrads_labels == lab).sum())
    lab_fraction = lab_cases / len(HU_df_cadrads_labs)
    print(f"Label {lab}: {lab_fraction:.3f} ({lab_cases} cases)")

Unnamed: 0,ID,ostium_x,ostium_y,ostium_z,mu,std,label
0,3085,23,-178,1252,573.105302,57.902959,1
1,3086,7,-166,1251,531.562540,52.082457,1
2,3088,10,-153,1157,631.225881,50.576542,1
3,3094,16,-175,1266,551.367908,33.121890,1
4,3095,21,-156,1218,456.655950,46.438315,0
...,...,...,...,...,...,...,...
1274,Contrast3070,13,-262,1469,49.468737,30.930672,-1
1275,Contrast3075,25,-202,1906,346.022804,42.796284,0
1276,Contrast3077,18,-159,1298,446.098845,46.381451,0
1277,Contrast3079,1,-157,1254,488.943719,44.068676,0


Label -1: 0.075 (81 cases)
Label 0: 0.645 (699 cases)
Label 1: 0.280 (303 cases)
