In [4]:
import numpy as np
import scipy.ndimage as ndi
import nibabel as nib
import os

# function that loads an h5 file and zooms it using scipy zoom function and returns the zoomed image
def zoom_image(image, zoom_factor):
    # zoom the image using the zoom function from scipy
    new_image = ndi.zoom(image, zoom_factor, order=3)
    # return the new image
    return new_image

base = "/media/lm/Samsung_T5/Uni/Medml/training"
datapath = f"{base}/train/test_out"
datapath_out = f"{base}/train/test_out_adjust"

orig_path = f"{base}/train"

if not os.path.exists(datapath_out):
    os.makedirs(datapath_out)

files = os.listdir(datapath)
files = list(map(lambda x: x.split("_")[0], files))
files = list(set(files))

print(files)

for file in files:
    label = nib.load(os.path.join(orig_path, file + "_masks.nii.gz"))
    sum_ = None
    for fac in [1.5, 1.25, 1, 0.75, 0.5, 0.3]:
        print("factor: ", fac)
        dev = nib.load(os.path.join(datapath, file + "_" + str(fac) + "_dev.nii.gz")).get_fdata()
        pred = nib.load(os.path.join(datapath, file + "_" + str(fac) + "_pred.nii.gz")).get_fdata()

        pred /= dev

        pred_zoom = zoom_image(pred, 1 / fac)
        pred_zoom = pred_zoom[:256, :256, :220]
        nib.save(nib.Nifti1Image(pred_zoom, label.affine, label.header), os.path.join(datapath_out, f"{file}_{fac}_rescaled.nii.gz"))

        if sum_ is None:
            sum_ = pred_zoom[:256, :256, :220]
        else:
            sum_ += pred_zoom[:256, :256, :220]

    nib.save(nib.Nifti1Image(sum_, label.affine, label.header), os.path.join(datapath_out, file + "_sum_rescaled.nii.gz"))
    break


['A074', 'A086', 'A038', 'A001', 'A119', 'A070', 'A123', 'A032', 'A097', 'A094', 'A057', 'A076', 'A135', 'A066', 'A126', 'A060', 'A015', 'A091', 'A059', 'A014', 'A134', 'A043', 'A103', 'A083', 'A044', 'A078', 'A113', 'A108', 'A092', 'A017', 'A071', 'A085', 'A098', 'PA6', 'A056', 'A008', 'A084', 'A064', 'A029', 'A121', 'A012', 'A105', 'A003', 'A100', 'A089', 'A082', 'PA5', 'A024', 'A112', 'A096', 'A041', 'A062', 'A028', 'A087', 'A040', 'A081', 'A050', 'A026', 'A095', 'A027', 'A130', 'A051', 'A080', 'A079', 'A010', 'A093', 'A133', 'A138', 'A046', 'A067', 'A077']
factor:  1.5
factor:  1.25
factor:  1


KeyboardInterrupt: 

In [120]:
import threading
datapath_out = f"{base}/train/test_out_adjust"

orig_path = f"{base}/train"

if not os.path.exists(datapath_out):
    os.makedirs(datapath_out)

files = os.listdir(datapath)
files = list(map(lambda x: "_".join(x.split("_")[:-2]), files))
files = list(set(files))

print(files)

def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

# Jaccard
# Volume von orig aneu, volume von predicted aneu
# Detected / Nicht detected

def calc_aneu_stats(file, aneu_idx, aneus_labeled, pred_labeled, num_aneus, num_aneus_pred):
    cur = aneus_labeled == aneu_idx

    aneu_volume = cur.sum()


    pred_aneus = np.unique(pred_labeled[cur])

    num_predicted_aneus = len(pred_aneus) - 1

    pred_idx = -1
    if num_predicted_aneus == 0:
        jaccard = 0
        pred_aneus_volume = 0
    elif num_predicted_aneus > 1:
        jaccard = 0
        pred_aneus_volume = 0
    else:
        pred_idx = pred_aneus[1]
        cur_pred = pred_labeled == pred_idx

        jaccard = np.logical_and(cur, cur_pred).sum() / np.logical_or(cur, cur_pred).sum()

        pred_aneus_volume = cur_pred.sum()

    return [file, aneu_idx, pred_idx,  num_predicted_aneus, aneu_volume, pred_aneus_volume, jaccard, num_aneus, num_aneus_pred]


def calc_single_aneus_pred(pred):
    pred_labeled, num_single_aneus_pred = ndi.label(pred)

    keep = []
    for aneu_idx in range(1, num_single_aneus_pred + 1):
        cur = pred_labeled == aneu_idx
        if cur.sum() <= 60:
            print("Filtered out")
            pred_labeled[cur] = 0
        else:
            keep.append(aneu_idx)

    for idx, aneu_idx in enumerate(keep):
        pred_labeled[pred_labeled == aneu_idx] = idx + 1

    return pred_labeled, len(keep)

def calc_stats(thresh):
    mutex = threading.Lock()

    stats_per_aneu = []
    stats_per_pred = []
    stats_per_file = []

    def handle_file(file):
        label = nib.load(os.path.join(orig_path, file + "_masks.nii.gz")).get_fdata()
        label = label[:256, :256, :220]
        sum_ = nib.load(os.path.join(datapath_out, file + "_sum_rescaled.nii.gz")).get_fdata()

        pred = sum_ >= thresh

        aneus_labeled, num_single_aneus = ndi.label(label)
        pred_labeled, num_single_aneus_pred = calc_single_aneus_pred(pred)

        for i in range(1, num_single_aneus + 1):
            stats = calc_aneu_stats(file, i, aneus_labeled, pred_labeled, num_single_aneus, num_single_aneus_pred)

            mutex.acquire()
            stats_per_aneu.append(stats)
            mutex.release()

        for i in range(1, num_single_aneus_pred + 1):
            stats = calc_aneu_stats(file, i, pred_labeled, aneus_labeled, num_single_aneus_pred, num_single_aneus)
            mutex.acquire()
            stats_per_pred.append(stats)
            mutex.release()

        # Per File Stats

        mutex.acquire()

        overall_jacard = np.logical_and(label, pred).sum() / np.logical_or(label, pred).sum()

        stats_per_file.append([file, num_single_aneus, num_single_aneus_pred, overall_jacard])
        mutex.release()

    for b_idx, batch_ in enumerate(batch(files, 8)):
        print(f"Batch {b_idx} / {len(files) // 8 + 1}")

        threads = []
        for idx, file in enumerate(batch_):
            print(f"{b_idx * 8 + idx}/{len(files)}: {file}")

            thread = threading.Thread(target=handle_file, args=(file,))
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

    return stats_per_aneu, stats_per_pred, stats_per_file


['A074', 'A086', 'A130_L', 'A001', 'A119', 'A070', 'A105_R', 'A032', 'A097', 'A078_L', 'A123', 'A091_R', 'A096_L', 'A057', 'A076', 'A135', 'A066', 'A126', 'A060', 'A015', 'A017_L', 'A089_R', 'A014', 'A094_R', 'A134', 'A043', 'A103', 'A051_R', 'A044', 'A083', 'A113', 'A108', 'A092', 'A062_L', 'A071', 'A085', 'A098', 'PA6', 'A056', 'A008', 'A084', 'A064', 'A029', 'A121', 'A012', 'A003', 'A041', 'A100', 'A096_R', 'A038_R', 'A082', 'PA5', 'A024', 'A112', 'A028', 'A087', 'A040', 'A081', 'A130_R', 'A050', 'A026', 'A095', 'A027', 'A059_L', 'A105_L', 'A080', 'A038_M', 'A079', 'A010', 'A093', 'A133', 'A138', 'A046', 'A067', 'A077', 'A038_L']


In [125]:
import pandas as pd

stats = []
for thresh in [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4]:
    print("====================================")
    print(f"Thresh: {thresh}")
    print("====================================")

    if os.path.exists(f"{base}/train/test_out_adjust/file_stats_{thresh}_2.csv"):
        df_aneus = pd.read_csv(f"{base}/train/test_out_adjust/aneus_stats_{thresh}_2.csv")
        df_pred = pd.read_csv(f"{base}/train/test_out_adjust/pred_stats_{thresh}_2.csv")
        df_file = pd.read_csv(f"{base}/train/test_out_adjust/file_stats_{thresh}_2.csv")
    else:
        stats_per_aneu, stats_per_pred, stats_per_file = calc_stats(thresh)

        df_aneus = pd.DataFrame(stats_per_aneu, columns=["file", "aneu_idx", "pred_idx", "num_predicted_aneus", "aneu_volume", "pred_aneus_volume", "jaccard", "num_aneus", "num_aneus_pred"])
        df_pred = pd.DataFrame(stats_per_pred, columns=["file", "aneu_idx", "pred_idx", "num_predicted_aneus", "aneu_volume", "pred_aneus_volume", "jaccard", "num_aneus", "num_aneus_pred"])
        df_file = pd.DataFrame(stats_per_file, columns=["file", "num_aneus", "num_predicted_aneus", "overall_jacard"])

        df_aneus.to_csv(f"{base}/train/test_out_adjust/aneus_stats_{thresh}_2.csv")
        df_pred.to_csv(f"{base}/train/test_out_adjust/pred_stats_{thresh}_2.csv")
        df_file.to_csv(f"{base}/train/test_out_adjust/file_stats_{thresh}_2.csv")

    false_negative = df_aneus[df_aneus["jaccard"] == 0]
    true_positive = df_aneus[df_aneus["jaccard"] > 0]
    false_positive = df_pred[df_pred["jaccard"] == 0]

    false_negative_rate = len(false_negative)
    true_positive_rate = len(true_positive)
    false_positive_rate = len(false_positive)

    aneu_jacard = df_aneus["jaccard"].mean()
    tp_jaccard = true_positive["jaccard"].mean()
    overall_jacard = df_file["overall_jacard"].mean()

    stats.append([thresh, false_negative_rate, true_positive_rate, false_positive_rate, aneu_jacard, tp_jaccard, overall_jacard])

Thresh: 0.5
Thresh: 0.75
Thresh: 1
Thresh: 1.25
Thresh: 1.5
Thresh: 1.75
Thresh: 2
Thresh: 2.25
Thresh: 2.5
Thresh: 2.75
Thresh: 3
Batch 0 / 10
0/76: A074
1/76: A086
2/76: A130_L
3/76: A001
4/76: A119
5/76: A070
6/76: A105_R
7/76: A032
Filtered out
Filtered out
Filtered out
Batch 1 / 10
8/76: A097
9/76: A078_L
10/76: A123
11/76: A091_R
12/76: A096_L
13/76: A057
14/76: A076
15/76: A135
Filtered out
Batch 2 / 10
16/76: A066
17/76: A126
18/76: A060
19/76: A015
20/76: A017_L
21/76: A089_R
22/76: A014
23/76: A094_R
Filtered out
Filtered out
Filtered out
Batch 3 / 10
24/76: A134
25/76: A043
26/76: A103
27/76: A051_R
28/76: A044
29/76: A083
30/76: A113
31/76: A108
Batch 4 / 10
32/76: A092
33/76: A062_L
34/76: A071
35/76: A085
36/76: A098
37/76: PA6
38/76: A056
39/76: A008
Batch 5 / 10
40/76: A084
41/76: A064
42/76: A029
43/76: A121
44/76: A012
45/76: A003
46/76: A041
47/76: A100
Batch 6 / 10
48/76: A096_R
49/76: A038_R
50/76: A082
51/76: PA5
52/76: A024
53/76: A112
54/76: A028
55/76: A087
Fil

In [127]:
thresh = 1.5
df_aneus = pd.read_csv(f"{base}/train/test_out_adjust/aneus_stats_{thresh}_2.csv")
df_pred = pd.read_csv(f"{base}/train/test_out_adjust/pred_stats_{thresh}_2.csv")
df_file = pd.read_csv(f"{base}/train/test_out_adjust/file_stats_{thresh}_2.csv")

df_aneus["volume_diff"] = df_aneus["aneu_volume"] - df_aneus["pred_aneus_volume"]
df_aneus["volume_diff"]



0      -219
1       917
2      -190
3       120
4         3
      ...  
82    28419
83     -197
84      165
85      -70
86       91
Name: volume_diff, Length: 87, dtype: int64

In [126]:
df_stats = pd.DataFrame(stats, columns=["thresh", "false_negative_rate", "true_positive_rate", "false_positive_rate", "aneu_jaccard", "tp_jaccard", "overall_jacard"])

df_stats.sort_values(by="true_positive_rate", ascending=False)

Unnamed: 0,thresh,false_negative_rate,true_positive_rate,false_positive_rate,aneu_jaccard,tp_jaccard,overall_jacard
2,1.0,12,75,22,0.61998,0.719177,0.689528
4,1.5,12,75,16,0.633683,0.735072,0.676309
5,1.75,12,75,17,0.611487,0.709325,0.642807
3,1.25,13,74,19,0.635348,0.746963,0.692992
6,2.0,16,71,29,0.54545,0.668368,0.582632
1,0.75,19,68,33,0.549363,0.702861,0.643067
7,2.25,20,67,26,0.475339,0.617232,0.509118
8,2.5,27,60,27,0.401953,0.582832,0.436552
0,0.5,30,57,53,0.427932,0.653159,0.568212
9,2.75,36,51,24,0.322501,0.550149,0.366069


In [10]:

t = np.zeros((5, 5))
t2 = np.zeros((5, 5))

t[1, 1] = 1
t2[1, 1] = 1
t2[1, 2] = 1

t3 = np.maximum(t, t2)
t3

array([[0., 0., 0., 0., 0.],
       [0., 1., 1., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])