In [6]:
import pandas as pd
import plotly.express as px

labels_cytoplasm = pd.read_csv("bboxes/labels/cytoplasm/bb_data_cytoplasm.csv")
labels_nucleus = pd.read_csv("bboxes/labels/nucleus/bb_data_nucleus.csv")
segm_cytoplasm = pd.read_csv("bboxes/segmentation/cytoplasm/bb_data_cytoplasm.csv")
segm_nucleus = pd.read_csv("bboxes/segmentation/nucleus/bb_data_nucleis.csv")

In [7]:
results = (
    labels_cytoplasm.merge(labels_nucleus, on="img", suffixes=("_cyto_lbl", "_nuc_lbl"))
    .merge(segm_cytoplasm, on="img", suffixes=("", "_cyto_segm"))
    .merge(segm_nucleus, on="img", suffixes=("", "_nuc_segm"))
)

results.columns = ["img", "cyto_lbl", "nuc_lbl", "cyto_segm", "nuc_segm"]

results.head()

Unnamed: 0,img,cyto_lbl,nuc_lbl,cyto_segm,nuc_segm
0,1,19,26,14,25
1,2,17,25,14,18
2,3,18,28,16,21
3,4,23,42,17,38
4,5,19,22,18,20


In [8]:
results["cyto_dif"] = results["cyto_lbl"] - results["cyto_segm"]
results["nuc_dif"] = results["nuc_lbl"] - results["nuc_segm"]

results.head()

Unnamed: 0,img,cyto_lbl,nuc_lbl,cyto_segm,nuc_segm,cyto_dif,nuc_dif
0,1,19,26,14,25,5,1
1,2,17,25,14,18,3,7
2,3,18,28,16,21,2,7
3,4,23,42,17,38,6,4
4,5,19,22,18,20,1,2


In [9]:
results["cyto_dif"].describe()

count    50.000000
mean      2.900000
std       2.459882
min      -2.000000
25%       1.250000
50%       3.000000
75%       4.000000
max       9.000000
Name: cyto_dif, dtype: float64

In [10]:
results["nuc_dif"].describe()

count    50.000000
mean      3.060000
std       2.298269
min      -1.000000
25%       2.000000
50%       3.000000
75%       4.750000
max       7.000000
Name: nuc_dif, dtype: float64

In [11]:
results["color"] = results["nuc_dif"].apply(lambda x: "red" if x < 0 else "green")

fig = px.bar(
    results,
    x="img",
    y="nuc_dif",
    color="color",
    color_discrete_map={"red": "rgba(255, 0, 0, 0.5)", "green": "rgba(0, 255, 0, 0.5)"},
    template="plotly_white",
    labels={"nuc_dif": "Rozdíl počtu jader", "img": "ID snímku"},
)

fig.update_layout(showlegend=False, width=1200, font=dict(size=16))


fig.show()

In [12]:
results["color"] = results["cyto_dif"].apply(lambda x: "red" if x < 0 else "green")

fig = px.bar(
    results,
    x="img",
    y="cyto_dif",
    color="color",
    color_discrete_map={"red": "rgba(255, 0, 0, 0.5)", "green": "rgba(0, 255, 0, 0.5)"},
    template="plotly_white",
    labels={"cyto_dif": "Rozdíl počtu cytoplasem", "img": "ID snímku"},
)

fig.update_layout(showlegend=False, width=1200, font=dict(size=16))

fig.show()

In [13]:
from pathlib import Path

import numpy as np
from sklearn.metrics import f1_score

cyto_lbl_dir = Path("bboxes/labels/cytoplasm")
cyto_segm_dir = Path("bboxes/segmentation/cytoplasm")

cyto_score = []

for label_img in cyto_lbl_dir.glob("*.npy"):
    groundtruth = np.load(label_img).flatten()
    my_segm = np.load(cyto_segm_dir / f"{label_img.stem}_segmented.npy").flatten()

    groundtruth = np.where(groundtruth == 255, 1, groundtruth)
    my_segm = np.where(my_segm == 255, 1, my_segm)

    cyto_score.append((label_img.stem, f1_score(groundtruth, my_segm)))

In [14]:
cyto_score = pd.DataFrame(cyto_score, columns=["img", "score"])
cyto_score.head()

Unnamed: 0,img,score
0,6,0.813582
1,20,0.818199
2,24,0.692241
3,12,0.783794
4,37,0.832467


In [15]:
cyto_score.describe()

Unnamed: 0,score
count,50.0
mean,0.804331
std,0.05829
min,0.521506
25%,0.793874
50%,0.814527
75%,0.834493
max,0.893524


In [16]:
nuc_lbl_dir = Path("bboxes/labels/nucleus")
nuc_segm_dir = Path("bboxes/segmentation/nucleus")

nuc_score = []

for label_img in nuc_lbl_dir.glob("*.npy"):
    groundtruth = np.load(label_img).flatten()
    my_segm = np.load(nuc_segm_dir / f"{label_img.stem}_segmented.npy").flatten()

    groundtruth = np.where(groundtruth == 255, 1, groundtruth)
    my_segm = np.where(my_segm == 255, 1, my_segm)

    nuc_score.append((label_img.stem, f1_score(groundtruth, my_segm)))

In [17]:
nuc_score = pd.DataFrame(nuc_score, columns=["img", "score"])
nuc_score.head()

Unnamed: 0,img,score
0,6,0.820443
1,20,0.788627
2,24,0.783909
3,12,0.775533
4,37,0.789618


In [18]:
nuc_score.describe()

Unnamed: 0,score
count,50.0
mean,0.783483
std,0.046379
min,0.683486
25%,0.751245
50%,0.788121
75%,0.8153
max,0.897396


In [19]:
print(type(cyto_score))
scores = cyto_score.merge(nuc_score, on="img", suffixes=("_cyto", "_nuc"))

scores.head()

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,img,score_cyto,score_nuc
0,6,0.813582,0.820443
1,20,0.818199,0.788627
2,24,0.692241,0.783909
3,12,0.783794,0.775533
4,37,0.832467,0.789618


In [21]:
results_melt = results.melt(id_vars=["img"], var_name="Column", value_name="Value")

results_melt.head()

Unnamed: 0,img,Column,Value
0,1,cyto_lbl,19
1,2,cyto_lbl,17
2,3,cyto_lbl,18
3,4,cyto_lbl,23
4,5,cyto_lbl,19


In [22]:
from pathlib import Path

cytolbl_bbox_dir = Path("bboxes/labels/cytoplasm")
cytosegm_bbox_dir = Path("bboxes")

In [23]:
results_melt.columns

Index(['img', 'Column', 'Value'], dtype='object')

In [24]:
results_melt["Column"].unique()

array(['cyto_lbl', 'nuc_lbl', 'cyto_segm', 'nuc_segm', 'cyto_dif',
       'nuc_dif', 'color'], dtype=object)

In [25]:
filtered = results_melt.loc[
    results_melt["Column"].isin(["cyto_lbl", "nuc_lbl", "cyto_segm", "nuc_segm"])
]

custom_order = ["cyto_lbl", "cyto_segm", "nuc_lbl", "nuc_segm"]

filtered["Column"] = pd.Categorical(
    filtered["Column"], categories=custom_order, ordered=True
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [26]:
fig = px.box(
    filtered,
    x="Column",
    y="Value",
    color="Column",
    template="plotly_white",
    labels={"Value": "Počet objektů"},
    category_orders={"Column": custom_order},
)

fig.update_xaxes(
    ticktext=[
        "Cytoplasma (anotace)",
        "Cytoplasma (segmentace)",
        "Jádra (anotace)",
        "Jádra (segmentace)",
    ],
    tickvals=["cyto_lbl", "cyto_segm", "nuc_lbl", "nuc_segm"],
)

fig.update_layout(
    width=1200, height=500, showlegend=False, xaxis_title=None, font=dict(size=16)
)

fig.show()

In [27]:
from pathlib import Path

segmented_watershed_aabb = Path("results_data/filled_boxes/aabb/").glob("*.npy")
segmented_watershed_obb = Path("results_data/filled_boxes/obb/").glob("*.npy")
labels_watershed_aabb = Path("labels/filled_boxes/aabb/").glob("*.npy")
labels_watershed_obb = Path("labels/filled_boxes/obb/").glob("*.npy")

In [28]:
import numpy as np
import pandas as pd
from skimage.segmentation import clear_border
from sklearn.metrics import f1_score


def calculate_f1_score(segmented_paths, box_type: str):
    scores = []
    for segm_path in segmented_paths:
        segm_name = segm_path.stem
        img_id = segm_name.split("_")[0]

        label_path = f"labels/filled_boxes/{box_type}/{img_id}_watershed_{box_type}.npy"

        segm = np.load(segm_path)
        label = np.load(label_path)

        segm_bin = np.where(segm > 0, 1, 0)
        label_bin = np.where(label > 0, 1, 0)

        segm_bin = clear_border(segm_bin)
        label_bin = clear_border(label_bin)

        score = f1_score(label_bin.flatten(), segm_bin.flatten())
        scores.append((score, img_id))

    return scores


segm_aabb = calculate_f1_score(segmented_watershed_aabb, "aabb")
segm_obb = calculate_f1_score(segmented_watershed_obb, "obb")
segm_aabb = pd.DataFrame(segm_aabb, columns=["score", "img"])
segm_obb = pd.DataFrame(segm_obb, columns=["score", "img"])
segm_aabb["type"] = "aabb"
segm_obb["type"] = "obb"

In [29]:
import plotly.express as px

fig = px.box(
    pd.concat([segm_aabb, segm_obb]),
    x="type",
    y="score",
    color="type",
    template="plotly_white",
    labels={"score": "F1 skóre"},
    # add points
    points="all",
)
fig.update_xaxes(ticktext=["AABB", "OBB"], tickvals=["aabb", "obb"])
fig.update_layout(
    showlegend=False,
    width=1200,
    height=500,
    xaxis_title=None,
    font=dict(size=16),
)

fig.show()