In [27]:
import os
from os.path import join, dirname, isdir
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm, TwoSlopeNorm
import pandas as pd
import numpy as np
from copy import deepcopy
import seaborn as sns

sns.set(color_codes=True, style="white")
from statannot import add_stat_annotation

### Parameters

In [21]:
# Number of evenly spaced bin edges per heatmap axis (passed to np.linspace
# further down), which yields bins - 1 bins along each axis.
bins = 6

# Root folder holding the processed results of every method.
folder = "/Volumes/AnalysisGG/PROCESSED_DATA/JPCB-CondensateBoundaryDetection/Simulated-4096"

# Folder that receives the exported figure PNGs.
folder_save = (
    "/Users/GGM/Documents/Graduate_Work/Nils_Walter_Lab/Writing/MyPublications/ResearchArticle-JPCB/Revision1"
)

# Work from the data root so the relative sub-folder names used by the loading
# cells resolve against it.
os.chdir(folder)

# Names of the metrics that key every lookup table below.
lst_metric = [
    "deviation_center",
    "rmsd_edge",
    "fold_deviation_area",
    "fold_deviation_PC",
    "fold_deviation_PC_max",
]

# Human-readable label (with unit where applicable) for each metric.
dict_subtitle = dict(
    deviation_center="Center Deviation, nm",
    rmsd_edge="Edge Deviation RMSD, nm",
    fold_deviation_area="Area Deviation Fold Change",
    fold_deviation_PC="PC Deviation Fold Change",
    fold_deviation_PC_max="PC-max Deviation Fold Change",
)

# Colormap name used when drawing the heatmap of each metric.
dict_cmap = dict(
    deviation_center="magma",
    rmsd_edge="magma",
    fold_deviation_area="seismic",
    fold_deviation_PC="seismic",
    fold_deviation_PC_max="seismic",
)
cmap_default = "magma"

# (vmin, vmax) of the color normalization applied to each metric's heatmap.
dict_vrange = dict(
    deviation_center=(70, 150),
    rmsd_edge=(50, 200),
    fold_deviation_area=(0, 2),
    fold_deviation_PC=(0, 2),
    fold_deviation_PC_max=(0, 2),
)

# (vmin, vmax) of the logarithmic normalization built for each metric's
# variance (see `var_norm`).
dict_vrange_var = dict(
    deviation_center=(10**2, 10**3),
    rmsd_edge=(1, 10**3),
    fold_deviation_area=(10 ** (-4), 10 ** (-1)),
    fold_deviation_PC=(10 ** (-5), 10 ** (-3)),
    fold_deviation_PC_max=(10 ** (-5), 10 ** (-3)),
)

### Functions

In [24]:
def assemble_heatmap(r, pc, df_result, heatmap, metric=None, operation="rate"):
    """Fill ``heatmap`` with one statistic per (truth_r, truth_pc) bin.

    Bin ``(row, column)`` collects the rows of ``df_result`` that satisfy
    ``r[row] < truth_r <= r[row + 1]`` and
    ``pc[column] < truth_pc <= pc[column + 1]``. Both intervals are open on
    the lower edge, so a value exactly equal to ``r[0]`` or ``pc[0]`` falls
    into no bin.

    Parameters
    ----------
    r, pc : sequence of numbers
        Bin edges along the row and column axis; ``len(r) - 1`` rows and
        ``len(pc) - 1`` columns are filled.
    df_result : pandas.DataFrame
        Must contain the columns ``"truth_r"`` and ``"truth_pc"``, plus
        ``"success"`` when ``operation == "rate"`` or ``metric`` otherwise.
    heatmap : array indexable as ``heatmap[row, column]``
        Output array. It is MODIFIED IN PLACE; pass a copy if the caller's
        array must stay untouched.
    metric : str, optional
        Column to aggregate; only read for ``"mean"`` and ``"var"``.
    operation : {"rate", "mean", "var"}
        ``"rate"``: fraction of rows in the bin whose ``success`` equals
        False. ``"mean"`` / ``"var"``: NaN-ignoring mean / variance of
        ``metric`` in the bin.

    Returns
    -------
    The same ``heatmap`` object that was passed in. Bins that are empty, or
    whose ``metric`` values are all NaN, are set to NaN.

    Raises
    ------
    ValueError
        If ``operation`` is not one of the supported names.
    """
    # Reject typos up front, before any cell is written; otherwise the array
    # would be returned with whatever values it held before the call.
    if operation not in ("rate", "mean", "var"):
        raise ValueError(
            f"Unknown operation {operation!r}; expected 'rate', 'mean' or 'var'"
        )

    for row in range(len(r) - 1):
        # The truth_r selection only depends on the row: compute it once.
        within_r_range = df_result[
            (df_result["truth_r"] > r[row]) & (df_result["truth_r"] <= r[row + 1])
        ]
        for column in range(len(pc) - 1):
            within_bin = within_r_range[
                (within_r_range["truth_pc"] > pc[column])
                & (within_r_range["truth_pc"] <= pc[column + 1])
            ]
            n_total = within_bin.shape[0]

            if operation == "rate":
                if n_total == 0:
                    # No data in this bin: the rate is undefined (0 / 0).
                    heatmap[row, column] = np.nan
                else:
                    # `== False` (not `~`) so that 0.0/1.0 encoded columns
                    # work and NaN entries are not counted as failures.
                    n_failed = (within_bin["success"] == False).sum()  # noqa: E712
                    heatmap[row, column] = n_failed / n_total
                continue

            values = within_bin[metric]
            # Covers both the empty bin and the all-NaN bin.
            if np.isnan(values).sum() == n_total:
                heatmap[row, column] = np.nan
                continue

            if operation == "mean":
                heatmap[row, column] = np.nanmean(values)
            else:  # operation == "var"
                heatmap[row, column] = np.nanvar(values)

    return heatmap

### Main

#### Load Method 1-3

In [17]:
# Sub-folders of `folder` holding the Method 1-3 results: every directory whose
# name starts with neither "Result" nor "ilastik". The directory test uses the
# full path so it does not depend on the current working directory.
lst_subfolders_1_3 = [
    f
    for f in os.listdir(folder)
    if isdir(join(folder, f))
    and not f.startswith("Result")
    and not f.startswith("ilastik")
]
lst_subfolders_1_3

['Method-1-Denoise_Threshold', 'Method-3-GaussFit', 'Method-2-Canny']

In [18]:
# Load the results table of Method 1-3: in each method's sub-folder, read the
# file whose name ends with "results.csv", parsing every column as float.
dict_df_result_1_3 = {}
for subfolder in (
    "Method-1-Denoise_Threshold",
    "Method-2-Canny",
    "Method-3-GaussFit",
):
    path_subfolder = join(folder, subfolder)
    # Sorted so the choice is deterministic if several files match.
    lst_fname = sorted(
        f for f in os.listdir(path_subfolder) if f.endswith("results.csv")
    )
    if not lst_fname:
        raise FileNotFoundError(f"No '*results.csv' file found in {path_subfolder!r}")
    fname = lst_fname[0]
    dict_df_result_1_3[subfolder] = pd.read_csv(
        join(path_subfolder, fname), dtype=float
    )

df_result_M1 = dict_df_result_1_3["Method-1-Denoise_Threshold"]
df_result_M2 = dict_df_result_1_3["Method-2-Canny"]
df_result_M3 = dict_df_result_1_3["Method-3-GaussFit"]

#### Load Method 4

In [20]:
# Pool the Method 4 data from all "ilastik*" sub-folders of `folder` into a
# single table. The frames are stacked in listing order and keep their own row
# index (no re-indexing).
lst_subfolders = [
    f
    for f in os.listdir(folder)
    if isdir(join(folder, f)) and f.startswith("ilastik")
]
if not lst_subfolders:
    raise FileNotFoundError(f"No 'ilastik*' sub-folder found in {folder!r}")

lst_df_M4 = []
for subfolder in lst_subfolders:
    path_subfolder = join(folder, subfolder)
    # Sorted so the choice is deterministic if several files match.
    lst_fname = sorted(
        f for f in os.listdir(path_subfolder) if f.endswith("results.csv")
    )
    if not lst_fname:
        raise FileNotFoundError(f"No '*results.csv' file found in {path_subfolder!r}")
    fname = lst_fname[0]
    df_current = pd.read_csv(join(path_subfolder, fname), dtype=float)
    lst_df_M4.append(df_current)

# Concatenate once instead of growing the frame inside the loop.
df_result_M4 = pd.concat(lst_df_M4)

#### Single Plot

In [35]:
# Bin edges along each heatmap axis: `bins` evenly spaced points, hence
# bins - 1 bins per axis. `r` is matched against the "truth_r" column and `pc`
# against "truth_pc" when the heatmaps are assembled.
r = np.linspace(100, 600, num=bins)
pc = np.linspace(2, 10, num=bins)

# All-zero (bins - 1) x (bins - 1) array handed to assemble_heatmap as output.
heatmap_base = np.zeros(shape=(bins - 1, bins - 1))

# Tick labels shared by all heatmaps: the midpoint of every bin, rounded to
# 2 decimals on the x axis (pc) and to an integer on the y axis (r).
xticks = [round(mid, 2) for mid in (pc[1:] + pc[:-1]) / 2]
yticks = [round(mid) for mid in (r[1:] + r[:-1]) / 2]

In [102]:
# Choose what to plot for the single-figure export.
metric = "rmsd_edge"
operation = "mean"  # "rate" for fail rate, "mean" or "var" for all other metrics

# assemble_heatmap writes into the array it receives and returns that same
# object, so give it a copy: heatmap_base then stays an all-zero template and
# heatmap_toplot is not overwritten by later calls.
heatmap_toplot = assemble_heatmap(
    r,
    pc,
    df_result_M1,
    heatmap_base.copy(),
    metric,
    operation,
)
path_save = join(folder_save, "Fig5B-M1.png")

In [103]:
# Pick the right normalization for the selected metric:
#   - deviation / RMSD metrics -> logarithmic color scale,
#   - fold-change metrics      -> diverging scale centered on 1,
#   - variance of any metric   -> logarithmic color scale (var_norm).
if operation == "rate":
    # The fail rate is not tied to a metric, so no normalization applies.
    norm = None
    var_norm = None
elif metric in ("deviation_center", "rmsd_edge"):
    norm = LogNorm(vmin=dict_vrange[metric][0], vmax=dict_vrange[metric][1])
    var_norm = LogNorm(vmin=dict_vrange_var[metric][0], vmax=dict_vrange_var[metric][1])
elif metric in (
    "fold_deviation_area",
    "fold_deviation_PC",
    "fold_deviation_PC_max",
):
    norm = TwoSlopeNorm(1, vmin=dict_vrange[metric][0], vmax=dict_vrange[metric][1])
    var_norm = LogNorm(vmin=dict_vrange_var[metric][0], vmax=dict_vrange_var[metric][1])
else:
    # Fail loudly instead of leaving `norm` undefined or stale from an earlier
    # run of this cell.
    raise ValueError(f"No color normalization defined for metric {metric!r}")

In [None]:
# Draw the binned heatmap with its annotations and save it to `path_save`.
fig, ax = plt.subplots(figsize=(5, 5), dpi=300)

if operation == "rate":
    # The fail rate is a fraction: fixed linear 0-1 color scale, seaborn's
    # default annotation format, no metric-specific normalization.
    kwargs_color = {"cmap": cmap_default, "vmin": 0, "vmax": 1}
else:
    # Metric statistics: per-metric colormap and normalization, annotations
    # printed without decimals.
    kwargs_color = {"cmap": dict_cmap[metric], "norm": norm, "fmt": ".0f"}

sns.heatmap(
    data=heatmap_toplot,
    xticklabels=xticks,
    yticklabels=yticks,
    annot=True,
    cbar=False,
    annot_kws={"fontsize": 15},
    ax=ax,
    **kwargs_color,
)
# Put the first row of the array at the bottom of the plot.
ax.invert_yaxis()
ax.tick_params(axis="both", which="major", labelsize=20)
fig.tight_layout()
fig.savefig(path_save, format="png", bbox_inches="tight")

#### Multiple Plots for Annotator Bias

In [105]:
# Show the "ilastik*" sub-folders of `folder` (one per annotator). The
# directory test uses the full path so it does not depend on the current
# working directory.
[
    f
    for f in os.listdir(folder)
    if isdir(join(folder, f)) and f.startswith("ilastik")
]

['ilastik-Researcher-1',
 'ilastik-Researcher-6',
 'ilastik-Researcher-7',
 'ilastik-Researcher-5',
 'ilastik-Researcher-2',
 'ilastik-Researcher-3',
 'ilastik-Researcher-4']

In [106]:
# "ilastik*" sub-folders of `folder`, one heatmap is produced for each. The
# directory test uses the full path so it does not depend on the current
# working directory.
lst_subfolders = [
    f
    for f in os.listdir(folder)
    if isdir(join(folder, f)) and f.startswith("ilastik")
]


metric = "rmsd_edge"
operation = "mean"  # "rate" for fail rate, "mean" or "var" for all other metrics
norm = LogNorm(vmin=dict_vrange[metric][0], vmax=dict_vrange[metric][1])
var_norm = LogNorm(vmin=dict_vrange_var[metric][0], vmax=dict_vrange_var[metric][1])

# One heatmap per ilastik sub-folder, saved as "Fig9-<text after the last '-'
# of the folder name>.png" in `folder_save`.
for subfolder in lst_subfolders:
    path_subfolder = join(folder, subfolder)
    fname = [f for f in os.listdir(path_subfolder) if f.endswith("results.csv")][0]
    df_current = pd.read_csv(join(path_subfolder, fname), dtype=float)

    # assemble_heatmap writes into the array it receives: pass a fresh copy so
    # every figure starts from the untouched template.
    heatmap_toplot = assemble_heatmap(
        r,
        pc,
        df_current,
        heatmap_base.copy(),
        metric,
        operation,
    )
    path_save = join(folder_save, "Fig9-" + subfolder.split("-")[-1] + ".png")

    fig, ax = plt.subplots(figsize=(5, 5), dpi=300)
    sns.heatmap(
        data=heatmap_toplot,
        xticklabels=xticks,
        yticklabels=yticks,
        annot=True,
        cmap=dict_cmap[metric],
        norm=norm,
        cbar=False,
        annot_kws={"fontsize": 15},
        fmt=".0f",
        ax=ax,
    )
    # Put the first row of the array at the bottom of the plot.
    ax.invert_yaxis()
    ax.tick_params(axis="both", which="major", labelsize=20)
    fig.tight_layout()
    fig.savefig(path_save, format="png", bbox_inches="tight")
    # Release the figure explicitly; many are created in this loop.
    plt.close(fig)