In [32]:
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import data.breathe_data as bd
import data.helpers as dh
import inference.helpers as ih
import modelling_ar.ar as ar
import inf_cutset_conditioning.helpers as cutseth
import inf_cutset_conditioning.cutset_cond_algs_learn_ar_change as cca_ar_change
import models.helpers as mh

import json
import numpy as np

In [None]:
# Load the measurement table through the project helper; the sheet name
# is kept in its own variable so it is easy to swap.
meas_file = "BR_O2_FEV1_FEF2575_conservative_smoothing_with_idx"
df = bd.load_meas_from_excel(meas_file)

In [None]:
# Model configuration passed to the cutset-conditioning run below.
ar_prior = "breathe (2 days model, ecFEV1 addmultnoise, ecFEF25-75)"
ar_change_cpt_suffix = "_shape_factor_single_laplace_card9"
ecfev1_noise_model_suffix = "_std_add_mult_ecfev1"

# NOTE(review): `id` shadows the Python builtin; the name is kept because
# other cells may rely on it.
id = "134"

# Restrict to this ID and keep its longest consecutive run of measurements
# (the helper is called with n_missing_days_allowed=2).
dftmp, start_idx, end_idx = dh.find_longest_conseq_sequence(
    df.loc[df.ID == id], n_missing_days_allowed=2
)

# Earlier versions of this cell also unpacked p_M_given_D, log_p_D_given_M,
# AR_given_M_and_D and AR_given_M_and_all_D; only two values are used now.
log_p_S_given_D, res_dict = cca_ar_change.run_long_noise_model_through_time(
    dftmp,
    ar_prior=ar_prior,
    ar_change_cpt_suffix=ar_change_cpt_suffix,
    ecfev1_noise_model_suffix=ecfev1_noise_model_suffix,
)

## Process results

In [143]:
def load_and_process_json(card_suffix, params, labels):
    """Load a ``p_s_given_d_card<suffix>.json`` file and sum its entries.

    Parameters
    ----------
    card_suffix : str or int
        Inserted into the file name ``p_s_given_d_card{card_suffix}.json``
        located under ``inf_cutset_conditioning/`` in the source directory.
    params : sequence
        One entry per evaluated parameter set (a tuple or a scalar), paired
        positionally with the summed values.
    labels : list of str
        Column names used to expand ``params`` into separate columns.

    Returns
    -------
    (pandas.DataFrame, dict)
        The frame holds ``log_p_S_given_D`` (element-wise sum over all kept
        entries), the raw ``params`` and one column per label. The dict maps
        each kept JSON key to its float array.
    """
    json_path = f"{dh.get_path_to_src()}inf_cutset_conditioning/p_s_given_d_card{card_suffix}.json"

    with open(json_path, "r") as handle:
        raw = json.load(handle)

    data = {}
    for key, values in raw.items():
        arr = np.array(values)
        # "-inf" may be stored as a string in the JSON; make it a real -inf
        # before casting everything to float.
        arr[np.where(arr == "-inf")] = -np.inf
        arr = arr.astype(float)
        # Entries that contain nothing but NaN carry no information.
        if np.all(np.isnan(arr)):
            continue
        data[key] = arr

    # Element-wise sum across the kept entries.
    log_p_s_given_D = np.sum(list(data.values()), axis=0)

    df_heatmap = pd.DataFrame(
        data=zip(log_p_s_given_D, params), columns=["log_p_S_given_D", "params"]
    )

    # Expand each params entry into one column per label.
    expanded = pd.DataFrame(df_heatmap["params"].tolist(), index=df_heatmap.index)
    df_heatmap[labels] = expanded
    return df_heatmap, data


# Plot heatmap with main width on x axis, tail width on y axis and log_p_S_given_D as the color, just use the first 3 main weights


def add_heatmap_for_weight(fig, df, weight, col):
    """Add a heatmap of the rows with ``main weight == weight`` to ``fig``.

    The trace goes into row 1, column ``col``; x is "main width", y is
    "tail width" and colour is "log_p_S_given_D". The x axis of that subplot
    is made categorical and labelled with the weight. Returns ``fig``.
    """
    subset = df[df["main weight"] == weight]
    trace = go.Heatmap(
        z=subset["log_p_S_given_D"],
        x=subset["main width"],
        y=subset["tail width"],
    )
    fig.add_trace(trace, row=1, col=col)

    axis_title = "Main width<br><br>Main weight=" + str(weight)
    fig.update_xaxes(type="category", title=axis_title, row=1, col=col)
    return fig


def add_heatmap_for_laplace_old(fig, df, col, row=1):
    """Add a heatmap of ``log_p_S_given_D`` over main/tail width to ``fig``.

    The trace is placed at (``row``, ``col``). Both axes of that subplot are
    made categorical; the y axis has no title and hidden tick labels, the x
    axis is titled "Laplace width". Returns ``fig``.
    """
    # NOTE(review): the previous comment said "1.5x the height of the plot",
    # but the value passed is len=4 -- confirm which was intended.
    colorbar_cfg = dict(len=4, y=0.5, yanchor="middle")  # y=0.5 + "middle" centres it vertically

    trace = go.Heatmap(
        z=df["log_p_S_given_D"],
        x=df["main width"],
        y=df["tail width"],
        colorbar=colorbar_cfg,
    )
    fig.add_trace(trace, row=row, col=col)

    fig.update_yaxes(type="category", title="", showticklabels=False, row=row, col=col)
    fig.update_xaxes(type="category", title="Laplace width", row=row, col=col)
    return fig


def plot_3_3_heatmaps(df_heatmap, IDs, w1, w2, w3):
    """Show three side-by-side heatmaps, one per main weight.

    Parameters
    ----------
    df_heatmap : pandas.DataFrame
        Must contain "log_p_S_given_D", "main width", "tail width" and
        "main weight" columns.
    IDs : sized collection
        Only ``len(IDs)`` is used, for the figure title.
    w1, w2, w3 : number
        The main-weight values plotted in columns 1, 2 and 3.

    The three traces share one colour range spanning the finite values of
    ``log_p_S_given_D``. NaN and +/-inf are ignored when computing the range;
    if no finite value exists the range is left to plotly.
    """
    fig = make_subplots(1, 3)

    for col, weight in enumerate((w1, w2, w3), start=1):
        fig = add_heatmap_for_weight(fig, df_heatmap, weight, col)

    fig.update_layout(
        title=f"Log probability of S given D for different AR change parameters ({len(IDs)} IDs)",
        yaxis_title="Tail width",
        # yaxis_type='log',
        width=650,
        height=300,
        font_size=10,
    )
    # Set the font size of any subplot titles (layout annotations)
    for annotation in fig["layout"]["annotations"]:
        annotation["font"] = dict(size=14)

    # Use the same colour scale for all subplots. Builtin min()/max() give
    # order-dependent results when NaN is present and min() raises on an
    # empty selection, so the range is taken from the finite values only.
    log_p = df_heatmap["log_p_S_given_D"]
    finite = log_p[np.isfinite(log_p)]
    has_finite = len(finite) > 0
    fig.update_traces(
        zmin=finite.min() if has_finite else None,
        zmax=finite.max() if has_finite else None,
        colorbar=dict(title="log_p_S_given_D"),
    )
    # The y axes are categorical (x axes are set per subplot above)
    fig.update_yaxes(type="category")

    fig.show()

In [23]:
# Full 3 x 3 x 3 grid of (main width, tail width, main weight); the
# right-most loop varies fastest, matching the order of the stored results.
params = [
    (main_width, tail_width, main_weight)
    for main_width in (1, 3, 5)
    for tail_width in (10, 30, 50)
    for main_weight in (0.5, 0.7, 0.9)
]
labels = ["main width", "tail width", "main weight"]
df_heatmap, IDs = load_and_process_json(27, params, labels)

In [None]:
# One heatmap per main weight of the grid loaded above.
plot_3_3_heatmaps(df_heatmap, IDs, w1=0.5, w2=0.7, w3=0.9)

In [None]:
# Main width 1 and tail width 10 are fixed; only the main weight varies.
params = [
    (1, 10, main_weight)
    for main_weight in (0.7, 0.73, 0.76, 0.79, 0.81, 0.84, 0.87, 0.9, 0.93, 0.96, 0.99)
]

# `labels` is reused from the previous parameter cell.
df_heatmap, IDs = load_and_process_json(11, params, labels)
df_heatmap

In [None]:
# Full 3 x 3 x 3 grid of (main width, tail width, main weight); the
# right-most loop varies fastest.
params = [
    (main_width, tail_width, main_weight)
    for main_width in (0.2, 0.4, 1)
    for tail_width in (1, 4, 10)
    for main_weight in (0.9, 0.95, 1)
]
labels = ["main width", "tail width", "main weight"]
df_heatmap, IDs = load_and_process_json(272, params, labels)
plot_3_3_heatmaps(df_heatmap, IDs, w1=0.9, w2=0.95, w3=1)

In [None]:
# (main width, tail width, 0.7) for every pair where the tail is strictly
# wider than the main component; this drops (0.1, 0.1) and (0.15, 0.1).
params = [
    (main_width, tail_width, 0.7)
    for main_width in (0.01, 0.03, 0.05, 0.1, 0.15)
    for tail_width in (0.1, 0.2, 0.5, 0.8, 1.1, 1.4)
    if tail_width > main_width
]
labels = ["main width", "tail width", "main weight"]
df_heatmap, IDs = load_and_process_json(28, params, labels)

# Single heatmap for the only main weight in this grid.
fig = add_heatmap_for_weight(make_subplots(), df_heatmap, 0.7, 1)
fig.update_layout(
    title=f"Log probability of S given D for different AR change parameters ({len(IDs)} IDs)",
    yaxis_title="Tail width",
    # yaxis_type='log',
    width=450,
    height=400,
    font_size=10,
)
fig.show()

In [16]:
# (main width, tail width, 0.7) for every pair where the tail is strictly
# wider than the main component; this drops (0.01, 0.01) and (0.025, 0.01).
params = [
    (main_width, tail_width, 0.7)
    for main_width in (0.001, 0.003, 0.007, 0.01, 0.025)
    for tail_width in (0.01, 0.03, 0.07, 0.1, 0.15)
    if tail_width > main_width
]
labels = ["main width", "tail width", "main weight"]
df_heatmap, data = load_and_process_json(23, params, labels)
# The title must count the IDs of the file loaded in this cell; previously it
# reused `IDs` left over from whichever cell ran before.
IDs = list(data.keys())

fig = make_subplots()
fig = add_heatmap_for_weight(fig, df_heatmap, 0.7, 1)
fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    yaxis_title="Tail width",
    # yaxis_type='log',
    width=450,
    height=400,
    font_size=10,
)
fig.show()

In [None]:
# Write one heatmap PDF per ID, using that ID's own log-probabilities.
# NOTE(review): `id` shadows the Python builtin; the name is kept so that
# module-level state after this cell is unchanged.
for id in data.keys():

    # Build a *new* frame for this ID. The previous code aliased `df_heatmap`
    # and overwrote its "log_p_S_given_D" column in place, so the aggregated
    # values were lost after the first iteration.
    df_heatmap_for_ID = df_heatmap.assign(log_p_S_given_D=data[id])

    # Plot for ID
    fig = make_subplots()
    fig = add_heatmap_for_weight(fig, df_heatmap_for_ID, 0.7, 1)
    fig.update_layout(
        title="Log probability of S given D for different shape parameters",
        yaxis_title="Tail width",
        # yaxis_type='log',
        width=800,
        height=600,
        font_size=12,
    )
    fig.write_image(
        f"{dh.get_path_to_main()}PlotsBreathe/Interconnecting_ARs_entries/heatmaps_optimal_ar_change_factor_shape/ID {id}.pdf"
    )

In [26]:
# Only the first entry varies; the other two are constant (0.1 and 1).
params = [
    (laplace_width, 0.1, 1)
    for laplace_width in (0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 1.5, 2)
]
labels = ["main width", "tail width", "main weight"]
df_heatmap, data = load_and_process_json("9_18IDs_30d", params, labels)
IDs = list(data.keys())

fig = add_heatmap_for_laplace_old(make_subplots(), df_heatmap, 1)
fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    # yaxis_type='log',
    width=450,
    height=200,
    font_size=10,
)
fig.show()

In [31]:
# Only the first entry varies; the other two are constant (0.1 and 1).
params = [
    (laplace_width, 0.1, 1)
    for laplace_width in (0.1, 0.5, 1, 1.5, 2, 4, 6, 8, 10, 15)
]

labels = ["main width", "tail width", "main weight"]
# df_heatmap, data = load_and_process_json("10_18IDs_30d", params, labels)
df_heatmap, data = load_and_process_json("10_allIDs_10d", params, labels)
IDs = list(data.keys())

fig = add_heatmap_for_laplace_old(make_subplots(), df_heatmap, 1)
fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    # yaxis_type='log',
    width=450,
    height=200,
    font_size=10,
)
fig.show()

# Min: 0.01 to mimic identity (no change)
# Expected: Most of the data (90%) shifts little or not at all (<10% change), some data (10%) has a surprising shift (>10% change)
# Max: double the allowed shift for the same proportion of data. 90% data below 20% shift
[0.01, 0.1, 0.5, 1, 1.2, 1.4, 1.6, 1.8, 2, 5]

In [None]:
# Scale values evaluated in the "14_<n>days" result files loaded below.
params = [
    0.01, 0.1, 0.5, 1, 1.2, 1.3, 1.4,
    1.5, 1.6, 1.7, 1.8, 1.9, 2, 5,
]


def transform_df_hm(df_hm, days):
    """Turn summed log-probabilities into normalised weights per scale.

    The frame is indexed by "scale", the "params" column is dropped and a
    constant "days" column is added. The "log_p_S_given_D" column is shifted
    by its maximum, exponentiated and divided by its sum, so it ends up
    holding values that sum to 1 (despite the "log" in its name). The
    maximum log value is printed. The input frame is not modified.
    """
    out = df_hm.set_index("scale").drop(columns=["params"])
    out["days"] = days

    log_p = out["log_p_S_given_D"]
    peak = np.max(log_p)
    print(peak)

    # Subtract the peak before exponentiating so the largest term is exp(0).
    weights = np.exp(log_p - peak)
    out["log_p_S_given_D"] = weights / np.sum(weights)
    return out


def add_one_heatmap_for_laplace(fig, df, col, row=1):
    """Add one annotated heatmap row (a single sequence length) to ``fig``.

    ``df`` is expected to be indexed by scale and to contain the columns
    "log_p_S_given_D" (colour) and "days" (y). The trace is placed at
    (``row``, ``col``) and its cells are annotated with formatted values.
    Returns ``fig``.
    """
    fig.add_trace(
        go.Heatmap(
            z=df["log_p_S_given_D"],
            x=df.index,
            y=df["days"],
            colorbar=dict(
                # NOTE(review): the previous comment said "1.5x the height of
                # the plot", but the value passed is 4 -- confirm intent.
                len=4,
                y=0.5,  # Center it vertically
                yanchor="middle",
            ),
        ),
        row=row,
        col=col,
    )

    def _format_cell(x):
        # "0" for exact zero, 3 decimals from 0.001 upwards, otherwise
        # scientific notation without decimals.
        return "0" if x == 0 else f"{x:.3f}" if x >= 0.001 else f"{x:.0e}"

    # DataFrame.applymap is deprecated in favour of DataFrame.map; use the
    # new name when this pandas provides it, else fall back to the old one.
    elementwise = (
        pd.DataFrame.map if hasattr(pd.DataFrame, "map") else pd.DataFrame.applymap
    )
    fig.update_traces(
        text=elementwise(df.T, _format_cell),
        texttemplate="%{text}",
        textfont={"size": 10},
        row=row,
        col=col,
    )
    return fig


labels = ["scale"]
# df_hm, data = load_and_process_json("10_18IDs_30d", params, labels)
# One result file per maximum sequence length (in days).
df_hm_5, data_5 = load_and_process_json("14_5days", params, labels)
df_hm_10, data_10 = load_and_process_json("14_10days", params, labels)
df_hm_15, data_15 = load_and_process_json("14_15days", params, labels)
df_hm_20, data_20 = load_and_process_json("14_20days", params, labels)
df_hm_25, data_25 = load_and_process_json("14_25days", params, labels)
df_hm_50, data_50 = load_and_process_json("14_50days", params, labels)

df_hm_5 = transform_df_hm(df_hm_5, 5)
df_hm_10 = transform_df_hm(df_hm_10, 10)
df_hm_15 = transform_df_hm(df_hm_15, 15)
df_hm_20 = transform_df_hm(df_hm_20, 20)
df_hm_25 = transform_df_hm(df_hm_25, 25)
df_hm_50 = transform_df_hm(df_hm_50, 50)


# Count the IDs present in the files loaded in THIS cell. The previous code
# used `data`, a leftover from an earlier cell that loaded a different file.
# NOTE(review): the union over all six files is used here; confirm whether a
# single file's ID count was intended instead.
IDs = sorted(
    set().union(
        *(d.keys() for d in (data_5, data_10, data_15, data_20, data_25, data_50))
    )
)

# One subplot row per sequence length, longest at the top.
fig = make_subplots(rows=6, shared_xaxes=True)
frames_top_to_bottom = (df_hm_50, df_hm_25, df_hm_20, df_hm_15, df_hm_10, df_hm_5)
for row, frame in enumerate(frames_top_to_bottom, start=1):
    fig = add_one_heatmap_for_laplace(fig, frame, col=1, row=row)
fig.update_yaxes(
    type="category", title="Maximum sequence<br>length (days)", row=3, col=1
)
fig.update_yaxes(
    type="category",
    tickvals=[5, 10, 15, 20, 25, 50],
)
fig.update_xaxes(type="category")
fig.update_xaxes(type="category", title="Laplace scale parameter (b)", row=6, col=1)

fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    # yaxis_type='log',
    width=800,
    height=400,
    font_size=11,
)
fig.show()

df_hm_5.head(2)

# Why selecting 3 consecutive days? Tradeoff between more data and still day to day changes
# For different range of consec days allowed, compute the total amount of data used for a max sequence 5-100 days and the number of IDs involved
# If I do 10 days, there's prob no need to use 3 consec days, just 1 day is enough

# Rerun by not excluding individual sequences that are smaller than the max sequence length

# Randomly select different sequences for each ID and then get the avg probability, compute error bars.

-6907.46026151
-9598.024251059996
-11462.303832819996
-12955.684571239992
-14772.762306290002
-13631.226746740002



DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0_level_0,log_p_S_given_D,days
scale,Unnamed: 1_level_1,Unnamed: 2_level_1
0.01,2.474681e-288,5
0.1,4.753456e-226,5


In [None]:
# Scale values evaluated in the "14_<n>days" result files loaded below.
params = [0.01, 0.1, 0.5, 1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 5]


def add_heatmap_for_laplace(fig, df):
    """Add a single annotated heatmap of ``df`` to ``fig`` at row 1, col 1.

    ``df`` is plotted transposed: its index goes on the x axis, its columns
    on the y axis, and every cell is annotated with its formatted value.
    Both axes are made categorical. Returns ``fig``.
    """
    fig.add_trace(
        go.Heatmap(
            z=df.T,
            x=df.index,
            y=df.columns,
            colorbar=dict(
                len=4,
                y=0.5,
                yanchor="middle",
            ),
            showscale=True,
        ),
        row=1,
        col=1,
    )

    def _format_cell(x):
        # "0" for exact zero, 3 decimals from 0.001 upwards, otherwise
        # scientific notation without decimals.
        return "0" if x == 0 else f"{x:.3f}" if x >= 0.001 else f"{x:.0e}"

    # DataFrame.applymap is deprecated in favour of DataFrame.map; use the
    # new name when this pandas provides it, else fall back to the old one.
    elementwise = (
        pd.DataFrame.map if hasattr(pd.DataFrame, "map") else pd.DataFrame.applymap
    )

    # Update text and axes for all traces
    fig.update_traces(
        text=elementwise(df.T, _format_cell),
        texttemplate="%{text}",
        textfont={"size": 10},
    )

    fig.update_yaxes(
        type="category",
        title="days",
        showticklabels=True,
    )
    fig.update_xaxes(
        type="category",
        title="scale parameter (b)",
    )
    return fig


def transform_df_hm(df_hm, days):
    """Return a one-column frame of normalised weights, named after ``days``.

    The frame is indexed by "scale", "params" is dropped and
    "log_p_S_given_D" is renamed to ``days``. That column is then shifted by
    its maximum, exponentiated and divided by its sum. The maximum log value
    is printed. The input frame is not modified.
    """
    out = (
        df_hm.set_index("scale")
        .drop(columns=["params"])
        .rename(columns={"log_p_S_given_D": days})
    )

    peak = np.max(out[days])
    print(peak)

    # Subtract the peak before exponentiating so the largest term is exp(0).
    weights = np.exp(out[days] - peak)
    out[days] = weights / np.sum(weights)
    return out


labels = ["scale"]
# df_hm, data = load_and_process_json("10_18IDs_30d", params, labels)
# One result file per maximum sequence length (in days).
df_hm_5, data_5 = load_and_process_json("14_5days", params, labels)
df_hm_10, data_10 = load_and_process_json("14_10days", params, labels)
df_hm_15, data_15 = load_and_process_json("14_15days", params, labels)
df_hm_20, data_20 = load_and_process_json("14_20days", params, labels)
df_hm_25, data_25 = load_and_process_json("14_25days", params, labels)
df_hm_50, data_50 = load_and_process_json("14_50days", params, labels)

df_hm_5 = transform_df_hm(df_hm_5, 5)
df_hm_10 = transform_df_hm(df_hm_10, 10)
df_hm_15 = transform_df_hm(df_hm_15, 15)
df_hm_20 = transform_df_hm(df_hm_20, 20)
df_hm_25 = transform_df_hm(df_hm_25, 25)
df_hm_50 = transform_df_hm(df_hm_50, 50)


# Join the per-length columns on the shared "scale" index.
df_hm = df_hm_5
for frame in (df_hm_10, df_hm_15, df_hm_20, df_hm_25, df_hm_50):
    df_hm = pd.merge(df_hm, frame, right_index=True, left_index=True)


# Count the IDs present in the files loaded in THIS cell. The previous code
# used `data`, a leftover from an earlier cell that loaded a different file.
# NOTE(review): the union over all six files is used here; confirm whether a
# single file's ID count was intended instead.
IDs = sorted(
    set().union(
        *(d.keys() for d in (data_5, data_10, data_15, data_20, data_25, data_50))
    )
)

fig = make_subplots()
fig = add_heatmap_for_laplace(fig, df_hm)
fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    # yaxis_type='log',
    # width=450,
    # height=200,
    font_size=10,
)
fig.show()

df_hm.head(2)

-6907.46026151
-9598.024251059996
-11462.303832819996
-12955.684571239992
-14772.762306290002
-13631.226746740002



DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0_level_0,5,10,15,20,25,50
scale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.01,2.474681e-288,0.0,0.0,0.0,0.0,0.0
0.1,4.753456e-226,3.571301e-308,0.0,0.0,0.0,1.4889379999999998e-284


In [None]:
# Scale values evaluated in the "14_<n>days" result files loaded below.
params = [0.01, 0.1, 0.5, 1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 5]


def add_heatmap_for_laplace(fig, df, col):
    """Add an annotated heatmap of ``df`` to ``fig`` at row 1, column ``col``.

    ``df`` is plotted transposed: its index goes on the x axis, its columns
    on the y axis, and every cell is annotated in scientific notation with
    two decimals. The axes of that subplot are made categorical. Returns
    ``fig``.
    """
    fig.add_trace(
        go.Heatmap(
            z=df.T,
            x=df.index,
            y=df.columns,
            colorbar=dict(
                # NOTE(review): the previous comment said "1.5x the height of
                # the plot", but the value passed is 4 -- confirm intent.
                len=4,
                y=0.5,  # Center it vertically
                yanchor="middle",
            ),
        ),
        row=1,
        col=col,
    )

    # DataFrame.applymap is deprecated in favour of DataFrame.map; use the
    # new name when this pandas provides it, else fall back to the old one.
    elementwise = (
        pd.DataFrame.map if hasattr(pd.DataFrame, "map") else pd.DataFrame.applymap
    )
    fig.update_traces(
        text=elementwise(df.T, lambda x: f"{x:.2e}"),
        texttemplate="%{text}",
        textfont={"size": 10},
        showscale=True,
    )
    fig.update_yaxes(
        type="category",
        row=1,
        col=col,
        title="days",
        showticklabels=True,
    )
    fig.update_xaxes(
        type="category",
        row=1,
        col=col,
        title="scale parameter (b)",
    )
    return fig


def transform_df_hm(df_hm, days):
    """Return log-probabilities relative to their maximum, in column ``days``.

    The frame is indexed by "scale", "params" is dropped and
    "log_p_S_given_D" is renamed to ``days``. The column maximum is printed
    and subtracted, so the best scale gets 0 and all others are negative.
    Unlike the earlier variants of this function, the values stay in log
    space (no exponentiation or normalisation). The input frame is not
    modified.
    """
    df_hm = df_hm.set_index("scale")
    df_hm = df_hm.drop(columns=["params"])
    df_hm = df_hm.rename(columns={"log_p_S_given_D": days})

    # The unused locals `max` (which shadowed the builtin) and `overall_max`
    # were removed; the maximum is computed once and reused.
    col_max = np.max(df_hm[days])
    print(col_max)
    df_hm[days] = df_hm[days] - col_max
    return df_hm


labels = ["scale"]
# df_hm, data = load_and_process_json("10_18IDs_30d", params, labels)
# One result file per maximum sequence length (in days).
df_hm_5, data_5 = load_and_process_json("14_5days", params, labels)
df_hm_10, data_10 = load_and_process_json("14_10days", params, labels)
df_hm_15, data_15 = load_and_process_json("14_15days", params, labels)
df_hm_20, data_20 = load_and_process_json("14_20days", params, labels)
df_hm_25, data_25 = load_and_process_json("14_25days", params, labels)
df_hm_50, data_50 = load_and_process_json("14_50days", params, labels)

df_hm_5 = transform_df_hm(df_hm_5, 5)
df_hm_10 = transform_df_hm(df_hm_10, 10)
df_hm_15 = transform_df_hm(df_hm_15, 15)
df_hm_20 = transform_df_hm(df_hm_20, 20)
df_hm_25 = transform_df_hm(df_hm_25, 25)
df_hm_50 = transform_df_hm(df_hm_50, 50)


# Join the per-length columns on the shared "scale" index.
df_hm = df_hm_5
for frame in (df_hm_10, df_hm_15, df_hm_20, df_hm_25, df_hm_50):
    df_hm = pd.merge(df_hm, frame, right_index=True, left_index=True)


# Count the IDs present in the files loaded in THIS cell. The previous code
# used `data`, a leftover from an earlier cell that loaded a different file.
# NOTE(review): the union over all six files is used here; confirm whether a
# single file's ID count was intended instead.
IDs = sorted(
    set().union(
        *(d.keys() for d in (data_5, data_10, data_15, data_20, data_25, data_50))
    )
)

fig = make_subplots()
fig = add_heatmap_for_laplace(fig, df_hm, 1)
fig.update_layout(
    title=f"Log probability of S given D for different shape parameters<br>({len(IDs)} IDs)",
    # yaxis_type='log',
    # width=450,
    # height=200,
    font_size=10,
)
fig.show()

df_hm.head(2)

-6907.46026151
-9598.024251059996
-11462.303832819996
-12955.684571239992
-14772.762306290002
-13631.226746740002



DataFrame.applymap has been deprecated. Use DataFrame.map instead.



Unnamed: 0_level_0,5,10,15,20,25,50
scale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.01,-7568.84491,-10565.053218,-12839.930216,-14482.29323,-16406.931524,-14673.042388
0.1,-7425.431873,-10305.346565,-12448.937648,-14055.000464,-15937.512827,-14284.677584


In [96]:
# Manual check: turn the 5-day column into normalised weights.
x = df_hm_5[5].values
# Do not name this `max`: at module level that rebinds the builtin and breaks
# every later cell that calls max(...).
x_max = np.max(x)
x = x - x_max
x = np.exp(x)
x = x / np.sum(x)
x

array([2.47468092e-288, 4.75345647e-226, 5.05923754e-105, 4.06148273e-029,
       3.62179080e-015, 2.09223791e-010, 8.04535837e-007, 3.10348780e-004,
       1.69133081e-002, 1.73445308e-001, 4.25816373e-001, 3.06613392e-001,
       7.69004658e-002, 3.96349077e-087])

## Process results (archive)

In [None]:
# Element wise addition of the values in data
# Full 3 x 3 x 3 grid of (main width, tail width, main weight); the
# right-most loop varies fastest.
params = [
    (main_width, tail_width, main_weight)
    for main_width in (2, 5, 8)
    for tail_width in (15, 30, 50)
    for main_weight in (0.3, 0.5, 0.7)
]
labels = ["main width", "tail width", "main weight"]

df_heatmap, IDs = load_and_process_json(27, params, labels)
df_heatmap

In [None]:
# Heatmaps with main width on the x axis, tail width on the y axis and
# log_p_S_given_D as the colour; one subplot per main weight.

fig = make_subplots(1, 3)


# NOTE(review): this redefines `add_heatmap_for_weight` with a different
# signature (no `fig` argument, no return value) and shadows the four-argument
# version defined earlier in the notebook.
def add_heatmap_for_weight(df, weight, col):
    """Add the heatmap for ``weight`` to the module-level ``fig`` at column ``col``."""
    subset = df[df["main weight"] == weight]
    trace = go.Heatmap(
        z=subset["log_p_S_given_D"],
        x=subset["main width"],
        y=subset["tail width"],
    )
    fig.add_trace(trace, row=1, col=col)
    axis_title = "Main width<br><br>Main weight=" + str(weight)
    fig.update_xaxes(type="category", title=axis_title, row=1, col=col)


# One subplot per main weight of the archived grid.
add_heatmap_for_weight(df_heatmap, 0.3, 1)
add_heatmap_for_weight(df_heatmap, 0.5, 2)
add_heatmap_for_weight(df_heatmap, 0.7, 3)

fig.update_layout(
    title=f"Log probability of S given D for different AR change parameters ({len(IDs.keys())} IDs)",
    yaxis_title="Tail width",
    # yaxis_type='log',
    width=650,
    height=300,
    font_size=10,
)
# Set the font size of any subplot titles (layout annotations)
for annotation in fig["layout"]["annotations"]:
    annotation["font"] = dict(size=14)
# Use the same colour scale for all subplots. Series methods on the finite
# values are used instead of the builtins min()/max(): the builtin `max` is
# rebound by an earlier cell of this notebook, and the builtins are not
# NaN-safe.
log_p_finite = df_heatmap.log_p_S_given_D[np.isfinite(df_heatmap.log_p_S_given_D)]
fig.update_traces(
    zmin=log_p_finite.min() if len(log_p_finite) else None,
    zmax=log_p_finite.max() if len(log_p_finite) else None,
    colorbar=dict(title="log_p_S_given_D"),
)
# The y axes are categorical (x axes are set per subplot above)
fig.update_yaxes(type="category")

fig.show()

In [None]:
# The weight is the most differentiating parameter, then the main width. The tail width has a smaller effect (although it is very important) and remains hidden
# 0.7 weight
# std 2 -> laplace would be better
# tail width doesn't change. Go wider?

# Since the tails are a secondary effect, let's find the rest first

# 3 laplace
# 3 gaussians

In [None]:
# Every main width is evaluated twice: with the flag False, then True.
params = [
    (main_width, use_laplace)
    for main_width in (0.5, 1, 1.5, 2, 2.5)
    for use_laplace in (False, True)
]
labels = ["main width", "laplace"]

df_10, IDs_10 = load_and_process_json(10, params, labels)
df_10

In [None]:
# Heatmap with main width on the x axis, the laplace flag on the y axis and
# log_p_S_given_D as the colour.

# Shown in the title only.
tail_width = 30
main_weight = 0.7

fig = make_subplots(1, 1)

fig.add_trace(
    go.Heatmap(z=df_10["log_p_S_given_D"], x=df_10["main width"], y=df_10["laplace"]),
    row=1,
    col=1,
)
fig.update_xaxes(
    type="category",
    row=1,
    col=1,
    title="Main width",
)

fig.update_layout(
    title=f"Log P(S|D) ({len(IDs_10)} IDs)<br>tail width={tail_width}, main weight={main_weight}",
    yaxis_title="Laplace",
    # yaxis_type='log',
    width=400,
    height=250,
    font_size=10,
)
# Set the font size of any subplot titles (layout annotations)
for annotation in fig["layout"]["annotations"]:
    annotation["font"] = dict(size=14)
# Colour range from the finite values only. Series methods are used instead
# of the builtins min()/max(): the builtin `max` is rebound by an earlier cell
# of this notebook, and the builtins are not NaN-safe.
log_p_finite_10 = df_10.log_p_S_given_D[np.isfinite(df_10.log_p_S_given_D)]
fig.update_traces(
    zmin=log_p_finite_10.min() if len(log_p_finite_10) else None,
    zmax=log_p_finite_10.max() if len(log_p_finite_10) else None,
    colorbar=dict(title="log_p_S_given_D"),
)
# The y axis is categorical (the x axis is set above)
fig.update_yaxes(type="category")

fig.show()

In [None]:
# Settings recorded for this run; only `params` and `labels` are passed on.
mean = 1
sigma_spike = 0.5
weight_spike = 0.7
sigma_tails = [5, 15, 30]
laplace_main = True
laplace_tail = True

labels = ["tail width"]
params = list(sigma_tails)  # same values, separate list object

df_3, IDs_3 = load_and_process_json(3, params, labels)
df_3

## Refine weights with best matching std_m, std_t

In [170]:
# Settings recorded for this run; only the weight of the main component varies.
mean = 1
sigma_spike = 0.2
sigma_tail = 30
laplace_main = False
laplace_tail = False
weight_spike = [
    0.59, 0.61, 0.64, 0.67, 0.7,
    0.73, 0.76, 0.79, 0.81, 0.84,
    0.87, 0.9, 0.93, 0.96, 0.99,
]

labels = ["main weight"]
params = weight_spike

df_15, IDs_15 = load_and_process_json("15_weights", params, labels)

In [None]:
# Display the summed log-probabilities, one row per main weight.
df_15

In [None]:
# Single-row heatmap of log_p_S_given_D over the tail widths stored in df_3.

# Shown in the title only.
# NOTE(review): the cell that builds df_3 records sigma_spike = 0.5, while the
# title below reports main width = 5 -- confirm which value is right.
main_width = 5
main_weight = 0.7

fig = make_subplots(1, 1)

# df_3 was built with labels ["tail width"], so it has no "laplace" column;
# indexing it raised a KeyError. All runs here are Laplace (see title), so the
# values are drawn as one row with a constant y category.
fig.add_trace(
    go.Heatmap(
        z=df_3["log_p_S_given_D"].to_numpy().reshape(1, -1),
        x=df_3["tail width"],
        y=["laplace"],
    ),
    row=1,
    col=1,
)
fig.update_xaxes(
    type="category",
    row=1,
    col=1,
    title="Tails width",
)

fig.update_layout(
    # Count the IDs of the file behind df_3 (previously IDs_10 was used).
    title=f"Log P(S|D) ({len(IDs_3)} IDs)<br>main width={main_width}, main weight={main_weight}, all laplace",
    # yaxis_title="Laplace",
    # yaxis_type='log',
    width=400,
    height=200,
    font_size=10,
)
# Set the font size of any subplot titles (layout annotations)
for annotation in fig["layout"]["annotations"]:
    annotation["font"] = dict(size=14)
# Colour range from the finite values only. Series methods are used instead
# of the builtins min()/max(): the builtin `max` is rebound by an earlier cell
# of this notebook, and the builtins are not NaN-safe.
log_p_finite_3 = df_3.log_p_S_given_D[np.isfinite(df_3.log_p_S_given_D)]
fig.update_traces(
    zmin=log_p_finite_3.min() if len(log_p_finite_3) else None,
    zmax=log_p_finite_3.max() if len(log_p_finite_3) else None,
    colorbar=dict(title="log_p_S_given_D"),
)
# The y axis is categorical (the x axis is set above)
fig.update_yaxes(type="category")

fig.show()

## Weight 0.73

In [172]:
# Refine main width

# Settings recorded for this run; only the std of the main component varies.
mean = 1
weight_spike = 0.73
sigma_tail = 30
laplace_main = False
laplace_tail = False
sigma_spike = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

labels = ["main std"]
params = sigma_spike

df_9, IDs_9 = load_and_process_json("9_stdmain", params, labels)

In [None]:
# Display the summed log-probabilities, one row per main std.
df_9

## Weight 0.73, main std 0.2

In [None]:
# Settings recorded for this run; only the std of the tail component varies.
mean = 1
sigma_spike = 0.2
weight_spike = 0.73
sigma_tail = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70]
laplace_main = False
laplace_tail = False
params = sigma_tail
# `labels` was not set in this cell, so the tail std values ended up in a
# column named after the previous cell's label ("main std").
# NOTE(review): "tail std" is chosen by analogy with "main std"; confirm the
# name expected by any downstream code.
labels = ["tail std"]
print(len(params))

df_tail, IDs_tail = load_and_process_json("15_stdtail", params, labels)