In [1]:
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import src.data.breathe_data as bd
import src.data.helpers as dh
import src.inference.helpers as ih
import src.modelling_ar.ar as ar
import src.inf_cutset_conditioning.helpers as cutseth
import src.inf_cutset_conditioning.cutset_cond_algs_learn_ar_change as cca_ar_change
import src.models.helpers as mh

import json
import numpy as np

In [31]:
# Load the measurements through the project loader, using the named dataset
meas_dataset_name = "BR_O2_FEV1_FEF2575_conservative_smoothing_with_idx"
df = bd.load_meas_from_excel(meas_dataset_name)

INFO:root:* Checking for same day measurements *


In [3]:
# Model configuration passed to the inference run below
ar_prior = "breathe (2 days model, ecFEV1 addmultnoise, ecFEF25-75)"
ar_change_cpt_suffix = "_shape_factor1"
ecfev1_noise_model_suffix = "_std_add_mult_fev1"

# Select one ID and extract its longest consecutive series (n_days=3).
# NOTE(review): `id` shadows the Python builtin of the same name; the name is
# kept here because other cells may read it.
id = "134"
dftmp, start_idx, end_idx = dh.find_longest_consec_series(
    df.loc[df["ID"] == id], n_days=3
)

# Slow cell: the recorded run took ~728 s for 18 entries.
# Only two values are unpacked; an earlier variant of this cell also listed
# p_M_given_D, log_p_D_given_M, AR_given_M_and_D and AR_given_M_and_all_D
# (commented out).
log_p_S_given_D, res_dict = cca_ar_change.run_long_noise_model_through_time(
    dftmp,
    ar_prior=ar_prior,
    ar_change_cpt_suffix=ar_change_cpt_suffix,
    ecfev1_noise_model_suffix=ecfev1_noise_model_suffix,
)

134 - Time for 18 entries: 728.41 s


## Process results

In [61]:
path = f"{dh.get_path_to_src()}inf_cutset_conditioning/p_s_given_d_1.json"

# Load the JSON file: an object mapping each key to a list of values
with open(path, "r") as file:
    raw_results = json.load(file)

# Build `data` in a single pass instead of rewriting it in place three times.
# Each list is converted to a float array. The cast to float already parses
# string entries such as "-inf" and "nan", so no separate replacement pass is
# needed. The previous `== "-inf"` comparison was a no-op on string arrays
# (the assigned -np.inf was stored back as the text "-inf") and compared
# numbers against a string on numeric arrays.
data = {}
for key, values in raw_results.items():
    values_as_float = np.array(values).astype(float)
    # Skip entries that contain only NaN values
    if not np.all(np.isnan(values_as_float)):
        data[key] = values_as_float

In [70]:
# Add the arrays stored in `data` element by element (axis 0 runs over the keys)
log_p_s_given_D = np.asarray(list(data.values())).sum(axis=0)
log_p_s_given_D

array([-48962.13867207, -47189.60388391, -45881.19938921, -49764.57708978,
       -47511.76244048, -46008.50020359, -49409.69140965, -47172.9437243 ,
       -45812.26959697, -50750.66263521, -49485.21408381, -48454.56480863,
       -52036.6780729 , -50105.14001467, -48713.58027852, -51927.4812987 ,
       -49875.91232378, -48542.7686145 , -51936.06988512, -51156.01391021,
       -50469.61634538,            -inf, -52103.42447276, -50918.05915286,
                  -inf, -52039.12813598, -50824.6713664 ])

In [128]:
# Grid of AR change parameters as (main width, tail width, main weight) tuples.
# The nested loops enumerate the same 27 combinations, in the same order, as a
# hand-written list would: main width varies slowest, main weight fastest.
# NOTE(review): this order must match the order in which the entries of
# log_p_s_given_D were produced -- that cannot be verified from this cell.
params = [
    (main_width, tail_width, main_weight)
    for main_width in (2, 5, 8)
    for tail_width in (15, 30, 50)
    for main_weight in (0.3, 0.5, 0.7)
]

# zip() stops at the shorter input, so a length mismatch would silently pair
# values with the wrong parameters. Fail loudly instead.
if len(log_p_s_given_D) != len(params):
    raise ValueError(
        f"Expected {len(params)} log-probabilities (one per parameter tuple), "
        f"got {len(log_p_s_given_D)}"
    )

# One row per parameter tuple: the summed log-probability and the tuple itself
df_heatmap = pd.DataFrame(
    columns=["log_p_S_given_D", "params"], data=zip(log_p_s_given_D, params)
)
# Split params into three columns "main width", "tail width", "main weight"
df_heatmap[["main width", "tail width", "main weight"]] = pd.DataFrame(
    df_heatmap["params"].tolist(), index=df_heatmap.index
)

# Plot heatmap with main width on x axis, tail width on y axis and
# log_p_S_given_D as the color; one subplot per main weight (3 in total)
fig = make_subplots(rows=1, cols=3)


def add_heatmap_for_weight(df, weight, col):
    """Add one heatmap panel to the module-level ``fig`` for a single main weight.

    The rows of ``df`` whose "main weight" column equals ``weight`` are drawn
    as a heatmap in row 1, column ``col`` of ``fig``: "main width" on the x
    axis, "tail width" on the y axis and "log_p_S_given_D" as the colour value.
    The x axis of that panel is set to categorical and titled with the weight.

    Args:
        df: DataFrame with the columns "main weight", "main width",
            "tail width" and "log_p_S_given_D".
        weight: Value of "main weight" to select.
        col: Subplot column of ``fig`` that receives the heatmap.
    """
    rows_for_weight = df.loc[df["main weight"] == weight]
    heatmap = go.Heatmap(
        x=rows_for_weight["main width"],
        y=rows_for_weight["tail width"],
        z=rows_for_weight["log_p_S_given_D"],
    )
    fig.add_trace(heatmap, row=1, col=col)

    axis_title = "Main width<br><br>Main weight=" + str(weight)
    fig.update_xaxes(type="category", row=1, col=col, title=axis_title)


# One heatmap panel per main weight, placed in subplot columns 1 to 3
for panel_col, main_weight in enumerate((0.3, 0.5, 0.7), start=1):
    add_heatmap_for_weight(df_heatmap, main_weight, panel_col)

fig.update_layout(
    title=f"Log probability of S given D for different AR change parameters ({len(data.keys())} IDs)",
    yaxis_title="Tail width",
    # yaxis_type='log',
    width=650,
    height=300,
    font_size=10,
)

# Reduce the font size of the subplot titles (stored as layout annotations)
for annotation in fig["layout"]["annotations"]:
    annotation["font"] = {"size": 14}

# Use the same color scale for all subplots. The lower bound skips -inf entries
# so that they do not stretch the color range.
scores = df_heatmap.log_p_S_given_D
lowest_finite_score = min(scores[scores != -np.inf])
highest_score = max(scores)
fig.update_traces(
    zmin=lowest_finite_score,
    zmax=highest_score,
    colorbar={"title": "log_p_S_given_D"},
)

# Treat the y axes as categorical (the x axes are set per panel)
fig.update_yaxes(type="category")

fig.show()

In [None]:
# The main weight is the most differentiating parameter, followed by the main width. Tail width has a smaller effect, although it is very important, and it remains hidden
# 0.7 weight
# std 2 -> laplace would be better
# tail width doesn't change. Go wider?

# Since the tail width is a secondary effect, let's find the rest first

# 3 laplace
# 3 gaussians