Improvement of the through time factor


AR change factor card: ARxARxS


S is the variable that controls the standard deviation parameter of the gaussian noise added to the previous day's airway resistance

In [8]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
import models.helpers as mh
import models.cpts.helpers as cpth
import data.breathe_data as bd
import modelling_ar.ar as ar

import itertools

In [9]:
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior=None)
AR1 = mh.VariableNode("Airway resistance day 1 (%)", 0, 90, 2, prior=None)
AR2 = mh.VariableNode("Airway resistance day 2 (%)", 0, 90, 2, prior=None)
# AR = mh.VariableNode("Airway resistance (%)", 0, 90, 1, prior=None)
# AR1 = mh.VariableNode("Airway resistance day 1 (%)", 0, 90, 1, prior=None)
# AR2 = mh.VariableNode("Airway resistance day 2 (%)", 0, 90, 1, prior=None)
# S = mh.DiscreteVariableNode("AR change factor shape", 1, 27, 1, prior=None)
# S.values

## Insights into different distributions

In [10]:
# Each bin of AR is uniformly distributed
# P(AR2 | AR1 = [0, 2]) is the convolution of a laplace distribution with a uniform distribution


# Let's just take the midbin for now
def laplace_pdf(x, mu, b):
    return 1 / (2 * b) * np.exp(-np.abs(x - mu) / b)


def gaussian_pdf(x, mu, sigma):
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)


y_laplace = laplace_pdf(AR2.midbins, AR1.midbins[10], 5)
y_laplace = y_laplace / np.sum(y_laplace)

y_laplace = laplace_pdf(AR2.midbins, AR1.midbins[10], 5)
y_laplace = y_laplace / np.sum(y_laplace)

y_gaussian = gaussian_pdf(AR2.midbins, AR1.midbins[10], 5)
y_gaussian = y_gaussian / np.sum(y_gaussian)

fig = go.Figure()
fig.add_trace(go.Scatter(x=AR2.midbins, y=y_laplace, mode="markers+lines"))
# fig.add_trace(go.Scatter(x=AR2.midbins, y=y_gaussian, mode='markers+lines'))
fig.show()

In [None]:
# Create plot histogram plot for AR2 samples using go figure
fig = go.Figure()
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Scatter(
        x=AR2.midbins,
        y=cpt[:, 10, 4],
        mode="markers+lines",
        marker=dict(size=5, color="black"),
        line=dict(width=1.5, color="black"),
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f"Validation of numerical computation of U({ar_down}, {ar_up}) x Laplace(U, s={shape})<br>against sampling (n={n})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

# GMM

In [91]:
AR1 = mh.VariableNode("Airway resistance day 1 (%)", 0, 90, 2, prior=None)
AR2 = mh.VariableNode("Airway resistance day 2 (%)", 0, 90, 2, prior=None)

cpt = np.zeros((AR2.card, AR1.card))

sigma_spike = 1.7
sigma_tails = 1
weight_spike = 1
laplace_main = True

pdf = np.zeros(AR2.card)
for i, z in enumerate(AR2.get_bins_arr()):
    y = AR1.get_bins_arr()[0]
    pdf[i] = ar.p_uniform_x_gmm(
        z[0],
        z[1],
        y[0],
        y[1],
        sigma_spike,
        sigma_tails,
        weight_spike,
        laplace_main,
    )

# Shift and associate to the next bin
for i in range(AR2.card):
    for j in range(AR1.card):
        if i >= j:
            cpt[i, j] = pdf[i - j]
        else:
            cpt[i, j] = pdf[j - i]
# Then normalise
cpt /= cpt.sum(axis=0)

In [92]:
cpt /= cpt.sum(axis=0)
# Add extra dimension to cpt
cpt = np.expand_dims(cpt, axis=2)
cpt.shape

(45, 45, 1)

In [94]:
S = mh.DiscreteVariableNode("AR change factor shape", 1, 1, 1, prior=None)
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior=None)

cpth.save_cpt(
    [AR, AR, S],
    cpt,
    f"_shape_factor_single_laplace_1.7",
)

### Validate against sampling

In [None]:
# P(AR2 | AR1 = [0, 2]) is the convolution of a laplace distribution with a uniform distribution
# Sample AR1
def sample_from_uniform_x_gmm(ar1, ar2, sigma_spike, sigma_tails, weight_spike, n):
    n_spike = int(n * weight_spike)
    n_tail = n - n_spike
    ar1_spike = np.random.uniform(ar1, ar2, n_spike)
    ar1_tail = np.random.uniform(ar1, ar2, n_tail)

    ar2_spike = np.random.normal(ar1_spike, sigma_spike)
    ar2_tail = np.random.normal(ar1_tail, sigma_tails)

    return np.concatenate([ar2_spike, ar2_tail])

In [None]:
j = 0
z = AR1.get_bins_arr()[j]

fig = go.Figure()

xbins = np.arange(AR.a, AR.b + AR.bin_width / 2, AR.bin_width)

for n in [200000000, 20000000, 10000000, 8000000, 6000000, 4000000, 2000000]:
    ar2_samples = sample_from_uniform_x_gmm(
        z[0], z[1], sigma_spike, sigma_tails, weight_spike, n
    )
    hist, bins = np.histogram(ar2_samples, bins=xbins)
    hist_norm = hist / len(ar2_samples)

    print(f"n={n}, abs diff = {np.sum(np.abs(hist_norm - cpt[:, j, 0]))}")

fig.add_trace(
    go.Histogram(
        x=ar2_samples,
        histnorm="probability",
        xbins=dict(size=AR2.bin_width, start=AR2.a, end=AR2.b),
    )
)
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Bar(
        x=AR2.midbins,
        y=cpt[:, j, 0],
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f"Numerical computation of U({z[0]}, {z[1]}) x <br>{weight_spike} Gauss(U, s={sigma_spike}) + {1-weight_spike} Gauss(U, s={sigma_tails})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

In [24]:
cpt_oldd = np.expand_dims(cpt_old[:, :, 0], axis=2)
cpt_oldd.shape

(45, 45, 1)

### Multi shapes

In [88]:
# mean = 1
# sigma_spike = [1, 3, 5]
# sigma_tails = [10, 30, 50]
# weight_spike = [0.5, 0.7, 0.9]
# laplace_main = False
# laplace_tail = False
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# mean = 1
# sigma_spike = [1]
# sigma_tails = [10]
# weight_spike = [0.7, 0.73, 0.76, 0.79, 0.81, 0.84, 0.87, 0.9, 0.93, 0.96, 0.99]
# laplace_main = False
# laplace_tail = False
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# mean = 1
# sigma_spike = [0.2, 0.4, 1]
# sigma_tails = [1, 4, 10]
# weight_spike = [0.9, 0.95, 1]
# laplace_main = False
# laplace_tail = False
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# _shape_factor_main_tail_card28
# sigma_spike = [0.01, 0.03, 0.05, 0.1]
# sigma_tails = [0.1, 0.2, 0.5, 0.8, 1.1, 1.4]
# weight_spike = [0.7]
# laplace_main = False
# laplace_tail = False
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# params = [
#     (0.01, 0.1, 0.7),
#     (0.01, 0.2, 0.7),
#     (0.01, 0.5, 0.7),
#     (0.01, 0.8, 0.7),
#     (0.01, 1.1, 0.7),
#     (0.01, 1.4, 0.7),
#     (0.03, 0.1, 0.7),
#     (0.03, 0.2, 0.7),
#     (0.03, 0.5, 0.7),
#     (0.03, 0.8, 0.7),
#     (0.03, 1.1, 0.7),
#     (0.03, 1.4, 0.7),
#     (0.05, 0.1, 0.7),
#     (0.05, 0.2, 0.7),
#     (0.05, 0.5, 0.7),
#     (0.05, 0.8, 0.7),
#     (0.05, 1.1, 0.7),
#     (0.05, 1.4, 0.7),
#     (0.1, 0.2, 0.7),
#     (0.1, 0.5, 0.7),
#     (0.1, 0.8, 0.7),
#     (0.1, 1.1, 0.7),
#     (0.1, 1.4, 0.7),
#     (0.15, 0.2, 0.7),
#     (0.15, 0.5, 0.7),
#     (0.15, 0.8, 0.7),
#     (0.15, 1.1, 0.7),
#     (0.15, 1.4, 0.7),
# ]

#
# sigma_spike = [0.001, 0.003, 0.007, 0.01, 0.025]
# sigma_tails = [0.01, 0.03, 0.07, 0.1, 0.15]
# weight_spike = [0.7]
# laplace_main = False
# laplace_tail = False
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)
# params = [
#     (0.001, 0.01, 0.7),
#     (0.001, 0.03, 0.7),
#     (0.001, 0.07, 0.7),
#     (0.001, 0.1, 0.7),
#     (0.001, 0.15, 0.7),
#     (0.003, 0.01, 0.7),
#     (0.003, 0.03, 0.7),
#     (0.003, 0.07, 0.7),
#     (0.003, 0.1, 0.7),
#     (0.003, 0.15, 0.7),
#     (0.007, 0.01, 0.7),
#     (0.007, 0.03, 0.7),
#     (0.007, 0.07, 0.7),
#     (0.007, 0.1, 0.7),
#     (0.007, 0.15, 0.7),
#     (0.01, 0.03, 0.7),
#     (0.01, 0.07, 0.7),
#     (0.01, 0.1, 0.7),
#     (0.01, 0.15, 0.7),
#     (0.025, 0.03, 0.7),
#     (0.025, 0.07, 0.7),
#     (0.025, 0.1, 0.7),
#     (0.025, 0.15, 0.7),
# ]

# Fit a signle laplace
# sigma_spike = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 1.5, 2]
# sigma_tails = [0.1]
# weight_spike = [1]
# laplace_main = True
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# Fit a signle laplace
# sigma_spike = [0.1, 0.5, 1, 1.5, 2, 4, 6, 8, 10, 15]
# sigma_tails = [0.1]
# weight_spike = [1]
# laplace_main = True
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# Fit a single laplace
# sigma_spike = [0.0002, 0.0004, 0.0006, 0.0008, 0.001]
# sigma_spike = [0.0008, 0.001]
# sigma_tails = [0.1]
# weight_spike = [1]
# laplace_main = True
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# Fit a single laplace
# sigma_spike = [1, 1.5, 2]
# sigma_tails = [0.1]
# weight_spike = [1]
# laplace_main = True
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

# Fit a single laplace
# sigma_spike = [0.01, 0.1, 1, 2, 5]
# sigma_tails = [0.1]
# weight_spike = [1]
# laplace_main = True
# params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
# print(len(params))
# print(params)

sigma_spike = [0.01, 0.1, 0.5, 1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 5]
sigma_tails = [0.1]
weight_spike = [1]
laplace_main = True
params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
print(len(params))
print(params)

14
[(0.01, 0.1, 1), (0.1, 0.1, 1), (0.5, 0.1, 1), (1, 0.1, 1), (1.2, 0.1, 1), (1.3, 0.1, 1), (1.4, 0.1, 1), (1.5, 0.1, 1), (1.6, 0.1, 1), (1.7, 0.1, 1), (1.8, 0.1, 1), (1.9, 0.1, 1), (2, 0.1, 1), (5, 0.1, 1)]


In [89]:
AR1 = mh.VariableNode("Airway resistance day 1 (%)", 0, 90, 2, prior=None)
AR2 = mh.VariableNode("Airway resistance day 2 (%)", 0, 90, 2, prior=None)
S = mh.DiscreteVariableNode("AR change factor shape", 1, len(params), 1, prior=None)

cpt = np.zeros((AR2.card, AR1.card, S.card))

main_is_laplace = laplace_main
for s, (sigma_spike, sigma_tails, weight_spike) in enumerate(params):
    print(f"Parameters: sigma_spike = {sigma_spike}")

    pdf = np.zeros(AR2.card)
    for i, z in enumerate(AR2.get_bins_arr()):
        y = AR1.get_bins_arr()[0]
        pdf[i] = ar.p_uniform_x_gmm(
            z[0],
            z[1],
            y[0],
            y[1],
            sigma_spike,
            sigma_tails,
            weight_spike,
            main_is_laplace,
        )

    # Shift and associate to the next bin
    for i in range(AR2.card):
        for j in range(AR1.card):
            if i >= j:
                cpt[i, j, s] = pdf[i - j]
            else:
                cpt[i, j, s] = pdf[j - i]
    # Then normalise
    cpt /= cpt.sum(axis=0)

Parameters: sigma_spike = 0.01



The maximum number of subdivisions (50) has been achieved.
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.






invalid value encountered in divide



Parameters: sigma_spike = 0.1
Parameters: sigma_spike = 0.5
Parameters: sigma_spike = 1
Parameters: sigma_spike = 1.2
Parameters: sigma_spike = 1.3
Parameters: sigma_spike = 1.4
Parameters: sigma_spike = 1.5
Parameters: sigma_spike = 1.6
Parameters: sigma_spike = 1.7
Parameters: sigma_spike = 1.8
Parameters: sigma_spike = 1.9
Parameters: sigma_spike = 2
Parameters: sigma_spike = 5


In [None]:
# Test middle, right
i = 20
y = AR1.get_bins_arr()[i]
pdf = np.zeros(AR2.card)
for j, z in enumerate(AR2.get_bins_arr()):
    pdf[j] = ar.p_uniform_x_gmm(
        z[0], z[1], y[0], y[1], sigma_spike, sigma_tails, weight_spike, laplace_main
    )
pdf = pdf / np.sum(pdf)
assert np.allclose(pdf, cpt[:, i, 0]), f"pdf={pdf}\ncpt={cpt[:, i, 0]}"

i = -1
y = AR1.get_bins_arr()[i]
pdf = np.zeros(AR2.card)
for j, z in enumerate(AR2.get_bins_arr()):
    pdf[j] = ar.p_uniform_x_gmm(
        z[0], z[1], y[0], y[1], sigma_spike, sigma_tails, weight_spike, laplace_main
    )
pdf = pdf / np.sum(pdf)
assert np.allclose(pdf, cpt[:, i, 0]), f"pdf={pdf}\ncpt={cpt[:, i, 0]}"

In [90]:
S = mh.DiscreteVariableNode("AR change factor shape", 1, len(params), 1, prior=None)

# cpth.save_cpt([AR, AR, S], cpt, f"_shape_factor_main_tail_card{len(params)}")
cpth.save_cpt([AR, AR, S], cpt, f"_shape_factor_single_laplace_card{len(params)}_final")

In [12]:
cpttmp = cpth.get_cpt([AR, AR, S], f"_shape_factor_single_laplace_card{len(params)}")

In [71]:
cpttmp = cpt

In [84]:
s = 4
j = 20
z = AR1.get_bins_arr()[j]

fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Bar(
        x=AR2.midbins,
        y=cpttmp[:, j, s],
    ),
    row=1,
    col=1,
)
# Compute the cumulative distribution for cpttmp[:, j, s]
below = cpttmp[:, j, s]
above = cpttmp[:, j, s]
print(f"len(below) = {len(below)}, len(above) = {len(above)}")
abs_p = below + (1 - above)[::-1]

cum_dist = np.cumsum(cpttmp[:, j, s])
fig.add_trace(go.Bar(x=AR2.midbins, y=cum_dist), row=2, col=1)
fig.update_xaxes(title="Airway resistance day 2 (%)")
# title = f"Numerical computation of U({z[0]}, {z[1]}) x <br>{weight_spike} Gauss(U, s={sigma_spike}) + {1-weight_spike} Gauss(U, s={sigma_tails})"
title = f"Numerical computation of U({z[0]}, {z[1]})<br>laplace width={params[s][0]}"
fig.update_layout(title=title, showlegend=False, width=600, height=400)
fig.show()

len(below) = 45, len(above) = 45


# Identity change factor

In [20]:
cpt = np.ones((AR2.card, AR1.card, 1))

# P(AR2 | AR1 = ar_i) is o everywhere except for AR2 = ar_i

# Create this diagonal matrix
cpt[:, :, 0] = np.diag(np.ones(AR2.card))

In [21]:
S = mh.DiscreteVariableNode("AR change factor shape", 1, 1, 1, prior=None)

cpt_maybe_wrong = cpth.get_cpt([AR, AR, S], f"_shape_factor_identity")

In [17]:
# Test on random AR
ar1 = np.random.random(AR1.card)
ar1 /= np.sum(ar1)
ar2 = np.matmul(ar1, cpt[:, :, 0])

print(f"ar1 = {ar1}\nar2 = {ar2}")
assert np.allclose(ar2, ar1), f"ar1 = {ar1}\nar2 = {ar2}"

ar1 = [0.02949168 0.01805679 0.00786703 0.04235405 0.00573053 0.0016814
 0.03381475 0.01454415 0.01945188 0.04040868 0.02720276 0.02307735
 0.03558133 0.00567007 0.04200725 0.0396694  0.0272642  0.00931346
 0.03703972 0.00880981 0.01776048 0.01591145 0.03384717 0.00450511
 0.01138355 0.01399403 0.02338927 0.02364774 0.02883576 0.01033966
 0.04213541 0.03675268 0.02684946 0.03919579 0.0016599  0.02182905
 0.00823593 0.03458868 0.00838177 0.04319164 0.01231586 0.01205855
 0.0038114  0.0430604  0.01328299]
ar2 = [0.02949168 0.01805679 0.00786703 0.04235405 0.00573053 0.0016814
 0.03381475 0.01454415 0.01945188 0.04040868 0.02720276 0.02307735
 0.03558133 0.00567007 0.04200725 0.0396694  0.0272642  0.00931346
 0.03703972 0.00880981 0.01776048 0.01591145 0.03384717 0.00450511
 0.01138355 0.01399403 0.02338927 0.02364774 0.02883576 0.01033966
 0.04213541 0.03675268 0.02684946 0.03919579 0.0016599  0.02182905
 0.00823593 0.03458868 0.00838177 0.04319164 0.01231586 0.01205855
 0.0038114  0.043

In [None]:
cpth.save_cpt([AR, AR, S], cpt, f"_shape_factor_identity")

# GMM archive

In [None]:
import numpy as np
from scipy.stats import t, norm

mean = 0.5
sigma_spike = 0.5
sigma_tails = 5  # 20,
weight_spike = 0.7  # Between 0.7 and 0.3


def get_pdf(
    x,
    mean,
    sigma_spike,
    sigma_tails,
    weight_spike,
    laplace_main=False,
    laplace_tails=False,
):
    if laplace_main:
        pdf_spike = ar.pdf_laplace(x, mean, sigma_spike)
    else:
        pdf_spike = norm.pdf(x, mean, sigma_spike)
    if laplace_tails:
        pdf_tails = ar.pdf_laplace(x, mean, sigma_tails)
    else:
        pdf_tails = norm.pdf(x, mean, sigma_tails)
    pdf = weight_spike * pdf_spike + (1 - weight_spike) * pdf_tails

    pdf /= np.sum(pdf)
    return pdf


pdf = get_pdf(AR.midbins, mean, sigma_spike, sigma_tails, weight_spike, True, True)

# Same plot with plotly xpress
fig = go.Figure()
fig.add_trace(go.Scatter(x=AR.midbins, y=pdf, mode="markers+lines"))
# fig.update_yaxes(range=[-0.01, max(pdf) + 0.01])
# log x axis
# fig.update_yaxes(type="log")
fig.show()

In [None]:
print(f"I can run {np.power(24,1/3)} experiments in 8h for the 3 days case scenario")

In [None]:
# Build CPT

# Longest taking 1300s = 20 min

# 1: gris serach on 3 params
mean = 1
sigma_spike = [1, 3, 5]
sigma_tails = [10, 30, 50]
weight_spike = [0.5, 0.7, 0.9]
laplace_main = False
laplace_tail = False
params = list(itertools.product(sigma_spike, sigma_tails, weight_spike))
print(len(params))
print(params)

# 2: focus on the main std
# mean = 1
# sigma_spike = [0.5, 1, 1.5, 2, 2.5]
# weight_spike = 0.7
# sigma_tails = 30
# laplace = [False, True]

# params = list(itertools.product(sigma_spike, laplace))
# print(len(params))
# print(params)

# 3: focus on the tails std
# mean = 1
# sigma_spike = 0.5
# weight_spike = 0.7
# sigma_tails = [5, 15, 30]
# laplace_main = True
# laplace_tail = True

# params = sigma_tails
# print(len(params))
# print(params)

# 4: using best matching set from 1, finely optimise the weight
# Vary the weight
# mean = 1
# sigma_spike = 0.2
# weight_spike = [0.59, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.81, 0.84, 0.87, 0.9, 0.93, 0.96, 0.99]
# sigma_tail = 30
# laplace_main = False
# laplace_tail = False
# params = weight_spike
# print(len(params))
# print(params)

# 5: using best matchin weight, finely optimise the spike std
# mean = 1
# sigma_spike = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# weight_spike = 0.73
# sigma_tail = 30
# laplace_main = False
# laplace_tail = False
# params = sigma_spike
# print(len(params))
# print(params)

# 6: using best matchin weight and spike std, finely optimise the tail std
# mean = 0.5
# # mean = 1
# sigma_spike = 0.2
# weight_spike = 0.73
# sigma_tail = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70]
# laplace_main = False
# laplace_tail = False
# params = sigma_tail
# print(len(params))
# print(params)

cpt = np.zeros((AR2.card, AR1.card, len(params)))

for s in range(len(params)):
    sigma_tail = params[s]
    # sigma_spike, sigma_tails, weight_spike = params[s]
    pdf = get_pdf(
        AR.midbins,
        mean,
        sigma_spike,
        sigma_tail,
        weight_spike,
        laplace_main,
        laplace_tail,
    )

    for i in range(AR1.card):
        for j in range(AR2.card):
            if j >= i:
                cpt[j, i, s] = pdf[j - i]
            else:
                cpt[j, i, s] = pdf[i - j]

In [None]:
AR = mh.VariableNode("Airway resistance (%)", 0, 90, 2, prior=None)
AR1 = mh.VariableNode("Airway resistance day 1 (%)", 0, 90, 2, prior=None)
AR2 = mh.VariableNode("Airway resistance day 2 (%)", 0, 90, 2, prior=None)
S = mh.DiscreteVariableNode("AR change factor shape", 1, len(params), 1)

# cpth.save_cpt([AR, AR, S], cpt, f"_shape_factor{S.card}_stdtail")

## Final Gaussian CPT

In [None]:
# Finale Gaussian CPT

# mean = 1
mean = 0.5
sigma_spike = 0.2
weight_spike = 0.73
sigma_tail = 10
laplace_main = False
laplace_tail = False

S = mh.DiscreteVariableNode("AR change factor shape", 1, 1, 1, prior=None)

cpt = np.zeros((AR2.card, AR1.card, S.card))

pdf = get_pdf(
    AR.midbins,
    mean,
    sigma_spike,
    sigma_tail,
    weight_spike,
    laplace_main,
    laplace_tail,
)

for i in range(AR1.card):
    for j in range(AR2.card):
        if j >= i:
            cpt[j, i, 0] = pdf[j - i]
        else:
            cpt[j, i, 0] = pdf[i - j]

## Get previous ctp that gave good results

In [None]:
S = mh.DiscreteVariableNode("AR change factor shape", 2, 10, 2, prior=None)

cpt_old = cpth.get_cpt([AR, AR, S], f"_shape_factor_what_is_it")

In [None]:
bin = 22
fig = go.Figure()
# fig.add_trace(go.Scatter(x=AR.midbins, y=cpt[:, bin, 0], mode="markers+lines"))
fig.add_trace(go.Bar(x=AR.midbins, y=cpt_old[:, bin, 0]))
# fig.update_yaxes(range=[-0.01, max(pdf) + 0.01])
# Add AR.name on x axis
fig.update_xaxes(title=AR.name)
title = f"P(AR2 |AR1 = {AR.get_bins_str()[bin]})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

## ecFEV1 change in data

In [None]:
import data.helpers as dh

In [None]:
df = bd.load_meas_from_excel("BR_O2_FEV1_FEF2575_conservative_smoothing_with_idx")

In [None]:
diff_dump = np.array([])

for id in df.ID.unique():
    dftmp, _, _ = dh.find_longest_conseq_sequence(df[df.ID == id], n_missing_days_allowed=2)
    dftmp["ecFEV1 shifted"] = dftmp["ecFEV1"].shift(1)
    dftmp["ecFEV1 diff"] = dftmp["ecFEV1 shifted"] - dftmp["ecFEV1"]
    diff = dftmp["ecFEV1 diff"].dropna().values
    diff_dump = np.concatenate((diff_dump, diff))

In [None]:
# Histogram plot of diff_dump using go figure
fig = go.Figure()
# Add histogram plot with 50 bins
xbins = dict(start=-2.05, end=2, size=0.1)
fig.add_trace(go.Histogram(x=diff_dump, xbins=xbins))
# Add title and x axis label
fig.update_layout(
    title="Histogram of ecFEV1 diff", xaxis_title="ecFEV1 diff", width=700, height=400
)
fig.show()

In [None]:
# Validate CPT
fig = go.Figure()
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Scatter(
        x=AR2.midbins,
        y=cpt[:, 0, 0],
        mode="markers+lines",
        marker=dict(size=5, color="black"),
        line=dict(width=1.5, color="black"),
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f""
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

# Laplace

## Uniform + laplace dist convolution validation

In [None]:
import scipy.integrate as integrate


def pdf_laplace(x, mu, s):
    return 1 / (2 * s) * np.exp(-np.abs(x - mu) / s)


def sampler_uniform_x_laplace(n, a, b, s):
    """
    Y ~ U(a, b)
    Z ~ Laplace(mu=Y, s=s)
    """
    y_samples = np.random.uniform(a, b, n)
    z_samples = np.random.laplace(y_samples, s)
    return y_samples, z_samples


n = 1000000
ar_down, ar_up = AR1.get_bins_arr()[20]
shape = 5
AR1_samples, AR2_samples = sampler_uniform_x_laplace(n, ar_down, ar_up, shape)
# Remove samples outside AR2 range
AR2_samples = AR2_samples[(AR2_samples >= AR2.a) & (AR2_samples <= AR2.b)]

# Create plot histogram plot for AR2 samples using go figure
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=AR2_samples,
        histnorm="probability",
        xbins=dict(size=AR2.bin_width, start=AR2.a, end=AR2.b),
    )
)


def pdf_uniform_x_laplace(z1, y1, y2, s, abserr_tol=1e-10):
    """
    Y ~ U(y1, y2)
    Z ~ Laplace(mu=Y, s=s)
    Returns P(z=z1| y1 < y < y2)
    """

    def conv_fn(y, z, s):
        return pdf_laplace(z, y, s) / (y2 - y1) / y

    val, abserr = integrate.quad(conv_fn, y1, y2, args=(z1, s))
    if abserr > abserr_tol:
        raise ValueError(
            f"Absolute error after solving the integral is too high {abserr}"
        )

    return val


p_ar2 = np.array([pdf_uniform_x_laplace(z, ar_down, ar_up, shape) for z in AR2.midbins])
p_ar2 = np.array(p_ar2) / np.sum(p_ar2)

# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Scatter(
        x=AR2.midbins,
        y=p_ar2,
        mode="markers+lines",
        marker=dict(size=5, color="black"),
        line=dict(width=1.5, color="black"),
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f"Validation of numerical computation of U({ar_down}, {ar_up}) x Laplace(U, s={shape})<br>against sampling (n={n})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

In [None]:
# Get P(z1 < z < z2 | y1 < y < y2)

import scipy.integrate as integrate


def pdf_laplace(x, mu, s):
    return 1 / (2 * s) * np.exp(-np.abs(x - mu) / s)


def sampler_uniform_x_laplace(n, a, b, s):
    """
    Y ~ U(a, b)
    Z ~ Laplace(mu=Y, s=s)
    """
    y_samples = np.random.uniform(a, b, n)
    z_samples = np.random.laplace(y_samples, s)
    return y_samples, z_samples


n = 1000000
ar_down, ar_up = AR1.get_bins_arr()[22]
shape = 5
AR1_samples, AR2_samples = sampler_uniform_x_laplace(n, ar_down, ar_up, shape)
# Remove samples outside AR2 range
AR2_samples = AR2_samples[(AR2_samples >= AR2.a) & (AR2_samples <= AR2.b)]


def p_uniform_x_laplace(z1, z2, y1, y2, s, abserr_tol=1e-10, debug=True):
    """
    Y ~ U(y1, y2)
    Z ~ Laplace(mu=Y, s=s)
    Returns P(z1 < z < z2 | y1 < y < y2)
    """

    def conv_fn(z, y, s):
        return pdf_laplace(z, y, s) / (y2 - y1) / y

    val, abserr = integrate.dblquad(
        conv_fn, y1, y2, z1, z2, args=[s], epsabs=abserr_tol
    )
    if abserr > abserr_tol and debug:
        print(
            f"Warning - Absolute error after solving the integral is too high {abserr}, z1 = {z1}, z2 = {z2}, y1 = {y1}, y2 = {y2}"
        )

    return val


p_ar2 = np.array(
    [
        p_uniform_x_laplace(z1, z2, ar_down, ar_up, shape)
        for z1, z2 in AR2.get_bins_arr()
    ]
)
p_ar2 = np.array(p_ar2) / np.sum(p_ar2)


# Create plot histogram plot for AR2 samples using go figure
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=AR2_samples,
        histnorm="probability",
        xbins=dict(size=AR2.bin_width, start=AR2.a, end=AR2.b),
    )
)
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Scatter(
        x=AR2.midbins,
        y=p_ar2,
        mode="markers+lines",
        marker=dict(size=5, color="black"),
        line=dict(width=1.5, color="black"),
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f"Validation of numerical computation of U({ar_down}, {ar_up}) x Laplace(U, s={shape})<br>against sampling (n={n})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

# Build CPT

In [None]:
# Build CPT

cpt = np.zeros((AR2.card, AR2.card, S.card))


def distribute_pdf_along_child_var(var, pdf, pdf_peek_idx):
    """
    Use when the child and the parent vars have the same parameters
    pdf is a distribution of the noise for a bin of the parent's var
    The noise is always the same
    This function will shift the pdf along the child's bins to create a CPT

    Typically pdf_peek_idx = var.card // 2
    """
    cpt = np.zeros((var.card, var.card))

    for bin_idx, bin in enumerate(var.get_bins_arr()):
        pdf_trunc = np.zeros(var.card)
        peek_diff = pdf_peek_idx - bin_idx
        if peek_diff == 0:
            pdf_trunc = pdf
        elif peek_diff > 0:
            pdf_trunc[0:-peek_diff] = pdf[peek_diff:]
        else:
            pdf_trunc[-peek_diff:] = pdf[:peek_diff]
        # Norm the pdf
        pdf_trunc /= np.sum(pdf_trunc)
        cpt[:, bin_idx] = pdf_trunc
    return cpt


for s_idx, s in enumerate(S.values):
    # Compute p_ar2 for the middle bin of AR1
    middle_bin = AR1.card // 2
    ar_down, ar_up = AR1.get_bins_arr()[middle_bin]
    # Compute p_ar2, for a variable 3 times wider than AR2
    # AR_mock = mh.VariableNode("Mock var", -90, 180, 2, prior=None)
    p_ar2 = np.array(
        [
            p_uniform_x_laplace(z1, z2, ar_down, ar_up, s)
            for z1, z2 in AR2.get_bins_arr()
        ]
    )
    p_ar2 = np.array(p_ar2) / np.sum(p_ar2)
    cpt[:, :, s_idx] = distribute_pdf_along_child_var(AR2, p_ar2, middle_bin)

# Issue: when sliding the pdf to the left, values on the right are wrong, and vice versa

Since at max you slide by 45 units, need to add 45 ot the left and to the right: -45; 90

Since the distribution is symmetric, you can just do 0, 90

In [None]:
# Create plot histogram plot for AR2 samples using go figure
fig = go.Figure()
# Add scatter plot with markers on ar2_dist
fig.add_trace(
    go.Scatter(
        x=AR2.midbins,
        y=cpt[:, 10, 4],
        mode="markers+lines",
        marker=dict(size=5, color="black"),
        line=dict(width=1.5, color="black"),
    )
)
fig.update_xaxes(title="Airway resistance day 2 (%)")
title = f"Validation of numerical computation of U({ar_down}, {ar_up}) x Laplace(U, s={shape})<br>against sampling (n={n})"
fig.update_layout(title=title, showlegend=False, width=700, height=400)
fig.show()

In [None]:
cpth.save_cpt([AR, AR, S], cpt, "_shape_factor")