In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

In [None]:
# Load the training and test samples from CSV files in the working directory.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [None]:
# Input feature columns fed to the network, and the regression target column.
columns_X = [
    'el_rawcl_Es0',
    'el_rawcl_Es1',
    'el_rawcl_Es2',
    'el_rawcl_Es3',
    'el_rawcl_E',
    'el_cl_aeta',
    'el_f0',
]
column_y = 'el_erawOverEtrue'

# Normalization is a non-trainable layer, so it must be adapted before the
# training. This takes a while since it needs to read all the data to compute
# the mean and the variance.
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(np.array(df_train[columns_X]))

# Display the statistics stored in the layer.
normalizer.mean, normalizer.variance

In [None]:
# Sanity check: print the per-column mean and variance of the normalized
# training features (run the layer once and reuse the result).
X_train_normalized = normalizer(df_train[columns_X])
print(tf.math.reduce_mean(X_train_normalized, axis=0))
print(tf.math.reduce_variance(X_train_normalized, axis=0))


In [None]:
import scipy

def my_mixture(x, *args):
    """Sum of amplitude-scaled Gaussian pdfs evaluated at ``x``.

    ``args`` holds ``3 * n`` numbers laid out as ``n`` means, followed by
    ``n`` standard deviations, followed by ``n`` amplitudes. The notebook
    uses ``n = 5`` (15 parameters), but any positive ``n`` is accepted.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which the mixture is evaluated.
    *args : float
        Flat parameter vector ``(m_1..m_n, s_1..s_n, k_1..k_n)``.

    Returns
    -------
    float or ndarray
        ``sum_i k_i * Normal(x; m_i, s_i) / n``. The result is not
        normalized to unit area.

    Raises
    ------
    ValueError
        If the number of parameters is not a positive multiple of three.
    """
    n_components, remainder = divmod(len(args), 3)
    if n_components == 0 or remainder != 0:
        raise ValueError(
            "expected 3 * n parameters (means, sigmas, amplitudes), got %d" % len(args)
        )

    means = args[:n_components]
    sigmas = args[n_components:2 * n_components]
    amplitudes = args[2 * n_components:]

    # Accumulate in parameter order so the 15-parameter case reproduces the
    # original hand-unrolled sum exactly.
    ret = 0
    for mean, sigma, amplitude in zip(means, sigmas, amplitudes):
        ret = ret + amplitude * scipy.stats.norm.pdf(x, loc=mean, scale=sigma)
    return ret / n_components


# Starting values for the fit: five means at 1, five widths of 0.1 and five
# unit amplitudes.
params = [1] * 5 + [0.1] * 5 + [1] * 5

# Histogram the target; the bin centres are the abscissae used in the fit.
xspace = np.linspace(0.4, 1.3, 200)
y, x = np.histogram(df_train[column_y], bins=xspace)
xmid = 0.5 * (x[1:] + x[:-1])

fitted_params, _ = scipy.optimize.curve_fit(my_mixture, xmid, y, p0=params)


def my_mixture_fitted(x):
    """Mixture evaluated with the fitted parameters."""
    return my_mixture(x, *fitted_params)


# The fit is done on raw bin counts, so rescale to unit area over the range.
integral = scipy.integrate.quad(my_mixture_fitted, 0.4, 1.3)[0]


def my_mixture_normalized(x):
    """Fitted mixture divided by its integral over [0.4, 1.3]."""
    return my_mixture_fitted(x) / integral


# Overlay the normalized fit on the density histogram of the target.
fig, ax = plt.subplots()
ax.plot(xmid, my_mixture_normalized(xmid))
ax.hist(df_train[column_y], bins=xspace, density=True)
plt.show()

In [None]:
# Average negative log-likelihood per training event under the normalized
# fitted mixture.
log_density_train = np.log(my_mixture_normalized(df_train[column_y]))
-np.sum(log_density_train) / len(df_train)

In [None]:
# Shape of a single target value and number of Gaussian components in the
# mixture predicted by the network.
event_shape = [1]
num_components = 3
# Number of raw network outputs the MixtureNormal layer needs as parameters.
params_size = tfp.layers.MixtureNormal.params_size(num_components, event_shape)

In [None]:


# Single-Gaussian head: the first output is the location, the remaining
# outputs go through softplus to give a positive scale.
# It is not referenced by the model defined in this cell.
pdf_template = lambda t: tfd.Normal(
    loc=t[..., :1],
    scale=tf.math.softplus(t[..., 1:]),
    # scale=tf.abs(2 + t[..., 1:])
)

# Mixture density network: normalized inputs -> two regularized hidden layers
# -> raw mixture parameters -> MixtureNormal distribution output.
model = tf.keras.Sequential([
    # `shape` is documented as a tuple that excludes the batch dimension.
    tf.keras.Input(shape=(len(columns_X),)),
    normalizer,
    tf.keras.layers.Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(20, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    # Linear outputs: the distribution layer applies its own parameter transforms.
    tf.keras.layers.Dense(params_size, activation=None),
    tfp.layers.MixtureNormal(num_components, event_shape),
    #tfp.layers.MixtureSameFamily(num_components, tfp.layers.IndependentNormal(event_shape))
])

tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
#alpha = Dense(k_mixt, activation=tf.nn.softmax)(hidden)
#mu = Dense(k_mixt, activation=None)(hidden)
#sigma = Dense(k_mixt, activation=tf.nn.softplus,name='sigma')(hidden)

#gm = tfd.MixtureSameFamily(
#mixture_distribution=tfd.Categorical(
#probs=alpha),
#components_distribution=tfd.Normal(
#loc=mu, 
#scale=sigma))

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
def negloglik(y, p_y):
    """Negative log-likelihood of the targets ``y`` under the distribution ``p_y``."""
    # A prior on the scale was tried here at some point:
    #   - tfd.Normal(loc=2, scale=2).log_prob(p_y.scale)
    return -p_y.log_prob(y)

In [None]:
# Minimize the negative log-likelihood with Adam.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss=negloglik,
)

# Stop when the validation loss has not improved for 3 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

history = model.fit(
    df_train[columns_X].values,
    df_train[column_y].values,
    epochs=20,
    verbose=True,
    batch_size=1024,
    validation_split=0.2,
    callbacks=[callback],
)

In [None]:
# Training and validation loss per epoch.
for key in ('loss', 'val_loss'):
    plt.plot(history.history[key], label=key)
plt.legend()

In [None]:
# Call the model on the whole test set; the last layer is a distribution
# layer, so yhat is a distribution object rather than a plain tensor.
yhat = model(df_test[columns_X].values)
# Work on a copy so that result columns do not modify df_test.
df_results_tf = df_test.copy()
#df_results_tf['muCB'] = yhat.loc.numpy().flatten()
#df_results_tf['sigmaCB'] = yhat.scale.numpy().flatten()

In [None]:
# Inspect the parameter metadata exposed by the predicted distribution.
yhat.parameter_properties()

In [None]:
bins = np.linspace(0.5, 1.2, 100)

# Per-event mean and standard deviation of the predicted mixture.
predicted_mean = yhat.mean().numpy().flatten()
predicted_sigma = np.sqrt(yhat.variance().numpy().flatten())

fig, axs = plt.subplots(1, 2)
ax_mean, ax_sigma = axs

# Left: predicted means (filled) against the true target (step).
ax_mean.hist(predicted_mean, bins=bins, density=True)
ax_mean.hist(df_test['el_erawOverEtrue'], bins=bins, density=True, histtype='step')

# Right: distribution of the predicted standard deviations.
ax_sigma.hist(predicted_sigma, bins=100)
plt.show()

In [None]:
# Means of the individual mixture components; index 0 on the last axis drops
# the size-1 event dimension (event_shape is [1]).
# presumably the result is (n_events, num_components) -- TODO confirm
yhat.components_distribution.mean().numpy()[:, :, 0]

In [None]:
bins = np.linspace(0.5, 1.2, 100)

# Raw-over-true energy ratio after dividing by the predicted mean.
corrected_ratio = 1./yhat.mean().numpy().flatten() * df_results_tf['el_rawcl_E'] / df_results_tf['el_truth_E']

fig, ax = plt.subplots()
ax.hist(corrected_ratio, label='NN', bins=bins, density=True)
ax.hist(df_results_tf['el_erawOverEtrue'], label='raw', bins=bins, density=True)
ax.legend(loc=0)

In [None]:
# Pull the mixture parameters out of the predicted distribution as numpy
# arrays: component weights, then component means and variances with the
# trailing size-1 event axis removed.
component_dists = yhat.components_distribution
alphas = yhat.mixture_distribution.probs_parameter().numpy()
means = component_dists.mean().numpy()[:, :, 0]
variances = component_dists.variance().numpy()[:, :, 0]

In [None]:
# Average the predicted per-event pdf over the first test events and compare
# it with the distribution of the target in the test set.
xspace = np.linspace(0.2, 1.3, 100)
# Use at most 500 events, fewer if the test set is smaller.
n_events = min(500, len(df_test))

# One batched forward pass instead of one model call per event.
xx = model(df_test[columns_X].values[:n_events]).tensor_distribution
# A value of shape (n_points, 1, 1) broadcasts against the batch of n_events
# distributions (event shape [1]), giving densities of shape (n_points, n_events).
y = xx.prob(xspace.reshape(-1, 1, 1)).numpy()
# Accumulate in float64, as the original zero-initialised accumulator did.
ysum = y.sum(axis=1, dtype=np.float64)

fig, ax = plt.subplots()
ax.plot(xspace, ysum / n_events)
ax.hist(df_test[column_y], bins=xspace, density=True)
plt.show()

In [None]:
# Predicted mixture pdf of a single test event, rebuilt from the extracted
# weights, means and variances.
xspace = np.linspace(0.2, 1.3, 100)
idx = 0
# Use the variances of the same event as the means and weights. The column
# vector xspace[:, None] broadcasts against the per-component parameters, so
# the number of components is not hard-coded.
y = scipy.stats.norm(means[idx], np.sqrt(variances[idx])).pdf(xspace[:, np.newaxis])
# Weight each component by its mixture probability and sum over components.
y = (y * alphas[idx]).sum(axis=1)
plt.plot(xspace, y)

In [None]:
# Average negative log-likelihood of the test targets under the predicted
# mixtures, rebuilt from the extracted parameters.
# The targets are a column vector so they broadcast against any number of
# components; each component pdf is weighted by its mixture probability
# before summing.
y_test = df_test[column_y].values[:, np.newaxis]
component_pdfs = scipy.stats.norm(means, np.sqrt(variances)).pdf(y_test)
-np.log((alphas * component_pdfs).sum(axis=1)).mean()

In [None]:
# Display the array of per-event component means.
means

In [None]:
from scipy import stats

xspace = np.linspace(0.5, 1.2, 10)

# For every mixture component, evaluate its Gaussian pdf on the grid for all
# events; each entry of `components` has one row per event and one column per
# grid point.
components = []
for mean, variance in zip(means.T, variances.T):
    print(mean.shape)
    print(variance.shape)
    # scipy's `scale` is the standard deviation, not the variance. The added
    # trailing axis lets the per-event parameters broadcast against the grid.
    sigma = np.sqrt(variance)
    components.append(stats.norm(mean[:, np.newaxis], sigma[:, np.newaxis]).pdf(xspace))

In [None]:
xspace = np.linspace(0.5, 1.2, 10)
xmidpoints = 0.5 * (xspace[1:] + xspace[:-1])
# Evaluate every predicted pdf at the bin centres. A value of shape
# (n_points, 1, 1) broadcasts against the batch of predicted distributions
# (event shape [1]) and yields (n_points, n_events). The transpose makes
# all_pdf[i] the curve of event i sampled at xmidpoints.
all_pdf = np.exp(yhat.log_prob(xmidpoints.reshape(-1, 1, 1)).numpy()).T

In [None]:
# Two test points (0 and 0.1) as a float32 column vector.
xx = tf.constant([[0.0], [0.1]], dtype=tf.float32)

In [None]:
# Log-density of the predicted distributions at the test points in xx.
# NOTE(review): xx has shape (2, 1); confirm that this broadcasts against the
# batch shape of yhat as intended.
yhat.log_prob(xx)

In [None]:
# Draw the first three rows of all_pdf against the bin centres.
for row in range(3):
    plt.plot(xmidpoints, all_pdf[row])

In [None]:
# Compare the target histogram with the sum of the predicted pdfs.
fig, ax = plt.subplots()
df_test['el_erawOverEtrue'].hist(ax=ax, grid=False, bins=xspace)
# The histogram shows counts per bin while all_pdf holds probability
# densities: the expected count in a bin is the summed density times the
# bin width.
ax.plot(xmidpoints, all_pdf.sum(axis=0) * np.diff(xspace))

In [None]:
# Stand-alone Mixture Density Network example on synthetic data.
# Note: this cell rebinds `model`, `event_shape`, `num_components`,
# `params_size`, `x` and `y`, which earlier cells also define.
tfd = tfp.distributions
tfpl = tfp.layers
tfk = tf.keras
tfkl = tf.keras.layers

# Load data -- graph of a [cardioid](https://en.wikipedia.org/wiki/Cardioid).
n = 2000
t = tfd.Uniform(low=-np.pi, high=np.pi).sample([n, 1])
r = 2 * (1 - tf.cos(t))
x = r * tf.sin(t) + tfd.Normal(loc=0., scale=0.1).sample([n, 1])
y = r * tf.cos(t) + tfd.Normal(loc=0., scale=0.1).sample([n, 1])

# Model the distribution of y given x with a Mixture Density Network.
event_shape = [1]
num_components = 2
component_params_size = tfpl.IndependentNormal.params_size(event_shape)
params_size = tfpl.MixtureSameFamily.params_size(
    num_components,
    component_params_size=component_params_size,
)

# Three ReLU hidden layers, then a linear layer producing the raw mixture
# parameters consumed by the distribution layer.
hidden_layers = [tfkl.Dense(units, activation='relu') for units in (1024, 128, 64)]
model = tfk.Sequential(hidden_layers + [
    tfkl.Dense(params_size, activation=None),
    tfpl.MixtureSameFamily(num_components, tfpl.IndependentNormal(event_shape)),
])

# Fit by minimizing the negative log-likelihood.
batch_size = 100
model.compile(
    optimizer='adam',
    loss=lambda y_true, dist: -dist.log_prob(y_true),
)
model.fit(
    x,
    y,
    batch_size=batch_size,
    epochs=20,
    steps_per_epoch=n // batch_size,
)

In [None]:
# Scatter, against x: the output of model.predict, the true y, and the mean
# of the predicted distribution.
x_flat = x.numpy().flatten()
plt.plot(x_flat, model.predict(x).flatten(), '.')
plt.plot(x_flat, y, '.')
plt.plot(x_flat, model(x).mean().numpy().flatten(), '.')

In [None]:
# Log-density predicted for the first sample, scanned over a grid of y values.
xx = model(x[0]).tensor_distribution
y_grid = np.linspace(-4, 1, 100)
# Plot against the grid itself so the horizontal axis shows y values rather
# than the sample index.
plt.plot(y_grid, xx.log_prob(y_grid.reshape(-1, 1)))