## Library Import

In [None]:
# Data handling, visualisation and baseline-model imports.
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import pandas as pd
import seaborn as sns
# sns.set_palette(palette='deep')
# sns_c = sns.color_palette(palette='deep')
%matplotlib inline
from sklearn.linear_model import LinearRegression

## Data load and plot

In [None]:
# Read the dataset from the CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="backprop.csv")
# Show the first rows as the cell output.
df.head()

In [None]:
# Select by position: the first column becomes X_data, the second y_data.
X_data, y_data = df.iloc[:, 0], df.iloc[:, 1]

# Scatter plot of the raw observations, drawn through an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(4, 6))
ax.scatter(X_data, y_data, color='g', s=20, alpha=0.5, label='sample data')
ax.set_title('Advertising Dataset')
ax.set_xlabel('TV', fontsize=14)
ax.set_ylabel('Sales', fontsize=14)
# right=2.0 places the axes edge beyond the nominal figure width; the layout
# values are kept exactly as they were.
fig.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.0, wspace=0.2, hspace=0.2)
ax.legend()
plt.show()

## Baseline neural network regression

## TFP method

### Convert data 

In [None]:
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

In [None]:
# Numeric type used for the tensors built in the cells below.
dtype = 'float32'
# Set TensorFlow's global random seed.
tf.random.set_seed(42)

In [None]:
# Convert each data column to a `dtype` tensor and reshape it into a single
# column (shape (-1, 1)).
x = tf.reshape(tf.convert_to_tensor(X_data, dtype=dtype), (-1, 1))
y = tf.reshape(tf.convert_to_tensor(y_data, dtype=dtype), (-1, 1))

## Make Bayesian random variables 

In [None]:
# Network layout: input x -> hidden layer with two activations -> output with bias.
# Total number of weights: 5.
def basic_nn(w0, w1, b1):
    """Evaluate the sine-activated network on the notebook-level input tensor ``x``.

    Args:
        w0: Weights right-multiplied onto ``x`` to form the hidden pre-activation.
        w1: Weights right-multiplied onto the hidden activations.
        b1: Bias added to the output pre-activation.

    Returns:
        The tensor ``sin(sin(x @ w0) @ w1 + b1)``.
    """
    hidden = tf.math.sin(tf.matmul(x, w0))
    return tf.math.sin(tf.matmul(hidden, w1) + b1)

In [None]:
# Joint distribution over the network parameters (Normal priors) and the
# observations (Normal likelihood centred on the network output).
jds_ab = tfd.JointDistributionNamedAutoBatched({
    # Prior on the output bias, shape (1,).
    'bias': tfd.Normal(
        loc=tf.constant([0.0], dtype=dtype),
        scale=tf.constant([1.0], dtype=dtype),
    ),
    # Prior on the first weight matrix, shape (1, 2); the second entry has scale 10.
    'weights0': tfd.Normal(
        loc=tf.constant([[0.0, 0.0]], dtype=dtype),
        scale=tf.constant([[1.0, 10.0]], dtype=dtype),
    ),
    # Prior on the second weight matrix, shape (2, 1); the second entry has scale 10.
    'weights1': tfd.Normal(
        loc=tf.constant([[0.0], [0.0]], dtype=dtype),
        scale=tf.constant([[1.0], [10.0]], dtype=dtype),
    ),
    # Likelihood: unit-scale Normal around basic_nn's prediction.
    'y': lambda weights0, weights1, bias: tfd.Normal(
        loc=basic_nn(weights0, weights1, bias),
        scale=1,
    ),
})

### Get probabilities

In [None]:
def target_log_prob_fn(weights0=None, weights1=None, bias=None):
    """Joint log-density of the given parameters together with the observed ``y``.

    The notebook-level tensor ``y`` is pinned as the observation, so the result
    depends only on the three parameter arguments.
    """
    pinned = dict(weights0=weights0, weights1=weights1, bias=bias, y=y)
    return jds_ab.log_prob(**pinned)

In [None]:
# Number of draws kept per chain.
num_results = 35000
# Number of burn-in steps run before the kept draws.
num_burnin_steps = 15000

# Hamiltonian Monte Carlo transition kernel targeting target_log_prob_fn.
# A TFP TransitionKernel produces a new state from the previous one.
hcm_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=target_log_prob_fn,
    step_size=1.0,
    num_leapfrog_steps=3,
)

# Wrapper that adapts the inner kernel's step_size; the number of adaptation
# steps is set to 80% of the burn-in length.
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    inner_kernel=hcm_kernel,
    num_adaptation_steps=int(num_burnin_steps * 0.8),
)

# Run a single chain (with burn-in).
@tf.function
def run_chain():
  """Draw one MCMC chain with the adaptive HMC kernel.

  Sampling starts from all-ones values for every parameter, runs
  `num_burnin_steps` burn-in steps and then collects `num_results` draws.

  Returns:
      The chain states as a list of three tensors, in the order of
      `current_state` (and therefore of `target_log_prob_fn`'s arguments):
      weights0, weights1, bias.
  """
  initial_state = [
      tf.convert_to_tensor([[1.0, 1.0]], dtype=dtype),    # weights0
      tf.convert_to_tensor([[1.0], [1.0]], dtype=dtype),  # weights1
      tf.convert_to_tensor([1.0], dtype=dtype),           # bias
  ]
  # The acceptance trace used to be collected here and then thrown away.
  # With trace_fn=None sample_chain skips tracing and returns only the states,
  # which is exactly what this function returned before.
  return tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=num_burnin_steps,
      current_state=initial_state,
      kernel=adaptive_hmc,
      trace_fn=None,
  )

In [None]:
# Number of chains to draw.
num_chains = 5
# Call run_chain() once per chain and keep each chain's samples.
chains = [run_chain() for _ in range(num_chains)]

## Framing and plotting

In [None]:
# Transpose the nesting: from one [weights0, weights1, bias] list per chain
# to one list of per-chain tensors per parameter group.
chains_t = [list(per_param) for per_param in zip(*chains)]

# For every parameter group, join all chains along the first axis and drop
# the size-1 dimensions.
chains_samples = [
    tf.squeeze(tf.concat(param_draws, axis=0))
    for param_draws in chains_t
]

In [None]:
# Column labels for the pooled draws: two columns from weights0, two from
# weights1 and one from the bias, in that order.
params = ['w11', 'w12', 'w21', 'w22', 'b2']

# Turn each parameter group's draws into a DataFrame and place them side by side.
chains_df = pd.concat(
    list(map(lambda group: pd.DataFrame(group.numpy()), chains_samples)),
    axis=1,
)
chains_df.columns = params

In [None]:
# Label every pooled draw with its position and its chain of origin:
#   sample_id       - row position in the pooled table,
#                     0 .. num_chains * num_results - 1
#   chain_sample_id - position inside its own chain, 0 .. num_results - 1
#   chain_id        - chain label, 'c_1' .. 'c_<num_chains>'
# The chains were concatenated back to back, so the remainder and the integer
# quotient of sample_id by num_results recover the in-chain position and the
# chain number. Later keyword arguments of a single assign() call can see the
# columns created by earlier ones.
chains_df = chains_df.assign(
    sample_id=lambda d: range(d.shape[0]),
    chain_sample_id=lambda d: d['sample_id'] % num_results,
    chain_id=lambda d: 'c_' + (d['sample_id'] // num_results + 1).astype(str),
)

chains_df.head()

In [None]:
# One row per parameter: per-chain histogram with KDE on the left, per-chain
# trace of the draws on the right.
fig, axes = plt.subplots(
    nrows=len(params),
    ncols=2,
    figsize=(10, 8),
    constrained_layout=True,
)

for i, param in enumerate(params):
    hist_ax, trace_ax = axes[i]
    sns.histplot(data=chains_df, x=param, hue='chain_id', kde=True, ax=hist_ax)
    sns.lineplot(
        data=chains_df,
        x='chain_sample_id',
        y=param,
        hue='chain_id',
        alpha=0.3,
        legend=False,
        ax=trace_ax,
    )

fig.suptitle('Posterior Samples per Chain', y=1.03);