In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from statsmodels.graphics.tsaplots import plot_acf

import jax
import jax.numpy as jnp
from jax import grad, jit, vmap
from jax.scipy.stats.multivariate_normal import logpdf as jlogpdf

# Question 1: Reading and formatting dataset

In [None]:
#1.1
# Whitespace-delimited file with no header row. The separator is a regex, so
# it is written as a raw string: in a plain literal "\s" is an invalid string
# escape and triggers a warning on current Python versions.
dataset = pd.read_csv("GermanCredit.txt", sep=r"\s+", header=None)
dataset

In [None]:
#1.2
# Shift column 24 (used as the response further down) down by one.
# NOTE: the column is overwritten in place, so running this cell a second time
# shifts the values again.
dataset[24] = dataset[24].sub(1)
dataset

In [None]:
# Train/test split: the first M rows are used for fitting, the rest are held out.
M = 800                  # number of training rows
d = 24                   # response column; columns 0..d-1 are the covariates
length = len(dataset)    # total number of rows

# `.loc` slices are label-based and inclusive, hence the `M-1` / `d-1` bounds.
x_train, y_train = dataset.loc[:M-1, :d-1], dataset.loc[:M-1, d]
x_test, y_test = dataset.loc[M:, :d-1], dataset.loc[M:, d]

In [None]:
#1.3
from sklearn.preprocessing import StandardScaler

In [None]:
# Learn the per-column mean/scale from the training rows only, then apply the
# very same transform to both splits.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape

In [None]:
#1.4
# Columns of ones acting as the intercept term, one per split
# (the test column is consumed by the next cell).
ones_train, ones_test = (jnp.ones((rows, 1)) for rows in (M, length - M))

# Prepend the intercept column to the training design matrix.
x_train = jnp.concatenate([ones_train, x_train], axis=1)
x_train.shape

In [None]:
# Prepend the intercept column to the held-out design matrix.
x_test = jnp.concatenate(
    [ones_test, x_test],
    axis=1,
)
x_test

In [None]:
# Sanity check: per-column mean and standard deviation of the training design matrix.
x_train.mean(axis=0), x_train.std(axis=0)

# Question 2: Model specification

In [None]:
#2.1  - math needed


In [None]:
#2.2 - explanation needed

In [None]:
#2.3 - is it in terms of prob or vars >= 0

In [None]:
#2.4 - math needed: Is there an error?


In [None]:
#2.5 - math and speed comparison needed
def log_likelihood(beta):
    """Bernoulli-logit log-likelihood of the training data (NumPy version).

    Evaluates ``sum_i [ y_i * z_i - log(1 + exp(z_i)) ]`` with
    ``z = x_train @ beta``, reading the notebook-level ``x_train`` and
    ``y_train``.

    Parameters
    ----------
    beta : array-like
        Coefficient vector; must be conformable with the columns of ``x_train``.

    Returns
    -------
    Scalar log-likelihood value.
    """
    x_beta = np.matmul(x_train, beta)
    # log(1 + exp(z)) is evaluated as logaddexp(0, z): the naive form overflows
    # to inf as soon as z is large, which can happen for proposals far from the mode.
    output = np.sum(y_train * x_beta - np.logaddexp(0, x_beta))
    return output

@jit
def log_likelihood_jax(beta):
    """Bernoulli-logit log-likelihood of the training data (JAX version).

    Same quantity as the NumPy implementation,
    ``sum_i [ y_i * z_i - log(1 + exp(z_i)) ]`` with ``z = x_train @ beta``,
    but built from ``jax.numpy`` operations so it can be compiled and
    differentiated.

    Parameters
    ----------
    beta : array-like
        Coefficient vector; must be conformable with the columns of ``x_train``.

    Returns
    -------
    Scalar JAX array holding the log-likelihood.
    """
    # Convert the labels explicitly so the product below is computed by JAX
    # whatever container `y_train` happens to be stored in.
    labels = jnp.asarray(np.asarray(y_train))
    x_beta = jnp.matmul(x_train, beta)
    # logaddexp(0, z) == log(1 + exp(z)) without overflowing for large z, and
    # its derivative stays finite where the naive form yields inf/inf = nan.
    output = jnp.sum(labels * x_beta - jnp.logaddexp(0.0, x_beta))
    return output

# The function above is already compiled through its decorator; applying `jit`
# a second time adds nothing, so the old name is kept as a plain alias.
jit_likelihood_jax = log_likelihood_jax

In [None]:
#2.6 - speed comparison needed
# Compiled gradient of the JAX log-likelihood with respect to beta.
grad_log_likelihood = jax.jit(jax.grad(log_likelihood_jax))

In [None]:
#2.7 - speed comparison is needed
DIM = 25  # number of coefficients: the intercept plus the 24 covariates

# Prior covariance: a scaled inverse of the Gram matrix of the training design.
gram_matrix = np.matmul(x_train.T, x_train)
constant = np.pi**2 * M / (3 * DIM)
Sigma = constant * np.linalg.inv(gram_matrix)

@jit
def log_prior(beta):
    """Log-density of the zero-mean Gaussian prior with covariance ``Sigma`` at ``beta``."""
    prior_mean = jnp.zeros(DIM)
    return jlogpdf(beta, prior_mean, Sigma)

In [None]:
#2.8 - speed comparison is needed
# Compiled gradient of the log-prior with respect to beta.
grad_log_prior = jax.jit(jax.grad(log_prior))

In [None]:
# 2.9 
def log_posterior(beta):
    """Unnormalised log-posterior at ``beta``: log-prior plus log-likelihood."""
    prior_term = log_prior(beta)
    likelihood_term = log_likelihood(beta)
    return prior_term + likelihood_term


In [None]:
# 2.10 
def grad_log_posterior(beta):
    """Gradient of the unnormalised log-posterior: prior gradient plus likelihood gradient."""
    prior_gradient = grad_log_prior(beta)
    likelihood_gradient = grad_log_likelihood(beta)
    return prior_gradient + likelihood_gradient

# Section 3

In [None]:
# Q1: independent Metropolis-Hastings
def sample_prior():
    """Draw one coefficient vector from the zero-mean Gaussian prior with covariance ``Sigma``."""
    zero_mean = np.zeros(DIM)
    return multivariate_normal.rvs(mean=zero_mean, cov=Sigma)

# Sampler state shared by the loop in the next cell.
N = 10000                          # number of iterations
n_accept = 0                       # accepted proposals so far
store_beta = np.zeros((N, DIM))    # one row per iteration
current_beta = sample_prior()      # chain starts from a prior draw

In [None]:
#run the loop
# Independence sampler: proposals are drawn from the prior, so the prior
# density plays the role of the transition density in the Hastings ratio.
# The log-densities of the current state only change on acceptance, so they
# are cached here instead of being re-evaluated on every iteration.
log_posterior_current = log_posterior(current_beta)
log_transition_current = log_prior(current_beta)

for n in range(N):
    # sample a proposed state
    proposed_beta = sample_prior()

    # posterior and proposal densities of the candidate
    log_posterior_proposed = log_posterior(proposed_beta)
    log_transition_proposed = log_prior(proposed_beta)

    # log acceptance probability
    log_accept_prob = (log_posterior_proposed + log_transition_current
                       - log_posterior_current - log_transition_proposed)

    # accept or reject
    uniform = np.random.rand(1)  # sample a uniform on [0,1]
    if np.log(uniform) < log_accept_prob:
        current_beta = proposed_beta.copy()  # accept
        log_posterior_current = log_posterior_proposed
        log_transition_current = log_transition_proposed
        n_accept += 1

    store_beta[n, :] = current_beta

In [None]:
# Share of proposals accepted by the sampler run above.
acceptance_rate = n_accept / N
print("Acceptance rate: ", acceptance_rate)

In [None]:
# Trace plot of the first four coefficients of the stored chain.
iteration = np.arange(1, N + 1)
plt.figure()
for coord in range(4):
    plt.plot(iteration, store_beta[:, coord])
plt.xlabel('iteration')
plt.ylabel('beta')
plt.show()

In [None]:
#3.2
sigma_list = [0.002, 0.02, 0.2]
# x-axis for the trace plots; defined here so the cell does not depend on a
# plotting cell having been run first.
iteration = np.arange(1, N + 1)

for sigma in sigma_list:
    n_accept = 0
    current_beta = sample_prior()
    store_beta = np.zeros((N, DIM))

    # Quantities that do not change inside the inner loop.
    zero_mean = np.zeros(DIM)
    step_cov = np.identity(DIM) * sigma**2
    # The log-posterior of the current state only changes on acceptance, so it
    # is cached rather than recomputed at every iteration.
    log_posterior_current = log_posterior(current_beta)

    for n in range(N):
        # Gaussian random-walk proposal with standard deviation sigma per coordinate.
        move = multivariate_normal.rvs(mean=zero_mean, cov=step_cov)
        proposed_beta = current_beta + move

        # evaluate posterior density of the candidate
        log_posterior_proposed = log_posterior(proposed_beta)

        # accept or reject (the proposal is symmetric, so only the posterior
        # ratio enters the acceptance probability)
        log_accept_prob = log_posterior_proposed - log_posterior_current
        uniform = np.random.rand(1)  # sample a uniform on [0,1]
        if np.log(uniform) < log_accept_prob:
            current_beta = proposed_beta.copy()  # accept
            log_posterior_current = log_posterior_proposed
            n_accept += 1

        store_beta[n, :] = current_beta

    plt.figure()
    plt.plot(iteration, store_beta[:, 1])
    plt.xlabel('iteration')
    plt.ylabel('beta')
    plt.title("Evolution of beta")
    plt.show()

    plot_acf(store_beta[:, 1], alpha=None)
    plt.ylim([0, 1.1])
    plt.xlabel('lag')
    plt.ylabel('autocorrelation')
    plt.show()
    print(f"Acceptance probability is {n_accept/N}.")

In [None]:
# 3.3
import scipy
s = 0.08
SIG = jnp.eye(DIM) * s**2
rng = jax.random.PRNGKey(0)
n_accept = 0
store_beta = np.zeros((N,DIM))
beta = sample_prior()

# Metropolis-adjusted Langevin: the target is the posterior, so the density
# and its gradient are `log_posterior` / `grad_log_posterior`, and the Gaussian
# proposal density is evaluated with `jlogpdf`.
for n in range(N):
    # Mean of the proposal started at the current state (gradient drift).
    drift_current = beta + s**2 / 2 * grad_log_posterior(beta)
    epsilon = np.random.multivariate_normal(mean=jnp.zeros(DIM), cov=SIG)
    proposed_state = drift_current + epsilon

    # Mean of the reverse proposal, started at the candidate.
    drift_proposed = proposed_state + s**2 / 2 * grad_log_posterior(proposed_state)

    # Target log-densities at the candidate (y) and the current state (x).
    pi_y = log_posterior(proposed_state)
    pi_x = log_posterior(beta)
    # Forward q(y | x) and reverse q(x | y) proposal log-densities.
    q_y = jlogpdf(proposed_state, mean=drift_current, cov=SIG)
    q_x = jlogpdf(beta, mean=drift_proposed, cov=SIG)

    logacceptprob = float(pi_y + q_x - pi_x - q_y)

    # accept or reject
    uniform = np.random.rand(1)  # sample a uniform on [0,1]
    if np.log(uniform) < logacceptprob:
        beta = proposed_state.copy()  # accept
        n_accept += 1
    store_beta[n,:] = beta
    

In [None]:
# Share of proposals accepted by the sampler run above.
n_accept / N

In [None]:
# Trace plot of the first four coefficients of the stored chain.
iteration = np.arange(1, N + 1)
plt.figure()
for coord in range(4):
    plt.plot(iteration, store_beta[:, coord])
plt.xlabel('iteration')
plt.ylabel('beta')
plt.show()

In [None]:
# Running average of the first four coefficients as the chain progresses.
iteration = np.arange(1, N + 1)
plt.figure()
for coord in range(4):
    running_mean = np.cumsum(store_beta[:, coord]) / iteration
    plt.plot(iteration, running_mean)
plt.xlabel('iteration')
plt.ylabel('beta')
plt.show()

In [None]:
# Auto-correlation function of the first two coefficients, computed on the
# stored chain after dropping its first 2000 iterations, for lags up to 30.
# `alpha = None` is passed so that no confidence band is requested.
# NOTE(review): `plot_acf` is also imported in the first cell, so this import
# looks redundant — confirm before removing.
from statsmodels.graphics.tsaplots import plot_acf

plt.figure()
plot_acf(store_beta[2000:,0], lags = 30, alpha = None)
plot_acf(store_beta[2000:,1], lags = 30, alpha = None)

In [None]:
#3.4
import numpy as np

# Step size and the matching isotropic covariance s^2 * I.
s = 0.08
SIG = (s ** 2) * jnp.eye(DIM)

# Fresh sampler state: acceptance counter, storage with one row per
# iteration, and a starting point drawn from the prior.
n_accept = 0
store_beta = np.zeros((N, DIM))
beta = sample_prior()

def hamiltonian_dynamics(current_state, current_velocity, stepsize, num_steps, gradlogdensity):
    """Simulate Hamiltonian dynamics with the leapfrog integrator.

    The scheme is: half velocity step, then ``num_steps`` full position steps
    with a full velocity step between consecutive position steps, and a closing
    half velocity step.

    Parameters
    ----------
    current_state : array-like or float
        Starting position.
    current_velocity : array-like or float
        Starting velocity, same shape as ``current_state``.
    stepsize : float
        Leapfrog step size.
    num_steps : int
        Number of leapfrog steps; for ``num_steps <= 0`` the inputs are
        returned unchanged.
    gradlogdensity : callable
        Maps a position to the gradient of the log-density at that position.

    Returns
    -------
    tuple
        ``(x, v)``: position and velocity after ``num_steps`` leapfrog steps.
    """
    x = current_state
    v = current_velocity
    if num_steps <= 0:
        return (x, v)

    # Opening half step for the velocity.
    v = v + stepsize * np.array(gradlogdensity(x)) / 2
    for step in range(num_steps):
        # Full step for the position.
        x = x + stepsize * v
        # Full velocity step between position updates, skipped after the last one.
        if step != (num_steps - 1):
            v = v + stepsize * np.array(gradlogdensity(x))
    # Closing half step for the velocity. It must run exactly once, after the
    # loop, otherwise the trajectory is neither reversible nor energy-preserving.
    v = v + stepsize * np.array(gradlogdensity(x)) / 2
    return (x, v)

In [None]:
# Integrate the dynamics using the gradient of the log-posterior as the force term.
# NOTE(review): the position `beta` is also passed as the initial velocity and
# N leapfrog steps are taken — confirm both are intended.
hamiltonian_dynamics(beta, beta, s, N, grad_log_posterior)