# Instructions

This document contains three main parts:
1. Package essentials
2. Examples
3. Code for future work

To run the examples, please execute the "Package essentials" section first.

The package essentials load the necessary libraries and define all essential functions required to fulfil the functionality of the Bayesreg package.

The examples use the diabetes dataset from scikit-learn (`sklearn.datasets`). Different models and priors are executed and compared with the original Matlab version of the package.

The code for future work contains functions that are not yet fully supported and are therefore commented out.

# Package essentials

## Load libraries

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import sklearn
import datetime
from sklearn import datasets
import warnings
warnings.filterwarnings(action='ignore')

## Essential functions

In [2]:
def toNpArray(X, y, varnames):
    '''
    Convert the design matrix and the target to numpy arrays.

    Input parameters:
        X        - [n x p] pandas DataFrame, list of rows or numpy array
        y        - [n x 1] pandas Series, list or numpy array
        varnames - [p x 1]/None, names of the predictors

    Return values:
        X        - [n x p] numpy array
        y        - [n x 1] numpy array (Series, lists and 1-D arrays become a column)
        varnames - [p x 1]/None; taken from the DataFrame columns when no names
                   were given and the columns are not the default 0..p-1 labels
    '''
    # NOTE: X.shape must not be read before the type checks; a plain list has no
    # .shape attribute, which previously made the list branch unreachable.
    if isinstance(X, pd.DataFrame):
        if varnames is None and list(X.columns) != list(range(X.shape[1])):
            varnames = np.array(X.columns).reshape(-1, 1)
        X = np.array(X)
    elif isinstance(X, list):
        X = np.array(X)

    if isinstance(y, (pd.Series, list)):
        y = np.array(y).reshape(-1, 1)
    elif isinstance(y, np.ndarray) and y.ndim == 1:
        # a flat target vector is accepted and turned into a column
        y = y.reshape(-1, 1)

    return X, y, varnames


# function sample beta0 from the posterior distribution
# validated
def sample_beta0(X, z, mu_z, Xt1, b, sigma2, omega2):
    '''
    Draw the intercept from its conditional normal distribution.

    Input parameters:
        X      - [n x p] design matrix
        z      - [n x 1] targets
        mu_z   - float/None, precomputed mean of z; None selects the weighted path
        Xt1    - [p x 1]/None, column sums of X (only read when mu_z is given)
        b      - [p x 1] current coefficients
        sigma2 - float, error variance
        omega2 - [n x 1] observation weights (only read when mu_z is None)

    Return values:
        b0     - float, the sampled intercept
        m      - float, the mean the sample was drawn around
    '''
    if mu_z is None:
        # weighted residual mean, each observation weighted by 1/omega2
        total_weight = np.sum(1/omega2)
        m = np.sum((z - np.matmul(X, b))/omega2) / total_weight
        v = sigma2 / total_weight
    else:
        n = z.shape[0]
        # without Xt1 the mean of z is used as is
        m = mu_z if Xt1 is None else (mu_z - np.matmul(b.T, Xt1)[0][0]/n)
        v = sigma2/n

    draw = np.random.normal()
    return m + draw*np.sqrt(v), m

# function generates random variates from the inverse Gaussian distribution
# validated
def randinvg(mu, lamda):
    '''
    Draw one random variate per row of mu.

    Input parameters:
        mu    - [n x 1] means
        lamda - float, inverted on entry before it is used in the transformation

    Return values:
        out   - [n x 1] samples, floored at 10**-5
    '''
    inv_lamda = 1/lamda
    n = mu.shape[0]
    chi2 = np.random.normal(size=(n, 1))**2

    root = np.sqrt(4*mu*inv_lamda*chi2 + mu**2*chi2**2)
    out = mu + 0.5*mu/inv_lamda*(mu*chi2 - root)

    # rows failing the uniform acceptance test take the reciprocal root mu^2/out
    swap = (np.random.uniform(size=(n, 1)) >= mu/(mu + out))
    out[swap] = mu[swap]**2/out[swap]

    # keep the draws away from zero
    return np.maximum(out, 10**-5)

# function generate random variables from exponential distribution
# validated
def exprnd_fast(mu):
    '''
    Draw exponential variates by inverting uniform draws.

    Input parameters:
        mu - [n x 1]/float, mean(s) of the distribution

    Return values:
        r  - [n x 1] samples ([1 x 1] when mu is not a numpy array)
    '''
    n = mu.shape[0] if isinstance(mu, np.ndarray) else 1
    uniforms = np.random.uniform(size=(n, 1))
    return -mu * np.log(uniforms)

# function which standardise the design matrix X and target y if required
# validated
def standardise(X, y):
    '''
    Centre the columns of X and scale them to unit length; centre y if given.

    Input parameters:
        X     - [n x p]
        y     - [n x 1]/None

    Return values:
        X     - [n x p] centred and scaled design matrix
        meanX - [1 x p] column means
        stdX  - [1 x p] column standard deviations multiplied by sqrt(n)
        y     - [n x 1] centred target   (only returned when y is not None)
        meany - float, mean of y         (only returned when y is not None)
    '''
    n_obs = X.shape[0]
    meanX = np.mean(X, axis=0).reshape(1, -1)
    stdX = (np.std(X, axis=0)*np.sqrt(n_obs)).reshape(1, -1)

    X = (X - meanX)/stdX

    # without a target only the three X related values are returned
    if y is None:
        return X, meanX, stdX

    meany = np.mean(y)
    return X, meanX, stdX, y - meany, meany
    
# function which reverse the standardisation process
# validated
def unstandardise(X, muX, normX, y, muy):
    '''
    Undo the scaling and centring applied by standardise.

    Input parameters:
        X     - [n x p] standardised design matrix
        muX   - [1 x p] column offsets that are added back
        normX - [1 x p] column scales that are multiplied back
        y     - None/[n x 1] centred target
        muy   - None/float, offset added back to y

    Return values:
        X     - [n x p]
        y     - [n x 1], or None when no target was given
    '''
    restored_X = X*normX + muX
    restored_y = None if y is None else y + muy
    return restored_X, restored_y

# function forms the diagonal entries of the regularization matrix
# validated
def make_Lambda(sigma2, tau2, lambda2):
    '''
    Build the diagonal of the regularisation matrix.

    Input parameters:
        sigma2     - float
        tau2       - float
        lambda2    - [p x 1]

    Return values:
        Lambda     - [p x 1], sigma2 * tau2 * lambda2 * delta2prod
        delta2prod - [p x 1], all ones
    '''
    n_coef = lambda2.shape[0]
    delta2prod = np.ones(shape=(n_coef, 1))
    return sigma2 * tau2 * lambda2 * delta2prod, delta2prod

# function sample beta from the posterior distribution
# validated
def sample_beta(X, z, mvnrue, b0, sigma2, tau2, lambda2, delta2prod, omega2, XtX, Xty, Xt1, weights, gprior, b):
    '''
    Draw the regression coefficients from their conditional posterior.

    Input parameters:
        X          - [n x p]
        z          - [n x 1]
        mvnrue     - bool, True selects fastmvg_rue, False selects fastmvg_bhat
        b0         - float, current intercept
        sigma2     - float
        tau2       - float
        lambda2    - [p x 1]
        delta2prod - [p x 1]
        omega2     - [n x 1]
        XtX        - [p x p]/None, precomputed X'X
        Xty        - [p x 1]/None, precomputed X'z
        Xt1        - [p x 1]/None, column sums of X
        weights    - bool, whether observation weights are in use
        gprior     - bool
        b          - [p x 1], not read by this function

    Return values:
        b          - [p x 1]
        muB        - [p x 1]
    '''
    prior_var = sigma2 * tau2 * lambda2 * delta2prod

    # no block sampling at this time
    needs_raw_data = weights or Xty is None

    if mvnrue and not needs_raw_data:
        # precomputed cross products can be used directly; the intercept is
        # removed from X'z when the column sums are available
        rhs = Xty if Xt1 is None else Xty - b0*Xt1
        return fastmvg_rue(None, XtX, None, rhs, prior_var, sigma2, None, gprior, XtX)

    # both remaining samplers work on the weighted design and the residual z - b0
    resid = z - b0
    omega = np.sqrt(omega2)
    X_weighted = X/omega

    if mvnrue:
        return fastmvg_rue(X_weighted, None, resid, None, prior_var, sigma2, omega, gprior, XtX)

    return fastmvg_bhat(X_weighted, resid, prior_var, np.sqrt(sigma2), omega)

# sampler for multivariate gaussian
# validated
def fastmvg_rue(Phi, PtP, alpha, Ptalpha, D, sigma2, omega, gprior, XtX):
    '''
    Multivariate normal sampler based on a Cholesky factor of the precision matrix.

    Input parameters:
        Phi     - [n x p]/None, only read when PtP or Ptalpha is None
        PtP     - [p x p]/None, Phi'Phi; computed from Phi when None
        alpha   - [n x 1]/None, only read when Ptalpha is None
        Ptalpha - [p x 1]/None; computed as Phi'(alpha/omega) when None
        D       - [p x 1] prior variances
        sigma2  - float
        omega   - [n x 1]/None, only read when Ptalpha is None
        gprior  - bool, use XtX/D[0] instead of diag(1/D) as prior precision
        XtX     - [p x p], only read when gprior is True

    Return values:
        x - [p x 1] sample
        m - [p x 1] mean of the distribution
    '''
    gram = np.matmul(Phi.T, Phi) if PtP is None else PtP
    rhs = np.matmul(Phi.T, (alpha/omega)) if Ptalpha is None else Ptalpha

    p = D.shape[0]

    if gprior:
        prior_precision = XtX/D[0]
    else:
        prior_precision = np.diag(1/D.reshape(-1))
    L = np.linalg.cholesky(gram/sigma2 + prior_precision)

    # mean: solve (L L') m = rhs/sigma2 in two steps
    m = np.linalg.solve(L.T, np.linalg.solve(L, rhs/sigma2))
    # noise: L'^-1 applied to a standard normal draw
    w = np.linalg.solve(L.T, np.random.normal(size=(p, 1)))
    return m + w, m


# sampler for multivariate gaussian
# validated
def fastmvg_bhat(Phi, alpha, D, sigma, omega):
    '''
    Multivariate normal sampler that solves an [n x n] system instead of a [p x p] one.

    Input parameters:
        Phi   - [n x p]
        alpha - [n x 1]
        D     - [p x 1] prior variances
        sigma - float
        omega - [n x 1]

    Return values:
        x - [p x 1] sample
        u - [p x 1], the same array as x
    '''
    n, p = Phi.shape
    prior_draw = np.random.normal(size=(p, 1)) * np.sqrt(D)
    noise = np.random.normal(size=(n, 1))

    projected = np.matmul(Phi, prior_draw)/sigma + noise
    scaled_Pt = Phi.T*D/sigma
    system = np.matmul(Phi, scaled_Pt)/sigma + np.eye(n)
    correction = np.linalg.solve(system, (alpha/omega/sigma - projected))

    x = prior_draw + np.matmul(scaled_Pt, correction)
    # the sample itself is handed back in place of a mean
    return x, x


# function sample error variance for linear regression model
# validated
def sample_sigma2(mu, y, b, ete, omega2, tau2, lambda2, delta2prod, gprior):
    '''
    Draw the error variance for the linear regression models.

    Input parameters:
        mu         - [n x 1]/None, fitted values; required when ete is None or gprior is True
        y          - [n x 1]
        b          - [p x 1]
        ete        - float/None, precomputed weighted residual sum of squares;
                     computed from y - mu when None
        omega2     - [n x 1]
        tau2       - float
        lambda2    - [p x 1]
        delta2prod - [p x 1]
        gprior     - bool

    Return values:
        sigma2     - float, the sample
        muSigma2   - float, scale/(shape - 1)
        e          - [n x 1] residuals, or None when ete was supplied by the caller
    '''
    n = y.shape[0]
    p = b.shape[0]

    e = None
    if ete is None:
        e = y - mu
        ete = np.sum(e**2/omega2)

    shape = (n+p)/2

    if not gprior:
        scale = ete/2 + np.sum(b**2/lambda2/delta2prod)/2/tau2
    else:
        bXtXb = np.matmul(mu.T, mu)[0][0]
        # use ete rather than recomputing from e: e is None whenever the caller
        # supplied a precomputed ete, and both expressions are the same otherwise
        scale = ete/2 + bXtXb/tau2/2

    sigma2 = scale/np.random.gamma(shape)
    muSigma2 = scale/(shape-1)
    return sigma2, muSigma2, e


# function sample global variance hyperparameter
# validated
def sample_tau2(b, sigma2, lambda2, delta2prod, xi, mu, gprior, tau_a):
    '''
    Draw the global variance hyperparameter.

    Input parameters:
        b          - [p x 1]
        sigma2     - float
        lambda2    - [p x 1]
        delta2prod - [p x 1]
        xi         - float
        mu         - [n x 1], only read when gprior is True
        gprior     - bool
        tau_a      - float

    Return values:
        tau2   - float, the sample
        muTau2 - float, scale/(shape - 1)
    '''
    shape = b.shape[0]/2 + tau_a

    # quadratic form entering the scale: mu'mu under the g-prior,
    # otherwise the coefficients weighted by their local variances
    if gprior:
        quad = np.matmul(mu.T, mu)[0][0]
    else:
        quad = np.sum(b**2/lambda2/delta2prod)
    scale = 1/xi + quad/2/sigma2

    tau2 = scale/np.random.gamma(shape)
    return tau2, scale/(shape-1)


# function sample xi for all models
# validated
def sample_xi(tau2, tau_ab):
    '''
    Draw the auxiliary variable xi.

    Input parameters:
        tau2   - float
        tau_ab - float, gamma shape; the value 1 takes the exponential shortcut

    Return values:
        xi - float from the gamma branch; the exponential branch returns
             whatever 1/exprnd_fast(...) yields (a [1 x 1] array)
    '''
    scale = 1 + 1/tau2
    if tau_ab != 1:
        return scale/np.random.gamma(tau_ab)
    return 1/exprnd_fast(1/scale)


# sample lambda2 for LASSO
# validated
def sample_lambda2_lasso(b, sigma2, tau2, delta2prod):
    '''
    Draw the local variances lambda2 for the LASSO prior.

    Input parameters:
        b          - [p x 1]
        sigma2     - float
        tau2       - float
        delta2prod - [p x 1]

    Return values:
        lambda2 - [p x 1], reciprocal of the randinvg draws
    '''
    means = np.sqrt(2*tau2*sigma2*delta2prod/b**2)
    shape = 2
    draws = randinvg(means, 1/shape)
    return 1/draws


# function sample the lambda2 for horseshoe prior
# validated
def sample_lambda2_hs(b, sigma2, tau2, nu, delta2prod):
    '''
    Draw the local variances lambda2 for the horseshoe prior.

    Input parameters:
        b          - [p x 1]
        sigma2     - float
        tau2       - float
        nu         - [p x 1]
        delta2prod - [p x 1]

    Return values:
        lambda2 - [p x 1], reciprocal of exponential draws with mean 1/scale
    '''
    rate = 1/nu + b**2/2/tau2/sigma2/delta2prod
    draws = exprnd_fast(1/rate)
    return 1/draws


# function sample nu for horseshoe prior
# validated
def sample_nu_hs(lambda2):
    '''
    Draw the auxiliary variables nu for the horseshoe prior.

    Input parameters:
        lambda2 - [p x 1]

    Return values:
        nu - [p x 1], reciprocal of exponential draws with mean 1/(1 + 1/lambda2)
    '''
    rate = 1 + 1/lambda2
    draws = exprnd_fast(1/rate)
    return 1/draws


# function computes the probability of data for the linear models
# validated
def br_regnlike_mu(error, mu, e, y, s2, tdof):
    '''
    Negative log-likelihood of the residuals for the continuous error models.

    Input parameters:
        error - str, error model: 'gaussian', 'laplace' or 't'
        mu    - [n x 1], not read by this function
        e     - [n x 1] residuals
        y     - [n x 1], not read by this function
        s2    - float, scale parameter of the error model
        tdof  - int, degrees of freedom, only read for the 't' model

    Return values:
        neglike    - float, sum of neglogprob
        neglogprob - [n x 1]
        prob       - [n x 1], exp(-neglogprob)

    Raises:
        ValueError - for any other error model (including 'binomial'), which
                     previously surfaced as an UnboundLocalError
    '''
    if error == 'gaussian':
        neglogprob = e**2/s2/2 + (1/2)*np.log(2*np.pi*s2)
    elif error == 'laplace':
        scale = np.sqrt(s2/2)
        neglogprob = np.abs(e)/scale + np.log(2*scale)
    elif error == 't':
        # imported here so the function does not depend on scipy.special having
        # been loaded as a side effect of another import
        from scipy.special import gammaln
        nu = tdof
        neglogprob = (-gammaln((nu+1)/2) + gammaln(nu/2)
                      + (nu+1)/2*np.log(1+1/nu*e**2/s2) + np.log(np.pi*nu*s2)/2)
    else:
        raise ValueError("unsupported error model: " + repr(error))

    prob = np.exp(-neglogprob)
    neglike = np.sum(neglogprob)

    return neglike, neglogprob, prob


# function compute the probability of data for the regression models
# validated
def br_regnlike(error, X, y, beta, beta0, s2, tdof):
    '''
    Negative log-likelihood of the data for the continuous regression models.

    Input parameters:
        error - str, error model: 'gaussian', 'laplace' or 't'
        X     - [n x p]
        y     - [n x 1]
        beta  - [p x nsamples]
        beta0 - [1 x nsamples]
        s2    - float, scale parameter of the error model
        tdof  - float, degrees of freedom, only read for the 't' model

    Return values:
        neglike    - [nsamples], column sums of neglogprob
        neglogprob - [n x nsamples]
        prob       - [n x nsamples], exp(-neglogprob)
        mu         - [n x nsamples], X*beta + beta0

    Raises:
        ValueError - for any other error model (including 'binomial'), which
                     previously surfaced as an UnboundLocalError
    '''
    mu = np.matmul(X, beta) + beta0
    e = mu - y

    if error == 'gaussian':
        neglogprob = e**2/s2/2 + (1/2)*np.log(2*np.pi*s2)
    elif error == 'laplace':
        scale = np.sqrt(s2/2)
        neglogprob = np.abs(e)/scale + np.log(2*scale)
    elif error == 't':
        # imported here so the function does not depend on scipy.special having
        # been loaded as a side effect of another import
        from scipy.special import gammaln
        nu = tdof
        neglogprob = (-gammaln((nu+1)/2) + gammaln(nu/2)
                      + (nu+1)/2*np.log(1+1/nu*e**2/s2) + np.log(np.pi*nu*s2)/2)
    else:
        raise ValueError("unsupported error model: " + repr(error))

    prob = np.exp(-neglogprob)
    neglike = np.sum(neglogprob, axis=0)

    return neglike, neglogprob, prob, mu


# function compute the model stats
# validated
def br_compute_model_stats(y, X, retval):
    '''
    Compute fit statistics from the posterior means stored in retval.

    Input parameters:
        y      - [n x 1]
        X      - [n x p]
        retval - dict; reads retval['runstats']['model'] and, for the continuous
                 models, 'muB', 'muB0', 'muSigma2' and retval['runstats']['tdof']

    Return values:
        modelstats - dict with 'logl' and 'r2' for the continuous models;
                     empty for binomial models, for which nothing is computed here

    Raises:
        ValueError - when the model name is not recognised
    '''
    requested = retval['runstats']['model']

    # map the accepted aliases onto the canonical model name
    if requested in ['binomial', 'logistic']:
        model = 'binomial'
    elif requested in ['gaussian', 'normal']:
        model = 'gaussian'
    elif requested in ['laplace', 'l1']:
        model = 'laplace'
    elif requested in ['t', 'student']:
        model = 't'
    else:
        raise ValueError("unsupported model: " + repr(requested))

    # initialised up front so binomial models return a dict instead of failing
    modelstats = {}

    # stats for continuous model
    if model != 'binomial':
        mu = np.matmul(X, retval['muB']) + retval['muB0']

        modelstats['logl'] = -br_regnlike(model, X, y, retval['muB'], retval['muB0'], retval['muSigma2'], retval['runstats']['tdof'])[0]
        modelstats['r2'] = 1 - np.sum((y - mu)**2) / np.sum((y - np.mean(y))**2)

    return modelstats


# function calculate the effective sample size
# validated
def ess(x):
    '''
    Effective sample size of a chain of samples.

    Input parameters:
        x - [n x 1] samples

    Return values:
        ESS     - effective sample size, capped at n; 0 when no pair of
                  neighbouring autocorrelations sums to a negative value
        ESSfrac - ESS divided by n
    '''
    n = len(x)
    max_lag = min(n-1, 2000)
    acf = my_autocorr(x, max_lag)

    # sums of neighbouring autocorrelations; the first negative one ends the window
    pair_sums = acf[1:max_lag] + acf[2:max_lag+1]
    negative = np.flatnonzero(pair_sums < 0)

    if len(negative) == 0:
        return 0, 0

    cutoff = negative[0]
    V = acf[0] + 2*np.sum(acf[1:cutoff+1])
    ACT = V/acf[0]
    ESS = min(n/ACT, n)
    return ESS, ESS/n

# function calculate autocorrelation
# validated
def my_autocorr(y, order):
    '''
    Autocorrelation of y for lags 0..order, computed through the FFT.

    Input parameters:
        y     - [n x 1] samples
        order - int, largest lag returned

    Return values:
        acf - [(order + 1) x 1], normalised so that the lag 0 entry equals 1
    '''
    centred = y - np.mean(y)
    # pad to twice the next power of two of the series length
    nFFT = int(2**(nextpow2(len(centred)) + 1))
    spectrum = np.fft.fft(centred, nFFT, axis=0)
    power = spectrum * np.conj(spectrum)

    acf = np.real(np.fft.ifft(power, axis=0)[:order+1])
    return acf/acf[0]


# function returns the smallest exponent p such that 2**p >= |n|
# validated
def nextpow2(n):
    '''
    Exponent of the smallest power of two that is not below |n|.

    Input parameters:
        n - numeric

    Return values:
        ceil(log2(|n|)), or 0.0 when n is zero
    '''
    magnitude = abs(n)
    return 0.0 if magnitude == 0 else np.ceil(np.log2(magnitude))
    
    
# function calculate the percentile
# validated
def prctile(X, perct):
    '''
    Column-wise percentile with linear interpolation.

    The sorted values of each column are placed at the percentages
    100*(i + 0.5)/N, i = 0..N-1. Requests between two positions are linearly
    interpolated; requests outside the covered range return the column
    minimum or maximum.

    Input parameters:
        X     - [n x p]
        perct - numeric, requested percentile (0..100)

    Return values:
        [p x 1], one percentile per column of X

    Raises:
        ValueError - when X has no rows
    '''
    N, _ = X.shape
    if N == 0:
        raise ValueError('prctile requires at least one observation')

    X_sort = np.sort(X, axis=0)
    perct_array = 100*((0.5 + np.arange(N))/N)

    # at or beyond the ends: return the whole first/last sorted row, so every
    # column gets its own minimum/maximum (not just the first column)
    if perct <= perct_array[0]:
        return X_sort[0].reshape(-1, 1)
    if perct >= perct_array[-1]:
        return X_sort[-1].reshape(-1, 1)

    exact = perct_array == perct
    if exact.any():
        return X_sort[exact][0].reshape(-1, 1)

    # strictly between two positions: interpolate between the neighbouring rows
    below = perct_array < perct
    above = perct_array > perct
    X_low = X_sort[below][-1]
    X_up = X_sort[above][0]
    p_low = perct_array[below][-1]
    p_up = perct_array[above][0]
    return (X_low + ((perct - p_low)/(p_up - p_low))*(X_up - X_low)).reshape(-1, 1)
        

# function get the ranking of variables
# validated
def bfr(b):
    '''
    Rank the variables by the magnitude of their sampled coefficients.

    Inputs:
        b: [p x nsamples] Regression parameters

    Return:
        varranks: [(p + 1) x 1] object array; the first p rows hold the ranks
                  (tied variables share a rank), the last row stays None
    '''
    p, nsamples = b.shape

    # rank the coefficients within every sample, rank 1 = largest magnitude
    ranks = np.zeros(shape = (p, nsamples))
    positions = list(range(p))
    for col in range(nsamples):
        magnitude = np.abs(b[:, col])
        by_size = [idx for _, idx in sorted(zip(magnitude, positions), reverse=True)]
        ranks[by_size, col] = range(1, p+1)

    # order the variables by the 75th percentile of their per-sample ranks
    q = prctile(ranks.T, 75)
    order = [idx for _, idx in sorted(zip(q, range(len(q))))]

    varranks = np.array([[None] for i in range(p+1)])
    rank = 1
    run_length = 1

    for i in range(p):
        var = order[i]
        if i > 0:
            if q[var] == q[order[i-1]]:
                # tie with the previous variable: keep the rank, lengthen the run
                run_length += 1
            else:
                # the next rank skips the positions used up by the tied run
                rank += run_length
                run_length = 1
        varranks[var] = rank

    return varranks


def sample_omega2_laplace(e, sigma2):
    '''
    Draw the observation weights omega2 for the Laplace error model.

    Input parameters:
        e      - [n x 1] residuals
        sigma2 - float

    Return values:
        omega2 - [n x 1], reciprocal of the randinvg draws
    '''
    means = np.sqrt(2*sigma2/e**2)
    lamda = 2
    draws = randinvg(means, 1/lamda)
    return 1/draws

def sample_omega2_tdist(e, sigma2, tdof):
    '''
    Draw the observation weights omega2 for the Student-t error model.

    Input parameters:
        e      - [n x 1] residuals
        sigma2 - float
        tdof   - numeric, degrees of freedom

    Return values:
        omega2 - [n x 1], scale divided by gamma draws with shape (tdof + 1)/2
    '''
    n_obs = len(e)
    gamma_shape = (tdof + 1)/2
    scale = e**2/sigma2/2 + tdof/2
    draws = np.random.gamma(shape = gamma_shape, size = (n_obs, 1))
    return scale/draws


def br_summary(beta, beta0, retval):
    '''
    Print a summary table of the posterior samples.

    Input parameters:
        beta   - [p x nsamples] posterior samples of the regression coefficients
        beta0  - [1 x nsamples] posterior samples of the intercept
        retval - dict; reads 'Xstats', 'runstats', 'muB', 'tStats', 'varranks' and
                 'modelstats' (plus 'muSigma2' for non-binomial models and
                 'medB'/'medB0' for binomial models)

    Return values:
        None, the summary is written to standard output
    '''
    def as_scalar(value):
        # stored statistics are numpy scalars or [1 x 1] arrays depending on the
        # sampling path, so both are reduced to their single entry
        return np.asarray(value).reshape(-1)[0]

    def fmt(value, width):
        return '{:.3f}'.format(float(value)).rjust(width)

    xstats = retval['Xstats']
    runstats = retval['runstats']

    varnames = xstats['varnames']
    nx = xstats['nx']
    px = xstats['px']

    prior = runstats['prior']
    nsamples = runstats['nsamples']
    burnin = runstats['burnin']
    thin = runstats['thin']
    runBFR = runstats['runBFR']
    sortrank = runstats['sortrank']
    displayor = runstats['displayor']
    tdof = runstats['tdof']

    # Model type (only these two flags influence the report)
    binomial = runstats['model'] in ['binomial', 'logistic']
    tdist = runstats['model'] in ['t', 'student']

    # compute ess for each variable
    ESSfrac = np.zeros(shape = (px, 1))
    for j in range(px):
        _, ESSfrac[j] = ess(beta[j,:].reshape(-1, 1))

    # table symbols
    chline = '-'
    cvline = '|'
    cTT    = '+'

    # row labels; the intercept falls back to 'cons' when varnames only
    # covers the p predictors
    names = []
    for k in range(px + 1):
        if k < len(varnames):
            names.append(str(as_scalar(varnames[k])))
        else:
            names.append('cons')

    # find length of the longest variable name (measured on the name itself)
    maxlen = max([12] + [len(name) for name in names[:px]])

    # display pretable information
    modeltxt = 'logistic' if binomial else 'linear'

    excess_s = ' ' * (maxlen - 12)
    print('Bayesian ' + modeltxt + ' ' + prior + ' regression', '\n\n')
    print('Number of obs   =' + excess_s + str(nx))
    print('Number of vars  =' + excess_s + str(px),'\n')

    if not binomial:
        s2 = as_scalar(retval['muSigma2'])
        if tdist:
            s2 = tdof/(tdof-2)*s2
        print('MCMC Samples    =' + excess_s + str(nsamples))
        print('MCMC Burnin     =' + excess_s + str(burnin))
        print('MCMC Thining    =' + excess_s + str(thin), '\n')

        print('Root MSE        =' + excess_s + str(np.round(np.sqrt(s2), 3)))
        print('R-Squared       =' + excess_s + str(np.round(retval['modelstats']['r2'], 3)))
        print('WAIC            =' + excess_s + str(np.round(retval['modelstats']['waic'], 3)), '\n')

    # print table header; header and rows share the same column widths
    header = ' '.join(['Parameter'.rjust(maxlen), cvline, 'mean(Coef)'.rjust(11), 'std(Coef)'.rjust(11),
                       '[95% Cred. Interval]'.rjust(24), 'tStat'.rjust(10), 'Rank'.rjust(7), 'ESS'.rjust(9)])
    rule = chline*(maxlen + 1) + cTT + chline*(len(header) - maxlen - 2)
    print(rule)
    print(header)
    print(rule)

    # variable information
    rank_values = [as_scalar(retval['varranks'][k]) for k in range(px + 1)]

    if runBFR and sortrank:
        # order the predictors by rank; the intercept has no rank and is listed last
        def rank_key(k):
            rank = rank_values[k]
            return (rank is None, 0 if rank is None else rank, k)
        indices = sorted(range(px), key=rank_key) + [px]
    else:
        indices = list(range(px+1))

    for k in indices:
        if k < px:
            # regression variables
            s = beta[k,:]
            mu = retval['muB'][k][0]
            if binomial:
                mu = retval['medB'][k]
        else:
            # intercept
            s = beta0
            mu = np.mean(s)
            if binomial:
                mu = retval['medB0']

        # compute the credible interval
        std_err = np.std(s)
        s = s.reshape(-1,1)

        if binomial and displayor:
            # odds-ratio scale: interval from the exponentiated samples
            exp_s = np.exp(s)
            lower = prctile(exp_s, 2.5)[0][0]
            upper = prctile(exp_s, 97.5)[0][0]
            mu = np.exp(mu)
            std_err = (upper - lower)/2/1.96
        else:
            lower = prctile(s, 2.5)[0][0]
            upper = prctile(s, 97.5)[0][0]

        # display results
        if k >= px:
            tstats = '.'
            E = '.'
        else:
            tstats = '{:.3f}'.format(float(as_scalar(retval['tStats'][k])))
            E = '{:.3f}'.format(float(as_scalar(ESSfrac[k]))*100)

        rank = '.' if rank_values[k] is None else str(rank_values[k])

        print(' '.join([names[k].rjust(maxlen), cvline, fmt(as_scalar(mu), 11), fmt(as_scalar(std_err), 11),
                        fmt(lower, 12), fmt(upper, 11), tstats.rjust(10), rank.rjust(7), E.rjust(9)]))
        
        
def bayesreg(X, y, model, prior, normalize = True, runBFR = True, nsamples = 1000, burnin = 1000, thin = 5, display = True, 
             displayor = False, varnames = None, sortrank = False, tdof = 5, catvars = None, nogrouping = False, 
             groups = None, tau2prior = [0.5,0.5], blocksample = None, blocksize = None, waic = True):
    '''
    Input parameters:
        Required:
            X           - [n x p], design matrix

            y           - [n x 1], target vector

            model       - str, population distribution

            prior       - str, prior distribution
            
        Optional:
            normalize   - bool, whether to normalize design matrix X, default True
            
            runBFR      - bool, whether to compute ranking of importance for predictors, default: True
            
            nsamples    - int, number of posterior samples, default: 1000

            burnin      - int, number of burnin samples, default: 1000

            thin        - int, level of thining, default: 5

            display     - bool, whether to display summary stats, default: True

            displayor   - bool, whether to display odds ratio for logistic regression, default: False

            varnames    - [p x 1]/None, names for the variables, default: None

            sortrank    - bool, whether to display variables as the order of ranking, default: False

            tdof        - int, degree of freedom for student-t distribution, default: 5

            carvars     - [ncat x 1]/None indexes of categorical variables, default: None

            nogrouping  - bool, whether stop automatically grouping categorical variables after expansion, default: False

            groups      - None/2D list with the first dimension indicating groups and the second dimension indicating which predictors are in the group, default: None

            tau2prior   - str/[a, b], hyperparameters of beta prior on shrinkage parameters, default: [0.5, 0.5]

            blocksample - None/int, the number of blocks when sampling beta, default: None

            blocksize   - None/int, the approximate size of each block when sampling beta, default: None

            waic        - bool, whether to compute WAIC, default: True
    
    Return parameters:
            beta        - [p x nsamples], posterior samples of beta
            beta0       - [1 x nsamples], posterior samples of beta0
            retval      - dict, additional sampling information
    '''
    start = datetime.datetime.now()
    # format X, y to numpy array
    X, y, varnames = toNpArray(X, y, varnames)
    
    # Data dimension
    nx, px = X.shape
    ny, py = y.shape
    
    # Constants
    MAX_PRECOMPUTED_PX = 2e4
    
    #expectedModel= ['gaussian', 'normal', 'laplace', 't', 'studentt', 'binomial', 'logistic']
    #expectedPrior = ['ridge','rr','horseshoe','hs','lasso','hs+','horseshoe+','gprior','g']
    #expectedTau2Prior = ['hc','sb','uniform']
    
    # Model type
    gaussian = False
    laplace = False
    tdist = False
    binomial = False
    
    if model in ["binomial", "logistic"]:
        binomial = True
        model = "binomial"
    elif model in ["gaussian", "normal"]:
        gaussian = True
        model = "gaussian"
    elif model in ["laplace", "l1"]:
        laplace = True
        model = "laplace"
    elif model in ["t", "student"]:
        tdist = True
        model = "t"
        
    # Prior type
    gprior = False
    ridge = False
    lasso = False
    horseshoe = False
    horseshoeplus = False
    
    if prior in ["gprior", "g"]:
        gprior = True
        prior = 'g'
        nogrouping = True
    elif prior in ["ridge", "rr"]:
        ridge = True
        prior = 'ridge'
        nogrouping = True
    elif prior in ["lasso"]:
        lasso = True
        prior = 'lasso'
    elif prior in ["horseshoe", "hs"]:
        horseshoe = True
        prior = 'horseshoe'
    elif prior in ["horseshoe+", "hs+"]:
        horseshoeplus = True
        prior = 'horseshoe+'
        
    # type of tau2 prior
    if isinstance(tau2prior, str):
        if tau2prior == 'hc':
            tau_a = 0.5
            tau_b = 0.5
        elif tau2prior == 'sb':
            tau_a = 0.5
            tau_b = 1.0
        elif tau2prior == 'uniform':
            tau_a = 1.0
            tau_b = 1.0
    else:
        tau_a = tau2prior[0]
        tau_b = tau2prior[1]
    
    # create temporary variable names
    if varnames is None:
        varnames = np.array([['v'+str(i+1)] for i in range(px)])
        varnames = np.concatenate((varnames, [['cons']]), axis=0)
        
    # setup variable processing rule
    vars_ = {}
    vars_['description'] = 'Variable information'
    vars_['varnames'] = varnames
    vars_['isVarCat'] = np.array([[False] for i in range(px)])
    vars_['isVarCat'][catvars] = True
    
    
    # change y to z   
    z = y
    weights = laplace or tdist or binomial
    
    # normalize data
    if not normalize:
        muX = np.zeros(shape=(1, px))
        normX = np.ones(shape=(1, px))
    else:
        X, muX, normX = standardise(X, None)
        
    # return values
    retval = {"sparsify_method":"", 'sparseB0':None, "sparseB": None, "medB0": None, "medB": None,
              "muB0": 0, "muB": np.zeros(shape=(px, 1)), "tau2": np.zeros(shape=(1, nsamples)), "xi": np.zeros(shape=(1, nsamples))}
    beta0 = np.zeros(shape = (1, nsamples))
    beta = np.zeros(shape = (px, nsamples))
    
    if not binomial:
        retval["sigma2"] = np.zeros(shape=(1, nsamples))
        retval["muSigma2"] = 0
    if not (ridge or gprior):
        retval["lambda2"] = np.zeros(shape = (px, nsamples))
    
    # Initial values for sampling
    b = np.random.normal(size=(px, 1))
    sigma2 = 1
    e = None
    tau2 = 1
    xi = 1
    lambda2 = np.ones(shape=(px, 1))
    omega2 = np.ones(shape=(nx, 1))
    nu = np.ones(shape=(px, 1))
    phi2 = np.ones(shape=(px, 1))
    zeta = np.ones(shape=(px, 1))
    XtX = None
    Xty = None
    Xt1 = None
    negll = np.zeros(shape=(1, nsamples))
    waicProb = np.zeros(shape=(nx, 1))
    waicLProb = np.zeros(shape=(nx, 1))
    waicLProb2 = np.zeros(shape=(nx, 1))
    
    # determine sampling algorithm
    mvnrue = True
    if(px/nx >= 2):
        mvnrue = False;
    
    # precomputation
    precompute = False
    if ((gaussian and mvnrue) or gprior) and px < MAX_PRECOMPUTED_PX:
        precompute = True
        if gaussian:
            yty = np.matmul(z.T, z)[0][0]
            Xty = np.matmul(X.T, z)
        XtX = np.matmul(X.T, X)
    
    #Always precompute mean(z) if Gaussian
    mu_z = None
    if gaussian:
        mu_z = np.mean(z)
        if not normalize:
            Xt1 = np.sum(X, axis=0).reshape(-1,1)

    # Statistics for result structure retval
    retval["runstats"] = {}
    retval["runstats"]["description"] = "run arguments"
    retval['runstats']['model']= model
    retval['runstats']['prior'] = prior
    retval['runstats']['nsamples'] = nsamples
    retval['runstats']['burnin'] = burnin
    retval['runstats']['thin'] = thin
    retval['runstats']['normalize'] = normalize
    retval['runstats']['runBFR']= runBFR
    retval['runstats']['sortrank'] = sortrank
    retval['runstats']['displayor'] = displayor
    retval['runstats']['blocksample'] = blocksize
    retval['runstats']['tdof'] = tdof
    retval['runstats']['tau2prior'] = [tau_a, tau_b]
    
    if not tdist:
        retval['runstats']['tdof'] = None
        
    # X stats
    retval["Xstats"] = {}
    retval['Xstats']['description'] = 'Predictor matrix statistics'
    retval['Xstats']['varnames'] = varnames
    retval['Xstats']['nx'] = nx
    retval['Xstats']['px'] = px
    retval['Xstats']['muX'] = muX
    retval['Xstats']['normX'] = normX
    
    ######################################
    k = -1
    iters = 0
    while k < nsamples-1:
        # sample beta0
        b0, muB0 = sample_beta0(X, z, mu_z, Xt1, b, sigma2, omega2)
        
        # form diagonal Lambda matrix
        _, delta2prod = make_Lambda(sigma2, tau2, lambda2)
        
        # sample beta
        b, muB = sample_beta(X, z, mvnrue, b0, sigma2, tau2, lambda2, delta2prod, omega2, XtX, Xty, Xt1, weights, gprior, b)
        
        mu = None
        if gprior or XtX is None or nx < px or waic:
            mu = np.matmul(X, b) + b0
        
        # sample sigma2
        if not binomial:
            ete = None
            if mu is None:
                if Xt1 is None:
                    ete = yty - 2*np.matmul(Xty.T, b) + np.matmul(np.matmul(b.T, XtX), b) + b0**2*nx - 2*mu_z*nx*b0
                else:
                    ete = yty - 2*np.matmul(Xty.T, b) + np.matmul(np.matmul(np.concatenate((b, np.array([[b0]]))).T, np.concatenate((np.concatenate((XtX, Xt1)), np.concatenate((Xt1.T, np.array([[nx]])))), axis=1)), np.concatenate(b, np.array([[b0]]))) - 2*mu_z*nx*b0
            sigma2, muSigma2, e = sample_sigma2(mu, y, b, ete, omega2, tau2, lambda2, delta2prod, gprior)
        
        # sample omega2
        if weights:
            if laplace:
                omega2 = sample_omega2_laplace(e, sigma2)
            elif tdist:
                omega2 = sample_omega2_tdist(e, sigma2, tdof)
        
        # sample tau2
        tau2, _ = sample_tau2(b, sigma2, lambda2, delta2prod, xi, mu, gprior, tau_a)
        
        # sample xi
        xi = sample_xi(tau2, tau_a + tau_b)
        
        # individual shrinkage
        if lasso:
            lambda2 = sample_lambda2_lasso(b, sigma2, tau2, delta2prod)
        elif horseshoe:
            lambda2 = sample_lambda2_hs(b, sigma2, tau2, nu, delta2prod)
            nu = sample_nu_hs(lambda2)
        elif horseshoeplus:
            lambda2 = sample_lambda2_hs(b, sigma2, tau2, nu, delta2prod*phi2)
            nu = sample_nu_hs(lambda2)
            
            phi2 = sample_lambda2_hs(b, sigma2, tau2, zeta, delta2prod*lambda2)
            zeta = sample_nu_hs(phi2)
            
            lambda2 = lambda2 * phi2
            
        # collect samples
        iters += 1
        if iters > burnin:
            # thining
            if iters%thin == 0:
                k += 1
                # store posterior samples
                beta0[0][k] = b0
                beta[:,k] = b.T
                
                # store posterior means
                retval["muB"] += muB
                retval["muB0"] += muB0
                retval["tau2"][0][k] = tau2
                
                # negloglikelihood of the model
                if mu is not None:
                    negll[0][k], lprob, prob = br_regnlike_mu(model, mu, e, y, sigma2, tdof)
                else:
                    negll[0][k] = (nx/2)*np.log(2*np.pi*sigma2) + ete/2/sigma2
                    prob = 0
                    lprob = 0
                
                # calculate WAIC
                waicProb = waicProb + prob
                waicLProb = waicLProb + lprob
                waicLProb2 = waicLProb2 + lprob**2
                
                if not binomial:
                    retval['sigma2'][0][k] = sigma2
                    retval['muSigma2'] = retval['muSigma2'] + muSigma2
                    
                if not (ridge or gprior):
                    retval['lambda2'][:,k] = lambda2.reshape(-1)
                    
    # compute average posterior means
    retval["muB"] /= nsamples
    retval["muB0"] /= nsamples
    if not binomial:
        retval['muSigma2'] /= nsamples
        
    # other stats
    retval['tStats'] = retval['muB']/np.std(beta, axis=1).reshape(-1,1)
    retval['varranks'] = np.array([[None] for i in range(px+1)])
    retval['vars'] = vars_

    # if required, compute variable ranking
    if runBFR:
        retval['varranks'] = bfr(beta)
        
    # compute model fit stats
    retval['modelstats'] = br_compute_model_stats(y, X, retval)
    retval['modelstats']['negll'] = negll
    if waic:
        retval['modelstats']['waic_dof'] = np.sum(waicLProb2/nsamples) - np.sum((waicLProb/nsamples)**2)
        retval['modelstats']['waic'] = -np.sum(np.log(waicProb/nsamples)) + retval['modelstats']['waic_dof']
    else:
        retval['modelstats']['waic_dof'] = np.Inf
        retval['modelstats']['waic'] = np.Inf
        
    # rescale coefficients
    if normalize:
        beta = beta/normX.reshape(-1,1)
        beta0 = beta0 - np.matmul(muX, beta)
        
        retval['muB'] = retval['muB']/normX.reshape(-1,1)
        retval['muB0'] = retval['muB0'] - np.matmul(muX, retval['muB'])
    
    # posterior median estimation
    retval['medB'] = np.median(beta, axis=1)
    retval['medB0'] = np.median(beta0)
    
    
    retval['runstats']['rundate'] = str(datetime.datetime.now().date())
    retval['runstats']['runtime'] = (datetime.datetime.now() - start).total_seconds()
    
    if display:
        br_summary(beta, beta0, retval)
        
    return retval, beta0, beta

# Examples
The examples below try different models and priors: Gaussian, Laplace and Student-t models, each combined with ridge, lasso, horseshoe and horseshoe+ priors. The results are compared with those of the Matlab version of Bayesreg.

The comparison shows that the Python implementation and the Matlab implementation give similar results. The R^2 and the WAIC are the same.

## Load the diabetes dataset

In [3]:
# Load the diabetes data as a dict-like bunch rather than an (X, y) tuple.
data = sklearn.datasets.load_diabetes(return_X_y=False)
# Design matrix as provided; target reshaped into a single column.
X, y = data['data'], data['target'].reshape(-1, 1)
print(X.shape)
print(y.shape)

(442, 10)
(442, 1)


## Gaussian model with ridge prior

In [4]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'gaussian', 'ridge')

Bayesian linear ridge regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =54.437
R-Squared       =0.515
WAIC            =2398.05 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -3.803        59.934            -121.861    117.04    -0.063    10        83.692
            v2|    -224.719        60.333            -347.105    -108.006    -3.725    4        100.0
            v3|    511.268        66.187            381.382    643.289    7.725    1        100.0
            v4|    313.809        64.723            183.672    432.562    4.848    3        100.0
            v5|    -184.269        207.842            -629.666    218.705    -0.887   

## The corresponding Matlab result
<img src="img/gaussian_ridge.png" width='600px'>

## Gaussian model with Lasso prior

In [5]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'gaussian', 'lasso')

Bayesian linear lasso regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =54.388
R-Squared       =0.515
WAIC            =2397.489 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -3.721        53.14            -106.694    106.726    -0.07    10        94.562
            v2|    -211.592        58.917            -315.766    -91.198    -3.591    4        100.0
            v3|    523.057        67.461            388.693    657.372    7.754    1        100.0
            v4|    306.715        65.803            177.866    434.296    4.661    3        100.0
            v5|    -181.111        187.153            -616.076    105.056    -0.968    

## The corresponding Matlab result
<img src="img/gaussian_lasso.png" width='600px'>

## Gaussian model with horseshoe prior

In [6]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'gaussian', 'horseshoe')

Bayesian linear horseshoe regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =54.424
R-Squared       =0.514
WAIC            =2397.745 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -2.592        43.056            -94.144    85.588    -0.06    10        87.228
            v2|    -195.514        67.562            -320.113    -58.575    -2.894    4        86.766
            v3|    534.607        69.647            399.423    666.466    7.676    1        100.0
            v4|    300.124        66.688            170.937    433.867    4.5    3        89.813
            v5|    -174.993        195.432            -709.323    65.382    -0.895  

## The corresponding Matlab result
<img src="img/gaussian_horseshoe.png" width='600px'>

## Gaussian model with horseshoe+ prior

In [7]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'gaussian', 'horseshoe+')

Bayesian linear horseshoe+ regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =54.422
R-Squared       =0.514
WAIC            =2397.768 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -1.965        35.44            -77.477    78.471    -0.055    10        100.0
            v2|    -193.954        68.757            -326.029    -50.27    -2.821    4        100.0
            v3|    538.712        68.045            408.379    670.578    7.917    1        95.126
            v4|    302.388        66.225            166.441    424.102    4.566    3        98.168
            v5|    -140.722        167.302            -611.971    58.538    -0.841 

## The corresponding Matlab result
<img src="img/gaussian_horseshoe+.png" width='600px'>

## Laplace model with ridge prior

In [8]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'laplace', 'ridge')

Bayesian linear ridge regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =62.073
R-Squared       =0.512
WAIC            =2425.955 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -25.633        59.051            -148.229    81.805    -0.434    10        64.245
            v2|    -301.471        59.505            -420.812    -188.937    -5.066    4        66.267
            v3|    473.344        68.598            332.354    614.994    6.9    2        72.281
            v4|    376.237        66.431            244.245    505.283    5.664    3        64.243
            v5|    -209.831        224.089            -680.928    177.998    -0.936

## The corresponding Matlab result
<img src="img/laplace_ridge.png" width='600px'>

## Laplace model with Lasso prior

In [9]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'laplace', 'lasso')

Bayesian linear lasso regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =61.943
R-Squared       =0.513
WAIC            =2425.464 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -21.557        54.482            -138.932    81.9    -0.396    9        81.402
            v2|    -290.029        62.808            -409.183    -169.562    -4.618    4        72.573
            v3|    478.17        69.507            341.612    620.739    6.879    1        72.797
            v4|    368.158        68.594            240.786    497.233    5.367    3        62.007
            v5|    -221.251        201.417            -676.943    100.612    -1.098  

## The corresponding Matlab result
<img src="img/laplace_lasso.png" width='600px'>

## Laplace model with horseshoe prior

In [10]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'laplace', 'horseshoe')

Bayesian linear horseshoe regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =61.971
R-Squared       =0.512
WAIC            =2425.505 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -16.782        44.877            -115.483    63.747    -0.374    10        60.13
            v2|    -284.198        66.356            -409.867    -153.04    -4.283    4        58.792
            v3|    489.036        69.274            348.245    626.715    7.059    2        64.762
            v4|    361.924        69.371            229.207    492.418    5.217    3        52.088
            v5|    -202.689        186.085            -633.918    82.478    -1.

## The corresponding Matlab result
<img src="img/laplace_horseshoe.png" width='600px'>

## Laplace model with horseshoe+ prior

In [11]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 'laplace', 'horseshoe+')

Bayesian linear horseshoe+ regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =61.918
R-Squared       =0.512
WAIC            =2424.741 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -11.702        37.911            -103.66    57.512    -0.309    9        59.345
            v2|    -288.598        63.528            -417.441    -156.002    -4.543    4        60.653
            v3|    484.961        72.141            348.603    629.713    6.722    2        73.885
            v4|    362.819        67.65            224.906    489.488    5.363    3        57.934
            v5|    -204.696        199.999            -707.471    59.594    -1.

## The corresponding Matlab result
<img src="img/laplace_horseshoe+.png" width='600px'>

## T model with ridge prior

In [12]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 't', 'ridge')

Bayesian linear ridge regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =60.858
R-Squared       =0.515
WAIC            =2409.255 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -18.01        58.801            -136.799    102.86    -0.306    10        89.367
            v2|    -265.852        62.34            -389.318    -141.207    -4.265    4        100.0
            v3|    518.63        66.285            383.677    652.052    7.824    1        100.0
            v4|    330.423        61.327            204.051    452.531    5.388    3        100.0
            v5|    -200.239        213.756            -619.112    214.31    -0.937    5

## The corresponding Matlab result
<img src="img/t_ridge.png" width='600px'>

## T model with Lasso prior

In [13]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 't', 'lasso')

Bayesian linear lasso regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =60.825
R-Squared       =0.515
WAIC            =2408.954 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -16.166        50.193            -118.36    81.142    -0.322    10        100.0
            v2|    -254.827        63.449            -383.299    -132.64    -4.016    4        92.795
            v3|    526.419        68.858            386.19    659.939    7.645    1        89.618
            v4|    325.472        63.681            196.774    452.909    5.111    3        92.649
            v5|    -201.829        194.41            -669.021    123.985    -1.038   

## The corresponding Matlab result
<img src="img/t_lasso.png" width='600px'>

## T model with horseshoe prior

In [14]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 't', 'horseshoe')

Bayesian linear horseshoe regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =60.919
R-Squared       =0.514
WAIC            =2408.963 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -10.697        43.323            -105.893    74.096    -0.247    10        100.0
            v2|    -244.022        64.374            -374.216    -114.3    -3.791    4        100.0
            v3|    537.912        71.688            397.403    682.782    7.504    1        89.482
            v4|    315.411        69.273            182.085    453.3    4.553    3        84.376
            v5|    -191.206        184.26            -612.327    61.572    -1.038  

## The corresponding Matlab result
<img src="img/t_horseshoe.png" width='600px'>

## T model with horseshoe+ prior

In [15]:
# bayesreg returns (retval, beta0, beta) in that order; unpack to matching names.
retval, beta0, beta = bayesreg(X.copy(), y.copy(), 't', 'horseshoe+')

Bayesian linear horseshoe+ regression 


Number of obs   =442
Number of vars  =10 

MCMC Samples    =1000
MCMC Burnin     =1000
MCMC Thining    =5 

Root MSE        =60.861
R-Squared       =0.514
WAIC            =2408.798 

------------- + -----------------------------------------------------------------------------------
  Parameter      mean(Coef)    std(Coef)        [95% Cred. Interval]      tStat    Rank       ESS
------------- + -----------------------------------------------------------------------------------
            v1|    -7.778        35.431            -89.172    60.803    -0.22    10        96.193
            v2|    -245.847        65.034            -371.946    -120.287    -3.78    4        68.981
            v3|    538.858        69.023            400.754    677.66    7.807    1        100.0
            v4|    317.417        67.558            177.272    445.018    4.698    3        100.0
            v5|    -183.812        188.728            -647.349    52.295    -0.974 

## The corresponding Matlab result
<img src="img/t_horseshoe+.png" width='600px'>

# Code for future work

Please ignore for now. Code below does not have an effect on the current procedure.

def sample_delta2_hs(b, sigma2, tau2, lambda2, rho, delta2prod, delta2, groups, nGroups, GroupSizes):
    '''
    Resample the group-level shrinkage terms delta2 (horseshoe hierarchy).

    Each of the first nGroups entries of delta2 is redrawn as
    scale_g / Gamma(shape=(GroupSizes[g] + 1)/2, scale=1), i.e. an
    inverse-gamma draw, where
    scale_g = 1/rho[g] + sum(K over members of group g) / (2*tau2*sigma2).

    Input parameters:
        b          - coefficients (presumably [p x 1] -- TODO confirm)
        sigma2     - float
        tau2       - float
        lambda2    - same shape as b
        rho        - one value per group (flattened internally)
        delta2prod - same shape as b; product of group terms per coefficient
        delta2     - group terms, indexed by `groups`; the first nGroups
                     entries are overwritten in place, the last entry is
                     left untouched
        groups     - integer group id per coefficient (0 .. nGroups-1 for
                     grouped coefficients)
        nGroups    - int, number of groups to resample
        GroupSizes - number of coefficients per group (flattened internally)

    Return values:
        delta2     - the (in-place updated) group terms
        delta2prod - delta2prod with the old group term swapped for the new
    '''
    # Divide out the current group term so K excludes the value being updated.
    delta2prod = delta2prod/delta2[groups]
    K = b**2/lambda2/delta2prod

    rho = np.asarray(rho, dtype=float).reshape(-1)

    # One scale per group, stored flat so scale[i] addresses group i
    # (a (1, nGroups) array would raise IndexError for i >= 1).
    scale = np.zeros(nGroups)
    for i in range(nGroups):
        ix = (groups == i)
        scale[i] = 1/rho[i] + 1/2/tau2/sigma2 * np.sum(K[ix])

    # Inverse-gamma draw: the group size sets the gamma *shape*; scale is 1.
    shape = (np.asarray(GroupSizes, dtype=float).reshape(-1) + 1)/2
    draws = scale/np.random.gamma(shape=shape, scale=1.0)
    delta2[:-1] = draws.reshape(delta2[:-1].shape)

    delta2prod = delta2prod * delta2[groups]
    return delta2, delta2prod

def sample_delta2_lasso(b, sigma2, tau2, lambda2, delta2prod, delta2, groups, nGroups, GroupSizes):
    '''
    Resample the group-level shrinkage terms delta2 (lasso hierarchy).

    For every group g in 0 .. nGroups-1, delta2[g] is replaced (in place) by
    gigrnd(p, a, b, 1) with
        p = GroupSizes[g]/2 - 1
        a = sum(K over members of group g) / (tau2*sigma2)
        b = 2
    where K = b**2 / lambda2 / (delta2prod with the old group term removed).

    Input parameters:
        b, lambda2, delta2prod - arrays of matching shape (presumably
                                 [p x 1] -- TODO confirm)
        sigma2, tau2           - float
        delta2                 - group terms, indexed by `groups`
        groups                 - integer group id per coefficient
        nGroups                - int
        GroupSizes             - number of coefficients per group

    Return values:
        delta2     - the (in-place updated) group terms
        delta2prod - delta2prod rebuilt with the new group terms
    '''
    # Strip the current group term before forming the per-coefficient weights.
    base_prod = delta2prod/delta2[groups]
    weights = b**2/lambda2/base_prod

    for g in range(nGroups):
        members = (groups == g)
        p_param = GroupSizes[g]/2 - 1
        a_param = 1/tau2/sigma2 * np.sum(weights[members], axis=0)
        delta2[g] = gigrnd(p_param, a_param, 2, 1)

    return delta2, base_prod * delta2[groups]

def sample_nu_hsplus(lambda2, phi2):
    '''
    Draw nu given lambda2 and phi2 (horseshoe+ hierarchy).

    Return values:
        nu - 1 / exprnd_fast(1 / (1/phi2 + 1/lambda2))
    '''
    inv_sum = 1/phi2 + 1/lambda2
    return 1/exprnd_fast(1/inv_sum)

def sample_phi2_hsplus(mu, zeta):
    '''
    Draw phi2 given nu and zeta (horseshoe+ hierarchy).

    Input parameters:
        mu   - the nu value used in the update. The parameter keeps its
               original name `mu` so existing callers are unaffected;
               presumably it is the output of sample_nu_hsplus -- TODO confirm
        zeta - second term of the update

    Return values:
        phi2 - 1 / exprnd_fast(1 / (1/nu + 1/zeta))
    '''
    # The body used to read an undefined name `nu` (NameError, or a stray
    # global); bind it explicitly to the argument that was passed in.
    nu = mu
    scale = 1/nu + 1/zeta
    phi2 = 1/exprnd_fast(1/scale)
    return phi2

def sample_zeta_hsplus(phi2):
    '''
    Draw zeta given phi2 (horseshoe+ hierarchy).

    Return values:
        zeta - 1 / exprnd_fast(1 / (1 + 1/phi2))
    '''
    inv_sum = 1 + 1/phi2
    return 1/exprnd_fast(1/inv_sum)