In [None]:
# Imports necessary Packages
import numpy as np
import scipy.stats as sps
import pandas as pd

# Specific Plotting Packages
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import patches
from mpl_toolkits.mplot3d import Axes3D

# sklearn
from sklearn.preprocessing import normalize
from sklearn.mixture import BayesianGaussianMixture

# other specific packages
from scipy.integrate import quad
from scipy.special import gamma as gamma_func
import warnings

# Import Custom Packages
from distr_tools import mixture_dist
import supplemental_funcs as sf
import example_master as EM # sets values for example

# to show in notebook
%matplotlib inline

In [None]:
from importlib import reload

In [None]:
# use a larger default font for every figure in this notebook
plt.rcParams['font.size'] = 14

In [None]:
# registry of the figures built below: { 'filename' : figure }
fig_all_master = dict()

# Review of Density Estimation

In this notebook, we go through and review the following density estimation techniques for the observed distribution from a finite sample, producing figures as we go.

1. Kernel Density Estimates
2. Bayesian Mixture Model
3. Dirichlet Process Mixture Model

For these examples, we will use the following distribution and sample for reference.

In [None]:
# read the observed (target) mixture distribution and its full sample
# from the example parameter module
obs_dist, obs_sample_full = EM.tri_peak_mixture, EM.tri_peak_sample

In [None]:
# overlay the target density on a normalized histogram of the full sample
qx = EM.tri_peak_qx
plt.plot(qx, obs_dist.pdf(qx), label='Target Density')
plt.hist(
    obs_sample_full,
    density=True,
    color='xkcd:sky blue',
    edgecolor='k',
    alpha=0.25,
    label='Full Sample n={}'.format(len(obs_sample_full)),
)
plt.legend()

This density above is a mixture model (see [`example_master.py`](example_master.py)) with the following parameters:

\begin{align}
\pi^{obs}(q)&= \sum_{k=1}^3 w_k f_k(q) \\
\\
f_1(q)&\sim 3(\text{Beta}(2,5))-2 \\
f_2(q)&\sim \text{Truncnorm}(\mu=2.5,\sigma=1.5,[-2,\infty]) \\
f_3(q)&\sim \text{Truncnorm}(\mu=10,\sigma=2.5,[-2,\infty]) \\
\\
w &= \left(\frac{2}{10},\frac{5}{10},\frac{3}{10}\right) \\
\end{align}


# Kernel Density Estimate

First we just illustrate what a KDE looks like using the sum of bumps.

In [None]:
# Build two Gaussian KDEs on a hand-picked five-point subsample: one with
# SciPy's default (Scott) bandwidth and one with a smaller bandwidth.
this_n = 5
ind_ex = np.array([34,57,10,96,13])  # fixed indices chosen for the example
this_obs = obs_sample_full[ind_ex]

# a scalar bw_method is used by gaussian_kde as a factor on the sample
# standard deviation, so dividing by the std gives a bandwidth h = n**(-1/5)
small_h_factor = this_n**(-1/5)/np.std(this_obs,ddof=1)
this_kde_list = [
    sps.gaussian_kde(this_obs),
    sps.gaussian_kde(this_obs, bw_method=small_h_factor),
]

# bandwidths h = factor * sample std, in the same order as this_kde_list
h_list = [factor*np.std(this_obs,ddof=1)
          for factor in (this_kde_list[1].scotts_factor(), small_h_factor)]


def kde_bump(x,mu,h,n=None):
    """Contribution ("bump") of a single observation to a Gaussian KDE.

    Evaluates ``1/(n*h) * phi((x - mu)/h)``, where ``phi`` is the standard
    normal density. Summing the bumps of all ``n`` observations gives the
    kernel density estimate with bandwidth ``h``.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the bump is evaluated.
    mu : float
        Observation on which the kernel is centered.
    h : float
        Bandwidth of the kernel.
    n : int, optional
        Number of observations in the estimate. When omitted, the
        notebook-level ``this_n`` is read at call time (the original
        behavior). Pass it explicitly to avoid depending on that global,
        which is reassigned by a later cell.

    Returns
    -------
    float or numpy.ndarray
        Bump value(s), with the shape of ``x``.
    """
    if n is None:
        n = this_n
    return 1/(n*h)*sps.norm.pdf((x-mu)/h)

In [None]:
# Two-panel figure, one panel per bandwidth: the Gaussian KDE together with
# the individual kernel bumps that add up to it.
fig_kde_describe, axes = plt.subplots(1,2,sharey=False)
fig_kde_describe.set_figwidth(10)
shift = 3
this_q = qx-shift  # evaluation grid, offset from qx by `shift`

for ax,this_kde,this_h in zip(axes,this_kde_list,h_list):
    ax.plot(this_q,this_kde(this_q),label='GKDE')

    # one bump per observation, with a dashed line from 0 up to its peak
    for obs in this_obs:
        ax.plot(this_q,kde_bump(this_q,obs,this_h),
                color='xkcd:salmon',zorder=-1)
        ax.vlines(obs,ymin=0,ymax=kde_bump(obs,obs,this_h),
                  color='k',ls='--')
    ax.legend()
    ax.set_title('Gaussian KDE, $h={:0.2}$'.format(this_h))

# register the figure under its output filename
this_fig_title = 'fig_kde_describe.png'
fig_all_master[this_fig_title] = fig_kde_describe

# # save just this figure
# fig_kde_describe.savefig('../'+this_fig_title)

## Data Fit to Example Distribution

In this example, we want to compare the convergence rates of HMISE versus Scott's rule.

Since we know the sampling distribution, we can approximate the optimal bandwidth $h_{MISE}$ (through its asymptotic form) fairly well.

### Finding the $h_{MISE}$

From Zambom and Dias (2012), in 1D we know that $h_{MISE}$ for a KDE is

\begin{align}
h_{MISE}=\left( \dfrac{R(K)}{\mu_2^2(K) R(\pi'')}\right)^{1/5}n^{-1/5}
\end{align}

For a Gaussian kernel, $N(0,1)$, we have 

\begin{align}
R(K)&=(2\sqrt{\pi})^{-1} \\
\mu_2(K) &= \int_{-\infty}^{\infty}x^2 K(x) dx = Var_K(X)+E_K(X)^2 = 1 \quad\Rightarrow\quad \mu_2^2(K) = 1
\end{align}
since $K(x)$ is a standard normal density, which has mean $0$ and variance $1$.

For this example, we want to estimate the integral

\begin{align}
R(\pi'')&=\int_\mathcal{D} (\pi''(q))^2 d\mu_q
\end{align}

Since we know the target function in this case, we compute the derivatives by hand and use `scipy.integrate.quad` to estimate the integral.

If $f(x)\sim\text{Beta}(2,5)$, then

\begin{align}
f'(x)&= \frac{d}{dx}\left(\frac{1}{B(2,5)}x^{2-1}(1-x)^{5-1}\right) \\
&=\frac{1}{B(2,5)}\left(-4x(1-x)^3+(1-x)^4\right) \\
\Rightarrow\quad f''(x)&=\frac{1}{B(2,5)}\left(\left(12x(1-x)^2-4(1-x)^3\right)-4(1-x)^3\right) \\
&=\frac{1}{B(2,5)}\left(12x(1-x)^2-8(1-x)^3\right) \\
&=\frac{1}{B(2,5)}\cdot 4\left(3x-2(1-x)\right)(1-x)^2 \\
&=\frac{4}{B(2,5)} \left(5x-2\right)(1-x)^2 
\end{align}

where $B(2,5)$ is the normalizing constant of the Beta density (the Beta function), which can be written in terms of the Gamma function.

Specifically,
\begin{align}
B(2,5) = \frac{\Gamma(2)\Gamma(5)}{\Gamma(2+5)}
\end{align}

If $f(x)\sim \text{TruncStandardNormal}$, we know that 

\begin{align}
f(x)&=\frac{1}{C}\phi(x)
\end{align}

where $\phi(x)$ is the standard normal density and $C$ is the normalizing constant (the proportion of the standard normal's area that is not truncated).

So,

\begin{align}
f(x) &=\frac{1}{C\sqrt{2\pi}}\exp\left(-\frac{x^2}{2}\right) \\
\Rightarrow\quad f'(x) &= \frac{1}{C\sqrt{2\pi}}\cdot -x\cdot \exp\left(-\frac{x^2}{2}\right) \\
\Rightarrow\quad f''(x) &= -\frac{1}{C\sqrt{2\pi}}\cdot \left( x\cdot (-x)\cdot \exp\left(-\frac{x^2}{2}\right) + \exp\left(-\frac{x^2}{2}\right)\right) \\
&=-\frac{1}{C\sqrt{2\pi}}\cdot \left( \exp\left(-\frac{x^2}{2}\right) - x^2\cdot \exp\left(-\frac{x^2}{2}\right) \right) \\
&=\frac{1}{C\sqrt{2\pi}}\cdot \left( x^2-1\right) \exp\left(-\frac{x^2}{2}\right)
\end{align}


I used standard distributions, but now I need to transform the derivatives through the location-scale map $x=g(y)=\frac{(y-\mu)}{\sigma}$, which sends a point $y$ of the shifted and scaled variable $Y=\sigma X+\mu$ back to the standard variable $X$.

For these transformations,

\begin{align}
f_Y(y) = \frac{f_X(g(y))}{\sigma}
\end{align}

where

\begin{align}
g(y)&=\frac{y-\mu}{\sigma} \\
g'(y) &= \frac{1}{\sigma} \\
g''(y) &= 0
\end{align}

Thus, using the chain rule, we can find that

\begin{align}
f_Y'(y) &= \frac{1}{\sigma}\left(f_X'(g(y))g'(y)\right) \\
f_Y''(y) &= \frac{1}{\sigma}\left(f_X''(g(y))\left(g'(y)\right)^2+f_X'(g(y))g''(y)\right) \\
&=\left(\frac{1}{\sigma}\right)f_X''(g(y))\left(\frac{1}{\sigma}\right)^2 \\
&=\frac{1}{\sigma^3}f_X''\left(\frac{y-\mu}{\sigma}\right)
\end{align}

In [None]:
# define the second derivatives
def beta_prime_prime(t,alpha=2,beta=5):
    """Second derivative of the standard Beta(alpha, beta) density.

    For f(x) = x**(alpha-1) * (1-x)**(beta-1) / B(alpha, beta), the product
    rule gives

        f''(x) = [ (alpha-1)(alpha-2) x**(alpha-3) (1-x)**(beta-1)
                   - 2 (alpha-1)(beta-1) x**(alpha-2) (1-x)**(beta-2)
                   + (beta-1)(beta-2) x**(alpha-1) (1-x)**(beta-3) ] / B(alpha, beta)

    with B(alpha, beta) = Gamma(alpha) Gamma(beta) / Gamma(alpha + beta).
    Points with x <= 0 or x > 1 are assigned the value 0; scalars and arrays
    are treated the same way.

    Parameters
    ----------
    t : float or array_like
        Point(s) at which to evaluate the second derivative.
    alpha, beta : float, optional
        Shape parameters of the Beta density (defaults 2 and 5).

    Returns
    -------
    numpy.ndarray or numpy.float64
        Values of f'' with the shape of ``t``.
    """
    x = np.asarray(t, dtype=float)

    # Only evaluate the power terms where the density is supported; elsewhere
    # substitute an interior point so that 0*inf never produces a NaN
    # (e.g. term1 at x = 0 when alpha = 2).
    inside = (x > 0) & (x <= 1)
    xs = np.where(inside, x, 0.5)

    term1 = (alpha-2)*(alpha-1)*xs**(alpha-3)*(1-xs)**(beta-1)
    term2 = -2*(alpha-1)*(beta-1)*xs**(alpha-2)*(1-xs)**(beta-2)
    term3 = (beta-2)*(beta-1)*xs**(alpha-1)*(1-xs)**(beta-3)

    term_sum = np.where(inside, term1+term2+term3, 0.0)

    # B(alpha, beta) normalizes the density, so the derivative is divided
    # by it (multiplying would scale the result by B**2)
    beta_const = gamma_func(alpha)*gamma_func(beta)/gamma_func(alpha+beta)

    return term_sum/beta_const

def truncnorm_prime_prime(t,trunc_const):
    """Second derivative of a truncated standard normal density.

    With f(x) = phi(x)/trunc_const, where phi is the standard normal density,
    f''(x) = (x**2 - 1) * phi(x) / trunc_const. No truncation bounds are
    applied here: the formula is evaluated at every point of ``t``.

    Parameters
    ----------
    t : float or array_like
        Point(s), in standardized units, at which to evaluate.
    trunc_const : float
        Normalizing constant of the truncated density.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Values of f'' with the shape of ``t``.
    """
    pts = np.array(t, dtype=float)
    inv_norm = 1/np.sqrt(2*np.pi)*1/trunc_const
    return (pts**2-1)*np.exp(-pts**2/2)*inv_norm



In [None]:
# define the specific function for the integral
# Normalizing constants of the two truncated normals: the standard-normal
# mass above `a`.
# NOTE(review): assumes `.a` is the standardized lower truncation bound of a
# frozen scipy truncnorm -- confirm in example_master.py.
truncB = 1-sps.norm.cdf(EM.distB.a)
truncC = 1-sps.norm.cdf(EM.distC.a)

def true_pi_prime_prime(q):
    """Second derivative of the target mixture density at ``q``.

    Each component is a location-scale transform of a standard density,
    p(q) = f((q - loc)/scale) / scale, so by the chain rule
    p''(q) = f''((q - loc)/scale) / scale**3. The component second
    derivatives are combined with the mixture weights ``EM.dist_weights``.

    Parameters
    ----------
    q : float or array_like
        Point(s) at which to evaluate.

    Returns
    -------
    numpy.ndarray
        Weighted sum of the component second derivatives, shaped like ``q``.
    """
    x = np.array(q, dtype=float)
    out = np.zeros_like(x)

    # scaled/shifted Beta component
    locA, scaleA = EM.distA.kwds['loc'], EM.distA.kwds['scale']
    out += EM.dist_weights[0]*beta_prime_prime((x-locA)/scaleA)/scaleA**3

    # first truncated normal component
    locB, scaleB = EM.distB.kwds['loc'], EM.distB.kwds['scale']
    out += EM.dist_weights[1]*truncnorm_prime_prime((x-locB)/scaleB,truncB)/scaleB**3

    # second truncated normal component
    locC, scaleC = EM.distC.kwds['loc'], EM.distC.kwds['scale']
    out += EM.dist_weights[2]*truncnorm_prime_prime((x-locC)/scaleC,truncC)/scaleC**3
    return out

In [None]:
# sample sizes at which the estimators are evaluated
N_list = EM.tri_peak_N_list
print(N_list, '\n')

# R(pi'') : integral of the squared second derivative of the target density
def f(q):
    return true_pi_prime_prime(q)**2

Rprimeprime,err = quad(f,-2,40)

# kernel constants for the standard Gaussian kernel
RK = 1/(2*np.sqrt(np.pi))
mu2K = 1
print(RK/(mu2K*Rprimeprime))

# h_MISE for every sample size, printed beside the quantity
# (sample std) * N**(-1/5) for comparison
h_mise_list = [(1/N*(RK/(mu2K*Rprimeprime)))**(1/5) for N in N_list]
for N,h_mise in zip(N_list,h_mise_list):
    print(h_mise,'\t', np.std(obs_sample_full[0:N],ddof=1)*N**(-1/5))

In [None]:
# Fit one KDE per sample size and per bandwidth rule, always using the
# first N observations of the full sample.
this_kde_dict = {'Scott':[],'HMISE':[]}
for N,h_mise in zip(N_list,h_mise_list):
    this_sample = obs_sample_full[0:N]

    # default bandwidth (Scott's rule)
    scott_kde = sps.gaussian_kde(this_sample)
    this_kde_dict['Scott'].append(scott_kde)

    # gaussian_kde takes a factor on the sample std, so divide the target
    # bandwidth by the std to obtain h_MISE as the actual bandwidth
    kde_factor = h_mise/np.std(this_sample,ddof=1)
    hmise_kde = sps.gaussian_kde(this_sample,bw_method=kde_factor)
    this_kde_dict['HMISE'].append(hmise_kde)

The following codeblock gets the integrated squared error (ISE) for different sample sizes.

However, we want to compute the mean $L^2$ error for a bunch of different samples to get the MISE (the expected ISE). This is expensive, so we do this in a different notebook [Expensive Computations](Expensive%20Computations.ipynb), and load the data here.



In [None]:
# # compute L2 errors
# this_L2err_dict = {'Scott':[],'HMISE':[]}
# for i,N in enumerate(N_list):
#     this_err,tol = sf.L2_err_1D(obs_dist,this_kde_dict['Scott'][i],
#                                 -10,25,quad_kwargs={'epsabs':1e-3})
#     this_L2err_dict['Scott'].append(this_err)
    
#     this_err,tol = sf.L2_err_1D(obs_dist,this_kde_dict['HMISE'][i],
#                                 -10,25,quad_kwargs={'epsabs':1e-3})
#     this_L2err_dict['HMISE'].append(this_err)

In [None]:
# Plot the KDEs and ISE
# fig_single_kde_converge, (ax1,ax2) = plt.subplots(1,2)
# fig_single_kde_converge.set_figwidth(10)

# i=3
# ax1.plot(qx,obs_dist.pdf(qx),color='xkcd:black',ls='--',alpha=0.6)
# ax1.plot(qx,this_kde_dict['Scott'][i](qx),
#          label="Scott's Rule")
# ax1.plot(qx,this_kde_dict['HMISE'][i](qx),label='HMISE')
# ax1.set_title('GKDE with $n={}$'.format(N_list[i]))
# ax1.legend()

# ax2.scatter(np.log(N_list),np.log(this_L2err_dict['Scott']))
# ax2.scatter(np.log(N_list),np.log(this_L2err_dict['HMISE']))

In [None]:
# Load the KDE error results saved by the expensive-computations notebook.
# NOTE(review): allow_pickle=True can execute code embedded in the archive;
# this presumes the .npz file was produced locally -- confirm.
try:
    kde_error_data = np.load(EM.tri_peak_MISE_name+'.npz',allow_pickle=True)
except FileNotFoundError:
    print('File not found. Did you run the notebook '
          'with expensive computations?')
    raise

# list the arrays stored in the archive
print(kde_error_data.files)

In [None]:
# Fit a straight line to log(error) against log(n) for each bandwidth rule.
MISE_lines = {}
these_kde_keys = ['ScottMISE', 'OptimalMISE']
limit_B = len(kde_error_data['ScottMISE'])
for label in these_kde_keys:
    # Repeat the sample sizes once per row of the error array so that x and y
    # line up after flattening. np.tile repeats the sequence whether N_list
    # is a list or an ndarray (`N_list*limit_B` would multiply an ndarray).
    # NOTE(review): assumes each error array is (limit_B, len(N_list)) -- confirm.
    x_n = np.log(np.tile(N_list,limit_B))
    y_err = np.log(kde_error_data[label].reshape(-1,))

    # Polynomial.fit returns the line in a rescaled domain: evaluate it by
    # calling it, and use .convert().coef for coefficients in log(n)
    this_MISE_line = np.polynomial.polynomial.Polynomial.fit(x_n,y_err,1)
    MISE_lines[label] = this_MISE_line


This plot shows a GKDE and the Convergence Rate.

In [None]:
# Left: the two KDEs against the target for one sample size.
# Right: fitted log-log convergence lines with their estimated slopes.
fig_kde_converge, (ax1,ax2) = plt.subplots(1,2)
fig_kde_converge.set_figwidth(10)
fig_kde_converge.set_figheight(4)


i=3  # index into N_list of the sample size shown in the left panel
ax1.plot(qx,obs_dist.pdf(qx),color='xkcd:black',ls='--',alpha=0.7,
         label='Target')
ax1.plot(qx,this_kde_dict['Scott'][i](qx),
         label="Scott's Rule")
ax1.plot(qx,this_kde_dict['HMISE'][i](qx),label='HMISE')
ax1.set_title('GKDE with $n={}$'.format(N_list[i]))
ax1.legend()

line_labels = ["Scott's", "HMISE"]
ax2.set_title('Convergence Rate in $L^2$')

# log sample sizes: one x-value per column of the error arrays
x_n = np.log(N_list)
midpoint = (x_n[-1]+x_n[0])/2
# `j` (not `i`) so the sample-size index chosen above is not overwritten
for j,label in enumerate(these_kde_keys):
    this_line = MISE_lines[label]
    y_n = this_line(x_n)
    # spread of the log errors across repetitions, used as error bars
    y_std = np.std(np.log(kde_error_data[label]),ddof=1,axis=0)

    # Polynomial.fit stores coefficients for an internally rescaled variable;
    # convert() maps them back so that coef[1] is the slope in log(n)
    slope = this_line.convert().coef[1]

    # place the slope annotation just above / below the line's midpoint
    y_label = this_line(midpoint)
    y_label += 0.5 if j ==0 else -0.5
    ax2.annotate("$m={:0.2}$".format(slope),
                 xy = (midpoint,y_label), ha='right')

    ax2.errorbar(x_n,y_n, yerr = y_std,marker='o',
                barsabove=True,capsize=4,label=line_labels[j])

ax2.legend()
ax2.set_xlabel('Log Sample Size $n$')
ax2.set_ylabel('Log $L^2$ Error')

fig_kde_converge.tight_layout()
# register the figure under its output filename
this_fig_title = 'fig_kde_converge.png'
fig_all_master[this_fig_title] = fig_kde_converge

# # save just this fig
# fig_kde_converge.savefig('../'+this_fig_title)

> **Note:** The expected convergence rate may be estimated poorly for a small sample of $L^2$ errors.

### Confidence Intervals for KDE

We compute the confidence intervals in two different ways.

1. Using the "plug-in" method
2. Using a bootstrap sample

**Formula for the plug-in method:**

\begin{align}
C_{1-\alpha}(q) = \left[\ \widetilde{\pi}_n(q)-z_{1-\frac{\alpha}{2}}\sqrt{\frac{R(K)\widetilde{\pi}_n(q)}{nh}},\ \widetilde{\pi}_n(q)+z_{1-\frac{\alpha}{2}}\sqrt{\frac{R(K)\widetilde{\pi}_n(q)}{nh}}\ \right]
\end{align}

Recall that $R(K)=(2\sqrt{\pi})^{-1}$ from earlier.

**Strategy for the Bootstrap method:**

This is very costly, so we again do this in the [Expensive Computations](Expensive%20Computations.ipynb) notebook and then load the data here.

Essentially, we just compute a bunch of GKDEs, then take the lower and upper pointwise values associated with the 95% quantiles.

In [None]:
# Load the bootstrap KDE results saved by the expensive-computations notebook.
# NOTE(review): allow_pickle=True can execute code embedded in the archive;
# this presumes the .npz file was produced locally -- confirm.
try:
    kde_CI_data = np.load(EM.tri_peak_CI_name+'.npz',allow_pickle=True)
except FileNotFoundError:
    print('File not found. Did you run the notebook '
          'with expensive computations?')
    raise

# list the arrays stored in the archive
print(kde_CI_data.files)

In [None]:
# Pointwise confidence bands around the HMISE KDE for one sample size,
# computed by the plug-in formula and from the bootstrap KDEs.
ex_dist = 3
this_h = h_mise_list[ex_dist]
this_n = N_list[ex_dist]
print('N, R(K), h: ', this_n,RK,this_h) # take values from earlier

# CI value
alphaCI = 0.05
z_lower,z_upper = sps.norm.ppf(alphaCI/2),sps.norm.ppf(1-alphaCI/2)

# the  kde values for this estimate
this_kde_vals = this_kde_dict['HMISE'][ex_dist](qx)

# compute the confidence intervals plug-in and bootstrap

pointwiseCI = {'Plugin': None,'Bootstrap': None}
for CI in pointwiseCI:
    if CI=='Plugin':
        # The pointwise KDE variance is approximately R(K)*pi(q)/(n*h).
        # Use the positive quantile z_upper so that error_term >= 0 and the
        # lower bound really is the smaller one (z_lower is negative).
        error_term = z_upper*np.sqrt(RK*this_kde_vals/(this_n*this_h))
        qy_lower = this_kde_vals - error_term
        qy_upper = this_kde_vals + error_term

    else:
        # NOTE(review): presumably one bootstrap KDE evaluated on qx per
        # row -- confirm against the expensive-computations notebook.
        this_q = kde_CI_data['HMISECI']
        # symmetric band: (1-alpha) quantile of the absolute deviations
        deviations = np.abs(this_q - this_kde_vals)
        dev_bound = np.quantile(deviations,q=1-alphaCI,axis=0)
        qy_lower = this_kde_vals-dev_bound
        qy_upper = this_kde_vals+dev_bound

    # save the quantiles
    pointwiseCI[CI] = (qy_lower,qy_upper)


In [None]:
# Plot the HMISE KDE with its pointwise confidence band, one panel per method
fig_kde_confidence, axes = plt.subplots(1,2)
fig_kde_confidence.set_figwidth(10)
fig_kde_confidence.set_figheight(4)

for ax,label in zip(axes,('Plugin','Bootstrap')):
    ax.plot(qx,obs_dist.pdf(qx),color='xkcd:black',ls='--',alpha=0.7,
            label='Target')
    ax.plot(qx,this_kde_dict['HMISE'][ex_dist](qx),label='GKDE',color='C1')

    # shaded band between the stored lower and upper bounds
    qy_lower,qy_upper = pointwiseCI[label]
    ax.fill_between(qx,qy_lower,qy_upper,edgecolor='xkcd:red',
                    facecolor='xkcd:yellow orange',alpha=0.5,zorder=2,
                    label='{:0.0f}% CI'.format(100*(1-alphaCI)))

    ax.set_title('{}% CI with $n={}$ Obs.'.format(int(100*(1-alphaCI)),
                                                  N_list[ex_dist]))
    ax.legend()

# register the figure under its output filename
this_fig_title = 'fig_kde_CI.png'
fig_all_master[this_fig_title] = fig_kde_confidence

# # save just this fig
# fig_kde_confidence.savefig('../'+this_fig_title)

# Bayesian Mixture Model and Dirichlet Process Mixture Model

Recall that the Bayesian Mixture Model and the DP Mixture model are defined as follows:

**Bayesian Finite Mixture Model**
\begin{align}
f(q) &= \sum_{k=1}^K w_k N(\mu_k,\Sigma_k) \\[1.5ex]
(\mu_k,\Sigma_k)&\sim NIW(\mu_0,\kappa_0,\nu_0,\Psi_0) \\[1.5ex]
(w_1,\ldots,w_K) &\sim Dirichlet(\alpha_0)
\end{align}

**Dirichlet Process Mixture Model**
\begin{align}
f(q) &= \sum_{k=1}^\infty w_k N(\mu_k,\Sigma_k) \\[1.5ex]
(\mu_k,\Sigma_k)&\sim NIW(\mu_0,\kappa_0,\nu_0,\Psi_0) \\[1.5ex]
\beta_k &\sim Beta(1,\alpha_0) \\
\forall k :\quad w_k &= \beta_k \prod_{j=1}^{k-1}(1-\beta_j)
\end{align}


We use `scikit-learn`'s BayesianGaussianMixture package for all computations here. We translate the key components described by the prior model above to sklearn's argument language:

* $\alpha_0$ = `weight_concentration_prior_`, for either the beta distribution in the case of DP or the dirichlet distribution in the case of the finite mixture. 


* `weight_concentration_` : is the posterior vector of parameter values for the weight distribution, either a $K$-vector $\alpha_0+n_k$ for the finite case or a vector of pairs of parameters $(a_k,b_k)$ corresponding to the posterior values of the beta distribution for each $k$ in the infinite (Dirichlet process) case.


* $\nu_0$, $\hat{\nu}$ = `degrees_of_freedom_prior_` and the posterior value `degrees_of_freedom_`


* $\kappa_0$, $\hat{\kappa}$ = `mean_precision_prior_` and the posterior value `mean_precision_`


* $\Psi_0$ = `covariance_prior_`. Note that there is a typo in the documentation: this is the correct parameter for the *inverse* wishart distribution, consistent with our model (not the wishart distribution).


* $\hat{\Sigma}_k$ = `covariances_` are the estimated posterior covariances for each component $k$ after fitting the model. 
  
  * To obtain $\hat{\Psi}_k$, the posterior parameter for the inverse-wishart distributions, use $\hat{\Sigma}_k\cdot \hat{\nu}$.


* $\mu_0$ = `mean_prior_` center for the means. All $\mu_k$ have this same initial prior center, so $\mu_0$ is a vector of dimension $d$.


* $\hat{\mu}$ = `means_` a vector of the posterior means of each component $k$ of the mixture.


In [None]:
# define the general arguments that will be passed to BGMM:
# keyword arguments read from the example parameter module and unpacked,
# alongside the prior arguments, into every BayesianGaussianMixture below
arg_dict = EM.tri_peak_BGMM_arg_dict
print(arg_dict)

## BGM Model

### Plot Prior Distributions 

In [None]:
# Prior settings for two finite Bayesian mixtures that differ only in the
# Dirichlet weight-concentration parameter (1 for A, 20 for B).
this_K = 5
arg_prior_dict_A = dict(
    n_components=this_K,
    weight_concentration_prior_type='dirichlet_distribution',
    weight_concentration_prior=1,
    # prior mean: rounded sample mean
    mean_prior=np.atleast_1d(np.round(np.mean(obs_sample_full))),
    mean_precision_prior=1,  # kappa
    degrees_of_freedom_prior=1,  # nu
    # psi: rounded sample variance
    covariance_prior=np.atleast_2d(np.round(np.cov(obs_sample_full))),
)

# same prior, larger weight concentration
arg_prior_dict_B = dict(arg_prior_dict_A, weight_concentration_prior=20)
print('Prior Mean = ',arg_prior_dict_A['mean_prior'])
print('Prior Cov. = ', arg_prior_dict_A['covariance_prior'])
print('Prior Std. Dev. = ', np.sqrt(arg_prior_dict_A['covariance_prior']))

In [None]:
# build the two (not yet fitted) models from their prior settings plus the
# shared general arguments
BGMM_A, BGMM_B = (BayesianGaussianMixture(**prior_args,**arg_dict)
                  for prior_args in (arg_prior_dict_A,arg_prior_dict_B))

In [None]:
# first this_N observations of the full sample
this_N = EM.tri_peak_CI_sample_size
this_sample = obs_sample_full[:this_N]

# fit both models to the sample, passed as a single-column 2-D array
for model in (BGMM_A,BGMM_B):
    model.fit(this_sample.reshape(-1,1))
print('Model A: Converged? ', BGMM_A.converged_)
print('Model B: Converged? ', BGMM_B.converged_)


In [None]:
# # just look at the fits for reference

# plt.plot(qx,sf.eval_pdf(qx,this_kde_dict['Scott'][3]))
# plt.plot(qx,sf.eval_pdf(qx,BGMM_A),label='alpha={}'.format(BGMM_A.weight_concentration_prior))
# plt.plot(qx,sf.eval_pdf(qx,BGMM_B),label='alpha={}'.format(BGMM_B.weight_concentration_prior))
# plt.hist(this_sample,density=True,edgecolor='k',color='xkcd:sky blue', alpha=0.25)
# plt.legend()

In [None]:
# # check components for reference
# this_model_ref = BGMM_A
# for k in np.arange(this_model_ref.n_components):
#     rel_w = this_model_ref.weights_[k]
#     mean, sig2 = this_model_ref.means_[k],np.squeeze(this_model_ref.covariances_[k])
#     plt.plot(qx,rel_w*sps.norm.pdf(qx,loc=mean, scale=np.sqrt(sig2)),label=k,alpha=1)
#     plt.legend()

In [None]:
# forward-sampling models built from the *prior* of each fitted mixture
this_prior_list = {
    key: {'Fmodel': sf.Forward_BGM_Model(model,prior=True)}
    for key,model in (('priorA',BGMM_A),('priorB',BGMM_B))
}

# draw a few parameter sets from each prior
this_M = 8
for key in this_prior_list:
    entry = this_prior_list[key]
    entry['param_sample'] = entry['Fmodel'].rvs(this_M)


In [None]:
# Plot the sampled priors: a 2x2 grid with one row per prior model.
# Left column: expected prior density plus the sampled prior densities.
# Right column: expected mixture weights with error bars.
fig_BGM_priors, axes = plt.subplots(2,2)
fig_BGM_priors.set_figheight(8)
fig_BGM_priors.set_figwidth(8)
for i,key in enumerate(this_prior_list):
    # compute the ys of the sample
    # batch GMM returns M x (evalx) array
    prior_ys = sf.batch_GMM_pdf(qx,this_prior_list[key]['param_sample'])
    
    # plot expected prior
    qy = this_prior_list[key]['Fmodel'].pdf(qx)
    axes[i][0].plot(qx,qy,label='Expected')
    
    # plot sampled prior; only one curve (k==1) carries the legend label so
    # that 'Sampled' appears once in the legend
    for k,qy in enumerate(prior_ys):
        if k==1:
            axes[i][0].plot(qx,qy,color='gray',alpha=0.75,label='Sampled')
        else:
            axes[i][0].plot(qx,qy,color='gray',alpha=0.65)
    axes[i][0].legend()
    
        
    # bar graph of the weights: mean of the weight distribution, with the
    # square root of its variance as the error bar; components numbered from 1
    weight_bars = np.arange(this_prior_list[key]['Fmodel'].K)+1
    axes[i][1].bar(weight_bars,this_prior_list[key]['Fmodel'].weight_dist.mean(),
                   yerr=np.sqrt(this_prior_list[key]['Fmodel'].weight_dist.var()),
                  error_kw={'capsize': 4})
    # can be used to check reliability of sampling weights is same as expected
#     axes[i][1].bar(weight_bars,this_prior_list[key]['param_sample']['weight'].mean(axis=0))

    # set titles, showing the first entry of the weight distribution's alpha
    this_alpha = this_prior_list[key]['Fmodel'].weight_dist.alpha
    axes[i][0].set_title('Prior, $\\alpha={}$'.format(this_alpha[0]))
    axes[i][1].set_title('Exp. Weights, $\\alpha={}$'.format(this_alpha[0]))

# labels (bottom row only)
axes[1,0].set_xlabel('$q$')
axes[1,1].set_xlabel('Component $k$')
# use ylim of first plot so both weight panels share a vertical scale
axes[1,1].set_ylim(axes[0,1].get_ylim())

fig_BGM_priors.tight_layout()

# register the figure under its output filename
this_fig_title = 'fig_BGM_prior.png'
fig_all_master[this_fig_title] = fig_BGM_priors

# # save just this fig
# fig_BGM_priors.savefig('../'+this_fig_title)

### Plot Posteriors

Here we compute forward samples.

In [None]:
# fitted models and forward-sampling models built from their posteriors
this_post_list = {
    key: {'model': model}
    for key,model in (('postA',BGMM_A),('postB',BGMM_B))
}
this_post_list['postA']['Fmodel'] = sf.Forward_BGM_Model(BGMM_A)
this_post_list['postB']['Fmodel'] = sf.Forward_BGM_Model(BGMM_B)

# draw parameter sets from each posterior
this_M = 500
for key in this_post_list:
    entry = this_post_list[key]
    entry['param_sample'] = entry['Fmodel'].rvs(this_M)


Here we want to plot the component distributions.

In [None]:
# For each fitted model, evaluate every weighted Gaussian component on qx.
for key in this_post_list:
    this_model = this_post_list[key]['model']
    this_K = this_model.n_components

    # row k holds weight_k * N(qx; mean_k, cov_k)
    these_pdf_outs = np.zeros([this_K,len(qx)])
    for k in range(this_K):
        component_pdf = sps.multivariate_normal.pdf(
            qx,
            mean=this_model.means_[k],
            cov=this_model.covariances_[k],
        )
        these_pdf_outs[k,:] = this_model.weights_[k]*component_pdf

    # keep the component curves with the model
    this_post_list[key]['comp_pdfs'] = these_pdf_outs


In [None]:
# Plot the fitted posteriors: a 2x2 grid with one row per model.
# Left column: target, expected posterior density and its weighted components.
# Right column: expected mixture weights with error bars.
# NOTE(review): the 2-row grid and `this_alpha` below assume this_post_list
# holds exactly two entries when this cell runs; a later cell adds 'postC'.
fig_BGM_post, axes = plt.subplots(2,2)
fig_BGM_post.set_figheight(8)
fig_BGM_post.set_figwidth(10)
for i,key in enumerate(this_post_list):
    # plot target 
    axes[i][0].plot(qx,sf.eval_pdf(qx,obs_dist), ls='--', label='Target',
                    color='k',alpha=0.4,zorder=-1)
    
    # plot expected posterior
    qy = sf.eval_pdf(qx,this_post_list[key]['model'])
    axes[i][0].plot(qx,qy,label='Expected',zorder=0,linewidth=2)
    



    # bar graph of the weights: mean and variance of the weight distribution,
    # components numbered from 1
    this_K = this_post_list[key]['Fmodel'].K
    weight_bars = np.arange(this_K)+1
    mean_weights = this_post_list[key]['Fmodel'].weight_dist.mean()
    var_weights = this_post_list[key]['Fmodel'].weight_dist.var()
    
    # order K components for color niceness: the three components with the
    # largest mean weight get distinct colors, all others get 'C0'
    # (the color list assumes this_K >= 3)
    large_ind_to_small = np.flip(np.argsort(mean_weights))
    colorlist = ['xkcd:orangered','xkcd:crimson','xkcd:orchid']+['C0']*(this_K-3)
    ordered_colorlist = np.empty(this_K,dtype='object')
    # j-th largest component receives the j-th color
    for j,val in enumerate(ordered_colorlist):
        ordered_colorlist[large_ind_to_small[j]] = colorlist[j]
#     print(mean_weights)
#     print(large_ind_to_small)
#     print(ordered_colorlist)
    
    # bar plot, with bar edges colored to match the component curves
    axes[i][1].bar(weight_bars,mean_weights, yerr=np.sqrt(var_weights),
                  error_kw={'capsize': 4},edgecolor=ordered_colorlist,
                   linewidth=2)

    # plot the components: those at least as heavy as the third-largest are
    # drawn in their own color, the rest in the shared last color
    for k,y_pdf in enumerate(this_post_list[key]['comp_pdfs']):
        if mean_weights[k] >= mean_weights[large_ind_to_small[2]]:
            axes[i][0].plot(qx,y_pdf, color=ordered_colorlist[k],ls='-.',
                            linewidth=2)
        else:
            axes[i][0].plot(qx,y_pdf, color=colorlist[-1],ls='--')
    axes[i][0].legend()

    # set titles:
    # NOTE(review): alpha values are hard-coded here; they repeat the
    # weight_concentration_prior values set for models A and B.
    this_alpha = [1,20]
    axes[i][0].set_title('Posterior, $\\alpha_0={}$'.format(this_alpha[i]))
    axes[i][1].set_title('Exp. Weights, $\\alpha_0={}$'.format(this_alpha[i]))

# labels (bottom row only)
axes[1,0].set_xlabel('$q$')
axes[1,1].set_xlabel('Component $k$')
# use ylim of first plot so both weight panels share a vertical scale
axes[1,1].set_ylim(axes[0,1].get_ylim())


# register the figure under its output filename
fig_BGM_post.tight_layout()
this_fig_title = 'fig_BGM_post.png'
fig_all_master[this_fig_title] = fig_BGM_post

# # save just this fig
# fig_BGM_post.savefig('../'+this_fig_title)

Check the $L^2$ distance between the two distributions.

In [None]:
# L2 distance between the two fitted models, evaluated with bounds -10 and 25
# (presumably returns an (error, tolerance) pair, as the commented-out usage
# earlier in the notebook unpacks it -- confirm in supplemental_funcs)
sf.L2_err_1D(BGMM_A,BGMM_B,-10,25)

We know that this is not the best estimate. This is due to the BGMM oversmoothing and is similar to when the bandwidth is chosen to be too large.

We can improve our estimate by utilizing prior knowledge of the data-generating distribution.

Specifically, we use the standard deviation of the narrowest component in our mixture to set an appropriate "window" length.

When we do this, we adjust the prior precision parameter to be less than 1 to reflect that we expect the means to vary a lot more than the given covariances. That is:

\begin{align}
Cov(\mu_i) &> E(\Sigma_i) \\
Cov(\mu_i) &= (1/\kappa_0) E(\Sigma_i) \\
\Rightarrow \kappa_0 &< 1.
\end{align}



In [None]:
# work with the same sample as the earlier fits
this_N = EM.tri_peak_CI_sample_size
this_sample = obs_sample_full[:this_N]
print(np.cov(this_sample))

# window = standard deviation of the component with the smallest variance
var_min = np.min([dist.var() for dist in (EM.distA,EM.distB,EM.distC)])
window = np.sqrt(var_min)
print(window)

# precision chosen so that window**2 / kappa0 equals the sample variance,
# i.e. the prior variance of the means matches the variance of the sample
this_kappa0 = window**2/np.cov(this_sample)
print(this_kappa0)

In [None]:
# define new prior with adjusted window
arg_prior_dict_C = arg_prior_dict_A.copy()
# covariance_prior is a variance (prior A passes the sample covariance), so
# the window -- a standard deviation -- is squared. This also matches
# this_kappa0, which is computed from window**2.
arg_prior_dict_C['covariance_prior'] = np.atleast_2d(window**2)
arg_prior_dict_C['mean_precision_prior'] = this_kappa0
arg_prior_dict_C

In [None]:
# # change number of random starts
# arg_dict_C = arg_dict.copy()
# arg_dict_C['n_init'] = 5
# arg_dict_C['tol'] = 1e-3
# arg_dict_C

In [None]:
# define the new mixture: same general arguments as models A and B, with the
# window-based prior settings of arg_prior_dict_C (not fitted yet)
BGMM_C = BayesianGaussianMixture(**arg_prior_dict_C,**arg_dict)

In [None]:
# refit on the same sample as the earlier models
this_N = EM.tri_peak_CI_sample_size
this_sample = obs_sample_full[:this_N]

# the sample is passed as a single-column 2-D array
BGMM_C.fit(this_sample.reshape(-1,1))
print('Model C: Converged? ', BGMM_C.converged_)

In [None]:
# # just look at the fits for reference
# plt.plot(qx,sf.eval_pdf(qx,this_kde_dict['Scott'][3]))
# plt.plot(qx,sf.eval_pdf(qx,BGMM_C),label='BGMM C')
# plt.hist(this_sample,density=True,edgecolor='k',color='xkcd:sky blue', alpha=0.25)
# plt.legend()

In [None]:
# # check components for reference
# this_model_ref = BGMM_C
# for k in np.arange(this_model_ref.n_components):
#     rel_w = this_model_ref.weights_[k]
#     mean, sig2 = this_model_ref.means_[k],np.squeeze(this_model_ref.covariances_[k])
#     plt.plot(qx,rel_w*sps.norm.pdf(qx,loc=mean, scale=np.sqrt(sig2)),label=k,alpha=1)
#     plt.legend()

In [None]:
# Compare the KDE and the BGMM side by side: uninformed parameter choices on
# the left, choices informed by the known target on the right.
fig_BGMM_kde_compare, axes =plt.subplots(1,2)
fig_BGMM_kde_compare.set_figwidth(10)

for i,ax in enumerate(axes):
    ax.plot(qx,sf.eval_pdf(qx,obs_dist), ls='--', label='Target',
                    color='k',alpha=0.4,zorder=-1)

    # Raw strings keep the LaTeX backslashes literal; "\P" and "\s" are
    # invalid escape sequences in an ordinary string literal.
    if i ==0:
        ax.plot(qx,this_kde_dict['Scott'][3](qx),label="GKDE: $h_{Scott}$")
        ax.plot(qx,sf.eval_pdf(qx,BGMM_A),
                 label=r"BGMM: $\Psi\approx s_q^2$")
        ax.set_title('Uninformed Parameter Choice')

    else:
        ax.plot(qx,this_kde_dict['HMISE'][3](qx),label='GKDE: $h_{HMISE}$')
        ax.plot(qx,sf.eval_pdf(qx,BGMM_C),
                 label=r"BGMM: $\Psi\approx \sigma_{min}^2$")
        ax.set_title('Informed Parameter Choice')

    ax.legend()
    ax.set_xlabel('$q$')

# register the figure under its output filename
this_fig_title = 'fig_BGM_KDE_compare.png'
fig_all_master[this_fig_title] = fig_BGMM_kde_compare

# save just this fig
# fig_BGMM_kde_compare.savefig('../'+this_fig_title)

Now we look at the confidence interval for the density estimate.

We also show that the BGMM models converge. For this, we take a sample of BGMM models and then compute the $L^2$ error. This is in the [Expensive Computations](Expensive%20Computations.ipynb) notebook: we just import the results here.

In [None]:
# register the small-window model with the other posteriors
this_post_list['postC'] = {
    'model': BGMM_C,
    'Fmodel': sf.Forward_BGM_Model(BGMM_C),
}

# draw this_M parameter sets from its posterior
postC_entry = this_post_list['postC']
postC_entry['param_sample'] = postC_entry['Fmodel'].rvs(this_M)

In [None]:
# Evaluate every sampled posterior density on qx; these curves are used for
# the pointwise quantiles. batch GMM returns an M x (evalx) array.
for key in this_post_list:
    post_ys = sf.batch_GMM_pdf(qx,this_post_list[key]['param_sample'])
    this_post_list[key].update(sampled_post_ys=post_ys)


In [None]:
# Load the BGMM L2-error results saved by the expensive-computations notebook.
# NOTE(review): allow_pickle=True can execute code embedded in the archive;
# this presumes the .npz file was produced locally -- confirm.
try:
    BGMM_L2err_data = np.load(EM.tri_peak_BGM_name+'.npz',allow_pickle=True)
except FileNotFoundError:
    print('File not found. Did you run the notebook '
          'with expensive computations?')
    raise

# list the arrays stored in the archive
print(BGMM_L2err_data.files)

In [None]:
# plt.plot(BGMM_L2err_data['BGMM_C_L2_err'] - BGMM_L2err_data['DPMM_C_L2_err'])

In [None]:
# Left: expected posterior of the informed BGMM with a pointwise band taken
# from the sampled posterior densities. Right: L2 convergence of the BGMMs.
fig_BGM_confidence, axes = plt.subplots(1,2)
fig_BGM_confidence.set_figwidth(10)
fig_BGM_confidence.set_figheight(4)

# define confidence level
this_CIa = 0.05

these_BGMM_errs = ['BGMM_A_L2_err','BGMM_C_L2_err']
for i,ax in enumerate(axes):
    if i == 0:
        ax.plot(qx,obs_dist.pdf(qx),color='xkcd:black',ls='--',alpha=0.7,
                label='Target')
        qy = sf.eval_pdf(qx,this_post_list['postC']['model'])
        ax.plot(qx,qy,label='Exp. Posterior',color='C1')

        # pointwise band: equal-tailed quantiles across the sampled curves
        post_ys = this_post_list['postC']['sampled_post_ys']
        qy_lower = np.quantile(post_ys,q=this_CIa/2,axis=0)
        qy_upper = np.quantile(post_ys,q=1-this_CIa/2,axis=0)
        # label and title use this_CIa, the level the band was computed with
        ax.fill_between(qx,qy_lower,qy_upper,edgecolor='xkcd:red',
                        facecolor='xkcd:yellow orange',alpha=0.5,zorder=2,
                        label='{:0.0f}% CI'.format(100*(1-this_CIa)))

        ax.set_title('{}% CI with $n={}$ Obs.'.format(int(100*(1-this_CIa)),
                                                      N_list[ex_dist]))
        ax.legend()
        ax.set_xlabel('$q$')
    else:
        # raw strings keep the LaTeX backslashes literal ("\P" and "\s" are
        # invalid escape sequences in an ordinary string literal)
        line_labels = [r"BGMM: $\Psi\approx s_q^2$",
                       r"BGMM: $\Psi\approx \sigma_{min}^2$"]
        ax.set_title('Convergence Rate in $L^2$')

        # log sample sizes: one x-value per column of the error arrays
        x_n = np.log(N_list)
        # `j` (not `i`) so the panel index of the outer loop is not overwritten
        for j,label in enumerate(these_BGMM_errs):
            print(label)
            # mean and spread of the log errors across repetitions
            log_errs = np.log(BGMM_L2err_data[label])
            y_n = np.mean(log_errs,axis=0)
            y_var = np.std(log_errs,ddof=1,axis=0)

            ax.errorbar(x_n,y_n, yerr = y_var,marker='o',
                        barsabove=True,capsize=4,label=line_labels[j])

        ax.legend()
        ax.set_xlabel('Log Sample Size $n$')
        ax.set_ylabel('Log $L^2$ Error')

# register the figure under its output filename
fig_BGM_confidence.tight_layout()
this_fig_title = 'fig_BGM_confidence_L2.png'
fig_all_master[this_fig_title] = fig_BGM_confidence

# # save just this fig
# fig_BGM_confidence.savefig('../'+this_fig_title)

## Dirichlet Process Mixture Model

### Prior DPMM

First we visualize several prior distributions.

In [None]:
# General (non-prior) keyword arguments shared by every DPMM built below;
# taken from the example parameter module and printed for reference.
arg_dict = EM.tri_peak_BGMM_arg_dict
print(arg_dict)

Note that the key change in the arguments below is the weight concentration prior type, which is now a Dirichlet process.

In [None]:
# prior set-up for this example: truncation level of the mixture
this_K = 35

# use the first this_N observations so that the sample matches the one used elsewhere
this_N = EM.tri_peak_CI_sample_size
this_sample = obs_sample_full[:this_N]

# prior location/scale are the rounded sample mean and sample variance
prior_mean = np.atleast_1d(np.round(np.mean(this_sample)))
prior_cov = np.atleast_2d(np.round(np.cov(this_sample)))

# prior dictionary (alpha = 1)
arg_prior_dict_DP = dict(
    n_components=this_K,
    weight_concentration_prior_type='dirichlet_process',
    weight_concentration_prior=1,
    mean_prior=prior_mean,
    mean_precision_prior=1,          # kappa
    degrees_of_freedom_prior=1,      # nu
    covariance_prior=prior_cov,      # psi
)

# same prior with a larger concentration, alpha = 5 (shallow copy: arrays are shared)
arg_prior_dict_DP_B = {**arg_prior_dict_DP, 'weight_concentration_prior': 5}

print(arg_prior_dict_DP)

In [None]:
# Build (but do not yet fit) the two DPMMs. Each combines a prior dictionary
# with the shared arg_dict; a key present in both would raise a TypeError.
# define the DPMM (alpha = 1 prior)
DPMM_A = BayesianGaussianMixture(**arg_prior_dict_DP,**arg_dict)

# dpmm with large alpha (alpha = 5 prior)
DPMM_B = BayesianGaussianMixture(**arg_prior_dict_DP_B,**arg_dict)

In [None]:
# fit both DPMMs to the same sample, passed as a single-feature column
sample_column = this_sample.reshape(-1,1)
DPMM_A.fit(sample_column)
DPMM_B.fit(sample_column)

In [None]:
# # just look at the fits for reference
# plt.plot(qx,sf.eval_pdf(qx,this_kde_dict['Scott'][3]))
# plt.plot(qx,sf.eval_pdf(qx,DPMM_A),label='DPMM A')
# plt.hist(this_sample,density=True,edgecolor='k',color='xkcd:sky blue', alpha=0.25)
# plt.legend()

In [None]:
# # check components for reference
# this_model_ref = DPMM_A
# for k in np.arange(this_model_ref.n_components):
#     rel_w = this_model_ref.weights_[k]
#     if rel_w > 0.01:
#         mean, sig2 = this_model_ref.means_[k],np.squeeze(this_model_ref.covariances_[k])
#         plt.plot(qx,rel_w*sps.norm.pdf(qx,loc=mean, scale=np.sqrt(sig2)),label=k,alpha=1)
        
#     plt.legend()

In [None]:
# number of parameter draws per prior
this_M = 1000

# forward models built in prior mode for the two DPMMs
this_prior_list = {
    'priorA': {'Fmodel': sf.Forward_BGM_Model(DPMM_A,prior=True)},
    'priorB': {'Fmodel': sf.Forward_BGM_Model(DPMM_B,prior=True)},
}

# sample parameters from each prior and evaluate the sampled densities on qx
for key, prior_entry in this_prior_list.items():
    param_sample = prior_entry['Fmodel'].rvs(this_M)
    prior_ys = sf.batch_GMM_pdf(qx, param_sample)
    prior_entry['param_sample'] = param_sample
    prior_entry['priorys'] = prior_ys

In [None]:
# Figure: one row per prior -- expected and sampled prior densities on the left,
# mean sampled weights with error bars on the right.
fig_DPMM_priors, axes = plt.subplots(2,2)
fig_DPMM_priors.set_figheight(8)
fig_DPMM_priors.set_figwidth(8)

this_alpha = [1,5]
# number of priorB components whose mean sampled weight exceeds 1%;
# this fixes how many bars are drawn in BOTH rows
trunc_K_plot = sum(np.mean(this_prior_list['priorB']['param_sample']['weight'],axis=0)>0.01)
n_bars = trunc_K_plot+1
for i,key in enumerate(this_prior_list):
    prior_entry = this_prior_list[key]
    ax_density, ax_weights = axes[i]

    # expected prior density
    qy = prior_entry['Fmodel'].pdf(qx)
    ax_density.plot(qx,qy,label='Expected')

    # first five sampled prior densities (rows of the M x len(qx) array);
    # only one of them carries the legend label
    prior_ys_subset = prior_entry['priorys'][0:5]
    for k,sampled_y in enumerate(prior_ys_subset):
        extra_style = {'alpha': 0.75, 'label': 'Sampled'} if k==1 else {'alpha': 0.65}
        ax_density.plot(qx,sampled_y,color='gray',**extra_style)
    ax_density.legend()

    # mean and sample std of the sampled weights, per component
    sampled_weights = prior_entry['param_sample']['weight']
    weight_bars = np.arange(prior_entry['Fmodel'].K)+1
    height_bars = np.mean(sampled_weights,axis=0)
    height_std = np.std(sampled_weights,ddof=1,axis=0)
    # cap the lower error at the bar height so the bar never extends below zero
    low_err = np.minimum(height_std,height_bars)
    err_bars = np.array([low_err,height_std])
    ax_weights.bar(weight_bars[:n_bars],height_bars[:n_bars],
                   yerr=err_bars[:,:n_bars],
                   error_kw={'capsize': 2,'linewidth':1})

    # panel titles
    ax_density.set_title('Prior, $\\alpha={}$'.format(this_alpha[i]))
    ax_weights.set_title('Exp. Weights, $\\alpha={}$'.format(this_alpha[i]))

# axis labels on the bottom row only
axes[1,0].set_xlabel('$q$')
axes[1,1].set_xlabel('Component $k$')
# share the y-range of the first weight plot
axes[1,1].set_ylim(axes[0,1].get_ylim())

# # savefig
fig_DPMM_priors.tight_layout()
this_fig_title = 'fig_DPMM_prior.png'
fig_all_master[this_fig_title] = fig_DPMM_priors

# # save just this fig
# fig_DPMM_priors.savefig('../'+this_fig_title)

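Now we plot, for each prior, a histogram of the truncation level $K_{\delta}$: the number of components needed before 99% of the sampled weight is accounted for.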

In [None]:
# Scratch check: shape of the truncation counts (components whose remaining
# tail weight still exceeds 1%), presumably one count per sampled weight vector.
# NOTE(review): `key` is not set in this cell -- it is whatever the most recent
# loop over this_prior_list left behind.
np.sum(1-np.cumsum(this_prior_list[key]['param_sample']['weight'],axis=1)>0.01,axis=1).shape

In [None]:
# Scratch check: remaining tail weight after each component for the first
# sampled weight vector.
# NOTE(review): `key` is not set in this cell -- it is whatever the most recent
# loop over this_prior_list left behind.
1-np.cumsum(this_prior_list[key]['param_sample']['weight'],axis=1)[0]

In [None]:
# Histograms of the prior truncation level K_delta, one panel per prior.
# Relies on this_alpha (one alpha per prior, in this_prior_list order) set by
# the prior-figure cell.
fig_DPMM_hist_trunc, axes = plt.subplots(1,2)
fig_DPMM_hist_trunc.set_figwidth(8)

for i,key in enumerate(this_prior_list):
    sampled_weights = this_prior_list[key]['param_sample']['weight']
    # 1 - cumsum is the weight left after each component; counting the components
    # that still leave more than 1% gives the number used before 99% is reached
    K_trunc = np.sum(1-np.cumsum(sampled_weights,axis=1)>0.01,axis=1)

    # +1 converts the count into the index of the component that reaches 99%
    axes[i].hist(K_trunc+1,edgecolor='k')
    # label typo fixed ("Truncatation" -> "Truncation")
    axes[i].set_xlabel('Truncation $K_{\\delta}$')
    axes[i].set_title('DPMM Components, $\\alpha={}$'.format(this_alpha[i]))

# # savefig
this_fig_title = 'fig_DPMM_truncation.png'
fig_all_master[this_fig_title] = fig_DPMM_hist_trunc

# # save just this fig
# fig_DPMM_hist_trunc.savefig('../'+this_fig_title)

### DPMM Posterior

For the posterior plots here, we reuse the prior parameters of BGMM prior C, i.e. the better of the covariance estimates for $\Psi$ considered above.

In [None]:
# show the shared (non-prior) arguments for reference
print(arg_dict)
print()

# start from BGMM prior C, then switch to a Dirichlet-process weight prior
# with 30 components
arg_prior_dict_DP_C = {
    **arg_prior_dict_C,
    'n_components': 30,
    'weight_concentration_prior_type': 'dirichlet_process',
}
print(arg_prior_dict_DP_C)

In [None]:
# define the model: DPMM built from the prior-C-based dictionary plus the shared arguments
DPMM_C = BayesianGaussianMixture(**arg_prior_dict_DP_C,**arg_dict)

# fit the model to the sample, reshaped to a single-feature column
DPMM_C.fit(this_sample.reshape(-1,1))

Compute the expected posterior distribution and plot the components.

In [None]:
# register the fitted DPMM and its forward model, then compute the weighted
# density of every mixture component on the qx grid
key = 'post_DPMM'
this_post_list[key] = {'model': DPMM_C,
                       'Fmodel': sf.Forward_BGM_Model(DPMM_C)}

this_model = this_post_list[key]['model']
this_K = this_model.n_components

# one row per component: weight_k * N(qx; mean_k, cov_k)
these_pdf_outs = np.zeros([this_K,len(qx)])
component_params = zip(this_model.weights_, this_model.means_, this_model.covariances_)
for k,(this_weight,this_mean,this_cov) in enumerate(component_params):
    component_pdf = sps.multivariate_normal.pdf(qx, mean = this_mean, cov = this_cov)
    these_pdf_outs[k,:] = this_weight*component_pdf

# store the component curves alongside the model
this_post_list[key]['comp_pdfs'] = these_pdf_outs


In [None]:
# Figure: expected DPMM posterior with its heaviest components (left) and the
# corresponding posterior mean weights with error bars (right)
fig_DPMM_post, axes = plt.subplots(1,2)
fig_DPMM_post.set_figwidth(10)
ax_density, ax_weights = axes

key = 'post_DPMM'
post_entry = this_post_list[key]

# target density
ax_density.plot(qx,sf.eval_pdf(qx,obs_dist), ls='--', label='Target',
                color='k',alpha=0.4,zorder=-1)

# expected posterior density
qy = sf.eval_pdf(qx,post_entry['model'])
ax_density.plot(qx,qy,label='Expected',zorder=0,linewidth=2)

# posterior mean weight of every component
mean_weights = post_entry['Fmodel'].weight_dist.mean()

# component indices ordered from largest to smallest mean weight; keep the top 5
topX = 5
large_ind_to_small = np.argsort(mean_weights)[::-1]
top_idx = large_ind_to_small[0:topX]
print(top_idx)
colorlist = ['xkcd:orangered','xkcd:crimson','xkcd:orchid']+['C0']*(topX-3)
weight_bars = np.arange(1,topX+1)

# sample std of the weights from 500 forward draws
weight_draws = this_post_list['post_DPMM']['Fmodel'].weight_dist.rvs(500)
weight_std = np.std(weight_draws,ddof=1,axis=0)
# cap the lower error at the bar height so the bar never extends below zero
low_err = np.minimum(weight_std,mean_weights)
err_bars = np.array([low_err,weight_std])

# bars are labelled with the 1-based component index
ax_weights.bar(weight_bars,mean_weights[top_idx],edgecolor=colorlist,
               linewidth=2,tick_label=top_idx+1,
               yerr=err_bars[:,top_idx],
               error_kw={'capsize': 4})

# overlay the top components, coloured to match their bar edges
for comp_color,y_pdf in zip(colorlist,post_entry['comp_pdfs'][top_idx]):
    ax_density.plot(qx,y_pdf, color=comp_color,ls='-.',
                    linewidth=2)

ax_density.legend()

# titles
this_alpha = [1]
ax_density.set_title('Posterior DPMM, $\\alpha={}$'.format(this_alpha[0]))
ax_weights.set_title('Posterior Average Weights')

# axis labels
ax_density.set_xlabel('$q$')
ax_weights.set_xlabel('Component # $k$')

# # savefig
this_fig_title = 'fig_DPMM_post.png'
fig_all_master[this_fig_title] = fig_DPMM_post

# # save just this fig
# fig_DPMM_post.savefig('../'+this_fig_title)

### Credible Interval of the DPMM and Convergence

In the next plot we show the DPMM credible intervals alongside the $L^2$ convergence rates.

The $L^2$ convergence rate is loaded from the same file as the BGMM $L^2$ error.

In [None]:
# draw this_M parameter samples from the DPMM posterior and keep them
key = 'post_DPMM'
this_M = 500
dpmm_entry = this_post_list[key]
dpmm_entry['param_sample'] = dpmm_entry['Fmodel'].rvs(this_M)

# evaluate each sampled mixture on the qx grid
post_ys = sf.batch_GMM_pdf(qx, dpmm_entry['param_sample'])
dpmm_entry['sampled_post_ys'] = post_ys

In [None]:
# make sure that the file is loaded: BGMM_L2err_data comes from the earlier
# np.load cell, and this lists the arrays it contains
BGMM_L2err_data.files

In [None]:
# Plot the DPMM posterior credible band (left) and the L^2 convergence of the
# BGMM and DPMM fits (right)
fig_DPMM_confidence, axes = plt.subplots(1,2)
fig_DPMM_confidence.set_figwidth(12)
ax_ci, ax_rate = axes

# credible level: the band spans the this_CIa/2 and 1-this_CIa/2 pointwise quantiles
this_CIa = 0.05

key = 'post_DPMM'

# ---- left panel: target, expected posterior and pointwise credible band ----
ax_ci.plot(qx,obs_dist.pdf(qx),color='xkcd:black',ls='--',alpha=0.7,
           label='Target')
qy = sf.eval_pdf(qx,this_post_list[key]['model'])
ax_ci.plot(qx,qy,label='Exp. Posterior',color='C1')

# sampled posterior pdfs; quantiles are taken across samples (axis 0)
post_ys = this_post_list[key]['sampled_post_ys']
qy_lower = np.quantile(post_ys,q=this_CIa/2,axis=0)
qy_upper = np.quantile(post_ys,q=1-this_CIa/2,axis=0)
# label and title are built from this_CIa so they always match the plotted band
ax_ci.fill_between(qx,qy_lower,qy_upper,edgecolor='xkcd:red',
                   facecolor='xkcd:yellow orange',alpha=0.5,zorder=2,
                   label='{:0.0f}% CI'.format(100*(1-this_CIa)))

# NOTE(review): N_list and ex_dist come from earlier cells -- confirm that
# N_list[ex_dist] is the sample size the DPMM was fit with.
ax_ci.set_title('{}% CI with $n={}$ Obs.'.format(int(100*(1-this_CIa)),
                                               N_list[ex_dist]))
ax_ci.legend()
ax_ci.set_xlabel('$q$')

# ---- right panel: log-log L^2 error against sample size ----
these_DPMM_errs = ['BGMM_C_L2_err','DPMM_C_L2_err']
line_labels = ["BGMM",
               "DPMM"]
ax_rate.set_title('Convergence Rate in $L^2$')

# log sample sizes; must have one entry per column of the error arrays
x_n = np.log(N_list)
for label,line_label in zip(these_DPMM_errs,line_labels):
    print(label)
    # mean and sample std of the log-error over axis 0
    # (presumably repeated trials -- TODO confirm against the file that wrote the data)
    log_err = np.log(BGMM_L2err_data[label])
    y_n = np.mean(log_err,axis=0)
    y_std = np.std(log_err,ddof=1,axis=0)

    ax_rate.errorbar(x_n,y_n, yerr = y_std,marker='o',
                     barsabove=True,capsize=4,label=line_label)

ax_rate.legend()
ax_rate.set_xlabel('Log Sample Size $n$')
ax_rate.set_ylabel('Log $L^2$ Error')

# # savefig
this_fig_title = 'fig_DPMM_confidence_L2.png'
fig_all_master[this_fig_title] = fig_DPMM_confidence

# #save just this fig
# fig_DPMM_confidence.savefig('../'+this_fig_title)

### Compare DPMM to BGMM

In this section, we make a BGMM with $K=30$ components so that we can compare it to the DPMM.

In [None]:
# covariance prior taken as the squared bandwidth h_mise_list[3]
psi_hmise = np.atleast_2d(h_mise_list[3]**2)

# DPMM D: the DPMM C prior with the covariance prior replaced by psi_hmise
arg_prior_dict_DP_D = {**arg_prior_dict_DP_C, 'covariance_prior': psi_hmise}
print('DPMM D:')
print(arg_prior_dict_DP_D)
print()

# BGMM D: identical to DPMM D (same number of components) except that the
# weights get a Dirichlet-distribution prior
arg_prior_dict_D = {**arg_prior_dict_DP_D,
                    'weight_concentration_prior_type': 'dirichlet_distribution'}
print('BGMM D:')
arg_prior_dict_D

In [None]:
# define the DPMM (Dirichlet-process weight prior) and fit it to the sample
DPMM_D = BayesianGaussianMixture(**arg_prior_dict_DP_D,**arg_dict)
DPMM_D.fit(this_sample.reshape(-1,1))

# define the BGMM (Dirichlet-distribution weight prior) and fit it to the same sample
BGMM_D = BayesianGaussianMixture(**arg_prior_dict_D,**arg_dict)
BGMM_D.fit(this_sample.reshape(-1,1))

In [None]:
# # just look at the fits for reference
# plt.plot(qx,sf.eval_pdf(qx,DPMM_D),label='DPMM D')
# plt.plot(qx,sf.eval_pdf(qx,BGMM_D),label='BGMM D')
# plt.hist(this_sample,density=True,edgecolor='k',color='xkcd:sky blue', alpha=0.25)
# plt.legend()

In [None]:
# register both fitted D models, each paired with its forward model
for post_key, fitted_model in (('post_DPMM_D', DPMM_D), ('BGMM_D', BGMM_D)):
    this_post_list[post_key] = {'model': fitted_model,
                                'Fmodel': sf.Forward_BGM_Model(fitted_model)}


In [None]:
# Figure: expected posteriors of DPMM D and BGMM D against the target (left)
# and their largest posterior mean weights with error bars (right)
fig_DPMM_BGMM_compare, axes = plt.subplots(1,2)
fig_DPMM_BGMM_compare.set_figwidth(10)
fig_DPMM_BGMM_compare.set_figheight(5)


# plot target
axes[0].plot(qx,sf.eval_pdf(qx,obs_dist), ls='--', label='Target',
                color='k',alpha=0.4,zorder=-1)

# per-model plot styling, keyed like this_post_list
these_keys = ['post_DPMM_D','BGMM_D']
hatches = {'post_DPMM_D': None, 'BGMM_D': '/'}
fill_colors = {'post_DPMM_D': 'C0', 'BGMM_D': 'xkcd:orchid'}
edge_colors = {'post_DPMM_D': 'xkcd:sky blue', 'BGMM_D': 'xkcd:dark violet'}
alphas = {'post_DPMM_D': 0.85, 'BGMM_D': 0.75}
plot_labels = {'post_DPMM_D': 'DPMM', 'BGMM_D': 'BGMM'}

# order K components: keep the top 7 (same bar positions for both models)
topX = 7
weight_bars = np.arange(topX)+1
# not used by this figure; kept so the notebook namespace matches the original cell
colorlist = ['xkcd:orangered','xkcd:crimson','xkcd:orchid']+['C0']*(topX-3)

# make plot
for key in these_keys:
    post_entry = this_post_list[key]

    # expected posterior density of this model
    qy = sf.eval_pdf(qx,post_entry['model'])
    axes[0].plot(qx,qy,label=plot_labels[key], color=fill_colors[key],
                 zorder=0,linewidth=3,alpha=0.8)

    # posterior mean weights, ranked from largest to smallest
    mean_weights = post_entry['Fmodel'].weight_dist.mean()
    large_ind_to_small = np.flip(np.argsort(mean_weights))
    top_idx = large_ind_to_small[0:topX]
    print(top_idx)

    # Sample std of the weights from 500 forward draws of THIS model's weight
    # distribution. (Previously this always sampled the 'post_DPMM' entry, so
    # both models were drawn with error bars from an unrelated posterior.)
    weight_std = np.sqrt(np.var(post_entry['Fmodel'].weight_dist.rvs(500),
                        ddof=1,axis=0))
    # cap the lower error at the bar height so the bar never extends below zero
    low_err = np.min(np.array([weight_std,mean_weights]),axis=0)
    err_bars = np.array([low_err,weight_std])

    # bar plot; both models are drawn at the same positions and overlap
    axes[1].bar(weight_bars,mean_weights[top_idx],
                color=fill_colors[key],alpha=alphas[key],edgecolor=edge_colors[key],
                linewidth=2,
                yerr=err_bars[:,top_idx],
                error_kw={'capsize': 4}, hatch=hatches[key],
                ecolor=edge_colors[key],label=plot_labels[key]
                )
# component indices differ between the two models, so the ticks are left unlabelled
axes[1].set_xticklabels([])
axes[0].legend()
axes[1].legend()

# set titles:
this_alpha = [1]
axes[0].set_title('Posterior DPMM vs. BGMM, $\\Psi=h_{MISE}^2$')
axes[1].set_title('Posterior Average Weights')

# labels
axes[0].set_xlabel('$q$')
axes[1].set_xlabel('Components')

# # savefig
fig_DPMM_BGMM_compare.tight_layout()
this_fig_title = 'fig_DPMM_BGMM_compare.png'
fig_all_master[this_fig_title] = fig_DPMM_BGMM_compare

# # save just this fig
# fig_DPMM_BGMM_compare.savefig('../'+this_fig_title)

In [None]:
# list the file name of every figure registered for saving
for key in fig_all_master:
    print(key)

In [None]:
# # # save all figs
# for figfilename in fig_all_master:
#     fig_all_master[figfilename].savefig('../'+figfilename,
#                                         dpi=250,bbox_inches='tight')

## Random Notes (TO BE CLEANED LATER)