In [None]:
# Imports necessary Packages
import matplotlib
import numpy as np
import scipy as sp
import pandas as pd

# Specific Plotting Packages
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import patches
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image


# Specific Stats Packages
from scipy.stats import gaussian_kde
from scipy.stats import truncexpon
from scipy.stats import uniform
from scipy.stats import norm
from scipy.stats import chi2
from scipy.stats import normaltest
from scipy.stats import multivariate_normal as mv_norm

# for showing youtube videos
from IPython.display import YouTubeVideo

# for Q map
from scipy import optimize

# to show in notebook
%matplotlib inline

In [None]:
# fix the state of NumPy's global random generator so that reruns from a
# fresh kernel draw the same samples
SEED = 24513
np.random.seed(SEED)

In [None]:
# default font size for every figure created below
matplotlib.rcParams['font.size'] = 14

In [None]:
# shared colour palette (xkcd colour names understood by matplotlib):
# highlight, blue and red, in that order
col_highlight, col_blue, col_red = (
    'xkcd:yellow',
    'xkcd:sky',
    'xkcd:red orange',
)

# Stochastic Wobbly Plate Example

In this example we have a linear system of equations affected by different noise models. The idea is that we have two sensors which measure the height of the plate and we are trying to determine the variation of the slopes of the wobbly plate. We solve this problem using Data-consistent inversion (see the dissertation for details).

Specifically, the model is:

\begin{align}
q=Q(\lambda)=X\lambda + y_0
\end{align}

where $X$ is a matrix of the locations of the measurement instruments, $y_0$ is the height of the plate above the origin, and $\lambda$ is the slope of the wobbly plate.

We are interested in investigating the case where this system is perturbed by additional stochastic uncertainties. Specifically, we look at the differences between an additive versus a location noise model.

In the additive noise model, we have that:

\begin{align}
q=\widehat{Q}(\lambda,\xi)=Q(\lambda)+\xi= (X\lambda + y_0)+\xi
\end{align}


On the other hand, in a location noise model, we have that:

\begin{align}
q=\widehat{Q}(\lambda,\xi)= (X+\xi)\lambda + y_0
\end{align}


As shown in this notebook, this can lead to very different results because $\lambda$ scales the noise parameter $\xi$ in the location noise model.



### Defines Model Map

> You can adjust whether map produces additive or location noise by setting corresponding option to True or False. Also, you can pass a custom pdf as long as it is a scipy stats-like class with corresponding pdf and rvs calls.

In [None]:
# our model map
def Y(beta,x, height=3, 
      additive_noise=False, add_noise_pdf=norm(0,0.15), 
      location_noise=False, loc_noise_pdf=norm(0,0.15)):
    
    '''
    Wobbly-plate model map: y = height + sum_j beta[:, j] * x[:, j], optionally
    perturbed by location noise (added to x) and/or additive noise (added to y).

    beta: matrix of coefficients, each row a separate observation
    x: location vector of measurements OR matrix. A vector (or anything else
       that broadcasts to the shape of beta) is repeated for every row of beta;
       a matrix must be the same size as beta
    height: fixed height of the center of the wobbly plate
    additive_noise: is there additive noise?
    add_noise_pdf: scipy stats class specifying the additive noise model
    location_noise: is there location noise?
    loc_noise_pdf: scipy stats class specifying location noise model

    returns: tuple (y_output, x_values)
        y_output: one model output per row of beta
        x_values: the locations actually used for each row of beta
                  (including any location noise), same shape as beta

    raises: ValueError if x cannot be matched to the shape of beta
    '''
    
    # accept plain lists/tuples as well as arrays
    beta = np.asarray(beta)
    x = np.asarray(x)
    
    y0 = height # fixed height
    
    if beta.shape != x.shape:
        # repeat the location vector so that every row of beta gets its own
        # copy; .copy() makes the result an ordinary writable array
        try:
            x_values = np.broadcast_to(x, beta.shape).copy()
        except ValueError as err:
            raise ValueError(
                'x with shape {} cannot be matched to beta with shape {}'.format(
                    x.shape, beta.shape)) from err
    else:
        x_values = x
    
    # location noise perturbs where each measurement is taken; it is drawn
    # before the additive noise, so seeded runs keep the same draw order
    if location_noise:
        x_values = x_values+loc_noise_pdf.rvs(x_values.shape)
    
    y_output = y0+np.sum(beta*x_values,axis=1)
    
    # add additive noise
    if additive_noise:
        y_output = y_output+add_noise_pdf.rvs(y_output.shape)
    
    return y_output, x_values

# No Noise Example

Here we look at a scenario where there is no additive or location noise.

What does the solution to the wobbly-plate problem look like using Data-consistent inversion?

### Generate the Data

In [None]:
# number of observations
n_obs = 250

# specified measurement locations, one (x1, x2) pair per row
locations = np.array([
    [0.6, 0.6],
    [0.8, 0.6],
])

> Use `lower_a1` and `lower_a2` to set the domain $\Lambda$. These two parameters set the center of the square bounding box, which extends one unit in each direction from that center (with the center at $(1,1)$, as set below, $\Lambda$ is $[0,2]\times[0,2]$).

In [None]:
# centre used to place both the target distribution and the bounding box
lower_a1 = 1
lower_a2 = 1

# true slopes, one draw per observation:
#   first slope  ~ Uniform[lower_a1 - 0.15, lower_a1 + 0.45]
#   second slope ~ Uniform[lower_a2 + 0.60, lower_a2 + 0.85]
beta1_data = uniform.rvs(loc=lower_a1-0.15, scale=0.6, size=n_obs)
beta2_data = uniform.rvs(loc=lower_a2+0.6, scale=0.25, size=n_obs)

# (n_obs, 2) matrix with one pair of slopes per row
beta_data = np.column_stack((beta1_data, beta2_data))

**Plots of Initial Description vs. Target PDF**

In [None]:
slope_fig, ax = plt.subplots(1)

# dashed outline of the initial description of the slopes: the square
# [lower_a1-1, lower_a1+1] x [lower_a2-1, lower_a2+1]. Each axis uses its own
# centre so the outline stays correct when lower_a1 != lower_a2.
bounding_range = np.linspace(lower_a1-1,lower_a1+1,100)     # horizontal extent
bounding_range_2 = np.linspace(lower_a2-1,lower_a2+1,100)   # vertical extent
box_style = dict(color=col_blue,ls='--',linewidth=2)
plt.plot(bounding_range,np.max(bounding_range_2)*np.ones(100),label='Initial Description of Slopes',**box_style) # top
plt.plot(np.max(bounding_range)*np.ones(100),bounding_range_2,**box_style) # right
plt.plot(bounding_range,np.min(bounding_range_2)*np.ones(100),**box_style) # bottom
plt.plot(np.min(bounding_range)*np.ones(100),bounding_range_2,**box_style) # left
plt.title('Parameter Space: Slope of Plate')
# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')

# scatter plot of slopes
#plt.scatter(beta1_data,beta2_data,label="Target Sampled Distribution")
#plt.scatter(beta_Q_inv[0],beta_Q_inv[1])

# plot actual pdf of slopes (the rectangle the true slopes are sampled from)
slope_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Distribution",
                                   edgecolor='k',facecolor=col_highlight,alpha=1)
ax.add_patch(slope_pdf_plot)

# leave a small margin around the bounding box
plt.xlim(lower_a1-1.2,lower_a1+1.2)
plt.ylim(lower_a2-1.2,lower_a2+1.2)
plt.legend()

In [None]:
#slope_fig.savefig('wobbly-target.png',bbox_inches='tight')

In [None]:
# NO NOISE: evaluate the model at the true slopes for each measurement location
# (Y also returns the locations it used, kept here as loc1 / loc2)
(y_data_loc1, loc1), (y_data_loc2, loc2) = (
    Y(beta_data, sensor, location_noise=False) for sensor in locations
)

# one row of heights per measurement location
y_data = np.vstack((y_data_loc1, y_data_loc2))

# joint gaussian kde of the simulated heights
data_kde_pdf = gaussian_kde(y_data)

In [None]:
# heights at which the one-dimensional densities are evaluated
x = np.linspace(4,6,100)

data_fig = plt.figure()
plt.title('GKDE Distribution of Sampled Height Data')
# one kde curve per measurement location
for heights, curve_style in (
        (y_data_loc1, dict(label='Obs. at Loc A', ls='--', linewidth=2.25, color='xkcd:sky')),
        (y_data_loc2, dict(label='Obs. at Loc B', ls=':', linewidth=3, color='xkcd:red orange'))):
    plt.plot(x, gaussian_kde(heights)(x), **curve_style)
plt.ylabel('Relative Frequency')
plt.xlabel('Height, $y$')
plt.legend()

In [None]:
# data_fig.savefig('wobbly-data.png',bbox_inches='tight')

**Data Consistent Inversion:**

In [None]:
# number of samples for approximating push-forward
n_samples = 4000

# initial description: independent uniforms on
# [lower_a1-1, lower_a1+1] and [lower_a2-1, lower_a2+1]
beta1_initial = uniform(loc=lower_a1-1, scale=2)
beta2_initial = uniform(loc=lower_a2-1, scale=2)

# draw the initial samples (first slope, then second slope)
beta1_sample_initial = beta1_initial.rvs(size=n_samples)
beta2_sample_initial = beta2_initial.rvs(size=n_samples)


In [None]:
# Accept/reject update of the initial samples (no noise in the model).

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the model at both locations
y_push_forward = np.array([ Y(beta_initial,locations[0])[0], 
                            Y(beta_initial,locations[1])[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf = gaussian_kde(y_push_forward)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)

# calculate maximum of the ratio
M = np.max(density_ratio)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples = np.greater(density_ratio/M, test_value)

# accepted values of posterior sample
updated_beta1_sample = beta1_sample_initial[accept_or_reject_samples]
updated_beta2_sample = beta2_sample_initial[accept_or_reject_samples]


In [None]:
# one accept/reject flag per initial sample
np.shape(accept_or_reject_samples)

In [None]:
update_fig, ax = plt.subplots(1)

# unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background, accepted samples on top of them
plt.scatter(beta1_sample_initial,beta2_sample_initial,marker='o',zorder=0,
            color='xkcd:sky',alpha=1, label='Initial Sample')
plt.scatter(updated_beta1_sample,updated_beta2_sample,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')
plt.legend()
plt.title('Data Consistent Update')
# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')

# direct inversion using Q^-1 just for testing
# lam_inverted = np.dot(np.linalg.inv(locations),y_data-3)
# ax.scatter(lam_inverted[0],lam_inverted[1])

In [None]:
# update_fig.savefig('wobbly-update.png',bbox_inches='tight')

In [None]:
# check grayscale compatability
# fname = 'wobbly-update.png'
# image = Image.open(fname).convert("L")
# arr = np.asarray(image)
# plt.imshow(arr, cmap='gray', vmin=0, vmax=255)

# Additive Noise Data

Here we investigate the same setup with the additive noise model.

In [None]:
# ADDITIVE NOISE: evaluate the model at the true slopes for each measurement
# location, with Y's default additive noise model switched on
(y_data_loc1, loc1), (y_data_loc2, loc2) = (
    Y(beta_data, sensor, additive_noise=True) for sensor in locations
)

# one row of heights per measurement location
y_data = np.vstack((y_data_loc1, y_data_loc2))

# joint gaussian kde of the simulated heights
data_kde_pdf = gaussian_kde(y_data)

**Data Consistent Inversion:**

In [None]:
# number of samples for approximating push-forward
n_samples = 4000

# initial description: independent uniforms on
# [lower_a1-1, lower_a1+1] and [lower_a2-1, lower_a2+1]
beta1_initial = uniform(loc=lower_a1-1, scale=2)
beta2_initial = uniform(loc=lower_a2-1, scale=2)

# draw the initial samples (first slope, then second slope)
beta1_sample_initial = beta1_initial.rvs(size=n_samples)
beta2_sample_initial = beta2_initial.rvs(size=n_samples)


In [None]:
# Accept/reject update of the initial samples, with additive noise in the model.

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the additive-noise model
y_push_forward = np.array([ Y(beta_initial,locations[0],additive_noise=True)[0], 
                            Y(beta_initial,locations[1],additive_noise=True)[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf = gaussian_kde(y_push_forward)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)

# calculate maximum of the ratio
M = np.max(density_ratio)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples = np.greater(density_ratio/M, test_value)

# accepted values of posterior sample
updated_beta1_sample = beta1_sample_initial[accept_or_reject_samples]
updated_beta2_sample = beta2_sample_initial[accept_or_reject_samples]


In [None]:
update_fig_noise, ax = plt.subplots(1)

# unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background, accepted samples on top of them
plt.scatter(beta1_sample_initial,beta2_sample_initial,marker='o',zorder=0,
            color='xkcd:sky',alpha=1, label='Initial Sample')
plt.scatter(updated_beta1_sample,updated_beta2_sample,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')
plt.legend()
plt.title('Data Consistent Update')
# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')

In [None]:
# update_fig_noise.savefig('wobbly-update-noise.png',bbox_inches='tight')

# Location Uncertainty Data

Here we solve the same problem using the location noise model.

In [None]:
# LOCATION NOISE: standard deviation of the perturbation of each location
loc_noise_std = 0.05

# evaluate the model at the true slopes for each measurement location;
# loc1 / loc2 are the perturbed locations Y actually used
(y_data_loc1, loc1), (y_data_loc2, loc2) = (
    Y(beta_data, sensor, location_noise=True, loc_noise_pdf=norm(0,loc_noise_std))
    for sensor in locations
)

# one row of heights per measurement location
y_data = np.vstack((y_data_loc1, y_data_loc2))

# joint gaussian kde of the simulated heights
data_kde_pdf = gaussian_kde(y_data)

**Plots of the Data**

In [None]:
# scatter of the perturbed measurement locations around the specified ones
loc_fig, ax1 = plt.subplots()

# perturbed locations returned by Y for each observation
for pts, lab, colour, mark in (
        (loc1, 'actual locations $\\bf{x_A}$', col_blue, 'x'),
        (loc2, 'actual locations $\\bf{x_B}$', col_red, '+')):
    ax1.scatter(pts[:,0], pts[:,1], label=lab, color=colour, marker=mark, s=150)
#ax1.add_patch(patches.Rectangle((-1,-1),2.3,2.3,linewidth=1,edgecolor='gray',facecolor='none',ls='--'))

ax1.set_xlim([-.4,1.4])
ax1.set_ylim([-.4,1.4])
ax1.set_title("Unobserved Measurement Location Variation")

# specified locations, drawn as stars
ax1.plot(locations[:,0], locations[:,1], '*',
         color=col_highlight, label='specified locations',
         markersize=14, markeredgecolor='k')
ax1.legend()

In [None]:
# loc_fig.savefig('wobbly-plate-loc-noise.png',bbox_inches='tight')

In [None]:
# heights at which the one-dimensional densities are evaluated
x = np.linspace(4,6,100)
density_loc_a = gaussian_kde(y_data_loc1)(x)
density_loc_b = gaussian_kde(y_data_loc2)(x)

height_fig_locnoise = plt.figure()
plt.plot(x, density_loc_a, label='Obs. at Loc A',
         ls='--', linewidth=2.25, color='xkcd:sky')
plt.plot(x, density_loc_b, label='Obs. at Loc B',
         ls=':', linewidth=3, color='xkcd:red orange')
# plt.hist(y_data_loc1,edgecolor='k',label="Obs. at Loc A",alpha=0.8)
# plt.hist(y_data_loc2,edgecolor='k',label="Obs. at Loc B",alpha=0.5)
plt.title('GKDE Distribution of Sampled Height Data')
plt.xlabel('Height, $y$')
plt.ylabel('Relative Frequency')
plt.legend()

In [None]:
# height_fig_locnoise.savefig('wobbly-plate-loc-data.png',bbox_inches='tight')

#### Naive Direct Inversion using $Q^{-1}$

In [None]:
# Naive inversion of the noise-free model y = y0 + X*lambda at the specified
# locations: lambda = X^{-1} (y - y0), with y0 = 3 (the default height of Y).
# The linear system is solved directly instead of forming the explicit inverse.
lam_inverted = np.linalg.solve(locations,y_data-3)
plt.scatter(lam_inverted[0],lam_inverted[1])
plt.xlim(0,2)
plt.ylim(0,2)
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')

#### Initial Samples for Data Consistent Inversion

In [None]:
# number of samples for approximating push-forward
n_samples = 4000

# initial description: independent uniforms on
# [lower_a1-1, lower_a1+1] and [lower_a2-1, lower_a2+1]
beta1_initial = uniform(loc=lower_a1-1, scale=2)
beta2_initial = uniform(loc=lower_a2-1, scale=2)

# draw the initial samples (first slope, then second slope)
beta1_sample_initial = beta1_initial.rvs(size=n_samples)
beta2_sample_initial = beta2_initial.rvs(size=n_samples)


#### Computed with Additive Noise Modeled

In [None]:
# Accept/reject update of the initial samples, predicting with the
# additive-noise model.

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the additive-noise model
y_push_forward = np.array([ Y(beta_initial,locations[0],additive_noise=True)[0], 
                            Y(beta_initial,locations[1],additive_noise=True)[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf = gaussian_kde(y_push_forward)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)

# calculate maximum of the ratio
M = np.max(density_ratio)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples = np.greater(density_ratio/M, test_value)

# accepted values of posterior sample
updated_beta1_sample = beta1_sample_initial[accept_or_reject_samples]
updated_beta2_sample = beta2_sample_initial[accept_or_reject_samples]


In [None]:
update_fig_ADD, ax = plt.subplots(1)

# target distribution: unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background
plt.scatter(beta1_sample_initial, beta2_sample_initial,
            label='Initial Sample',marker='o',zorder=0,
            color='xkcd:sky',alpha=1)

# accepted samples on top of them
plt.scatter(updated_beta1_sample,updated_beta2_sample,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')

# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')
plt.title('Data Consistent Update')
plt.legend()

In [None]:
# update_fig_ADD.savefig('wobbly-update-loc-noise-wrong-model.png',bbox_inches='tight')

#### Computed with Location Noise Modeled

In [None]:
# Accept/reject update of the initial samples, predicting with the
# location-noise model.

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the location-noise model
# NOTE(review): these calls use Y's default loc_noise_pdf, norm(0,0.15), while
# the data in this section were generated with norm(0,loc_noise_std) and
# loc_noise_std = 0.05 -- confirm the wider predicted noise is intended.
y_push_forward_LOC = np.array([ Y(beta_initial,locations[0],location_noise=True)[0], 
                            Y(beta_initial,locations[1],location_noise=True)[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf_LOC = gaussian_kde(y_push_forward_LOC)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio_LOC = data_kde_pdf(y_push_forward_LOC)/push_forward_kde_pdf_LOC(y_push_forward_LOC)

# calculate maximum of the ratio
M = np.max(density_ratio_LOC)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples_LOC = np.greater(density_ratio_LOC/M, test_value)

# accepted values of posterior sample
updated_beta1_sample_LOC = beta1_sample_initial[accept_or_reject_samples_LOC]
updated_beta2_sample_LOC = beta2_sample_initial[accept_or_reject_samples_LOC]


In [None]:
update_fig_LOC, ax = plt.subplots(1)

# target distribution: unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background; labelled 'Initial Sample' so the
# legend tells them apart from the accepted samples
plt.scatter(beta1_sample_initial, beta2_sample_initial,marker='o',zorder=0,
            color='xkcd:sky',alpha=1, label='Initial Sample')
# accepted samples on top of them
plt.scatter(updated_beta1_sample_LOC,updated_beta2_sample_LOC,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')

#plt.xlim(-1.2,1.2)
#plt.ylim(-1.2,1.2)

# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')
plt.title('Data Consistent Update')
plt.legend()

In [None]:
# update_fig_LOC.savefig('wobbly-update-loc-noise-correct-model.png',bbox_inches='tight')

In [None]:
# check predictability assumption: sample mean of the ratio
# data_kde/push_kde over the push-forward samples of each model
ratio_additive = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)
ratio_location = data_kde_pdf(y_push_forward_LOC)/push_forward_kde_pdf_LOC(y_push_forward_LOC)

print("For Additive Model: ", ratio_additive.mean())
print()
print("For Loc Model: ", ratio_location.mean())

# Location Uncertainty Model with Domain Shift

Here we shift the initial domain to be $[0,2]$. This will illustrate the differences between location and additive noise models better because the slopes $\lambda$ of the wobbly-plate will scale the location noise more extremely than when we're within the domain $\Lambda:=[0,1]$.

In [None]:
# centre used to place both the target distribution and the bounding box
lower_a1 = 1
lower_a2 = 1

# true slopes, one draw per observation:
#   first slope  ~ Uniform[lower_a1 - 0.15, lower_a1 + 0.45]
#   second slope ~ Uniform[lower_a2 + 0.60, lower_a2 + 0.85]
beta1_data = uniform.rvs(loc=lower_a1-0.15, scale=0.6, size=n_obs)
beta2_data = uniform.rvs(loc=lower_a2+0.6, scale=0.25, size=n_obs)

# (n_obs, 2) matrix with one pair of slopes per row
beta_data = np.column_stack((beta1_data, beta2_data))

In [None]:
# LOCATION NOISE: standard deviation of the perturbation of each location
loc_noise_std = 0.075

# evaluate the model at the true slopes for each measurement location;
# loc1 / loc2 are the perturbed locations Y actually used
(y_data_loc1, loc1), (y_data_loc2, loc2) = (
    Y(beta_data, sensor, location_noise=True, loc_noise_pdf=norm(0,loc_noise_std))
    for sensor in locations
)

# one row of heights per measurement location
y_data = np.vstack((y_data_loc1, y_data_loc2))

# joint gaussian kde of the simulated heights
data_kde_pdf = gaussian_kde(y_data)

In [None]:
slope_fig_shift, ax = plt.subplots(1)

# dashed outline of the initial description of the slopes: the square
# [lower_a1-1, lower_a1+1] x [lower_a2-1, lower_a2+1]. Each axis uses its own
# centre so the outline stays correct when lower_a1 != lower_a2.
bounding_range = np.linspace(lower_a1-1,lower_a1+1,100)     # horizontal extent
bounding_range_2 = np.linspace(lower_a2-1,lower_a2+1,100)   # vertical extent
box_style = dict(color=col_blue,ls='--',linewidth=2)
plt.plot(bounding_range,np.max(bounding_range_2)*np.ones(100),label='Initial Description of Slopes',**box_style) # top
plt.plot(np.max(bounding_range)*np.ones(100),bounding_range_2,**box_style) # right
plt.plot(bounding_range,np.min(bounding_range_2)*np.ones(100),**box_style) # bottom
plt.plot(np.min(bounding_range)*np.ones(100),bounding_range_2,**box_style) # left
plt.title('Parameter Space: Slope of Plate')
# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')

# scatter plot of slopes
#plt.scatter(beta1_data,beta2_data,label="Target Sampled Distribution")
#plt.scatter(beta_Q_inv[0],beta_Q_inv[1])

# plot actual pdf of slopes (the rectangle the true slopes are sampled from)
slope_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Distribution",
                                   edgecolor='k',facecolor=col_highlight,alpha=1)
ax.add_patch(slope_pdf_plot)

# leave a small margin around the bounding box
plt.xlim(lower_a1-1.2,lower_a1+1.2)
plt.ylim(lower_a2-1.2,lower_a2+1.2)
plt.legend()

**Plots of the Data**

In [None]:
# scatter of the perturbed measurement locations around the specified ones
loc_fig_shift, ax1 = plt.subplots()

# perturbed locations returned by Y for each observation
for pts, lab, colour, mark in (
        (loc1, 'actual locations $\\bf{x_A}$', col_blue, 'x'),
        (loc2, 'actual locations $\\bf{x_B}$', col_red, '+')):
    ax1.scatter(pts[:,0], pts[:,1], label=lab, color=colour, marker=mark, s=150)
#ax1.add_patch(patches.Rectangle((-1,-1),2.3,2.3,linewidth=1,edgecolor='gray',facecolor='none',ls='--'))

ax1.set_xlim([-.4,1.4])
ax1.set_ylim([-.4,1.4])
ax1.set_title("Unobserved Measurement Location Variation")

# specified locations, drawn as stars
ax1.plot(locations[:,0], locations[:,1], '*',
         color=col_highlight, label='specified locations',
         markersize=14, markeredgecolor='k')
ax1.legend()

In [None]:
# heights at which the one-dimensional densities are evaluated
x = np.linspace(3,6,100)
density_loc_a = gaussian_kde(y_data_loc1)(x)
density_loc_b = gaussian_kde(y_data_loc2)(x)

height_fig_locnoise_shift = plt.figure()
plt.plot(x, density_loc_a, label='Obs. at Loc A',
         ls='--', linewidth=2.25, color='xkcd:sky')
plt.plot(x, density_loc_b, label='Obs. at Loc B',
         ls=':', linewidth=3, color='xkcd:red orange')
# plt.hist(y_data_loc1,edgecolor='k',label="Obs. at Loc A",alpha=0.8)
# plt.hist(y_data_loc2,edgecolor='k',label="Obs. at Loc B",alpha=0.5)
plt.title('GKDE Distribution of Sampled Height Data')
plt.xlabel('Height, $y$')
plt.ylabel('Relative Frequency')
plt.legend()

#### Initial Samples for Data Consistent Inversion

In [None]:
# number of samples for approximating push-forward
n_samples = 3500

# initial description: independent uniforms on
# [lower_a1-1, lower_a1+1] and [lower_a2-1, lower_a2+1]
beta1_initial = uniform(loc=lower_a1-1, scale=2)
beta2_initial = uniform(loc=lower_a2-1, scale=2)

# draw the initial samples (first slope, then second slope)
beta1_sample_initial = beta1_initial.rvs(size=n_samples)
beta2_sample_initial = beta2_initial.rvs(size=n_samples)


#### Computed with Additive Noise Modeled

This shows what happens when we use the wrong model to compute the update.

Here, the additive noise model doesn't satisfy the predictability assumption (due to lack of scaling of the location noise).

In [None]:
# Accept/reject update of the initial samples, predicting with the
# additive-noise model.

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the additive-noise
# model, with a noise scale 10% larger than the location-noise scale of the data
add_sig = loc_noise_std*1.1
y_push_forward = np.array([ Y(beta_initial,locations[0],additive_noise=True,add_noise_pdf=norm(0,add_sig))[0], 
                            Y(beta_initial,locations[1],additive_noise=True,add_noise_pdf=norm(0,add_sig))[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf = gaussian_kde(y_push_forward)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)

# calculate maximum of the ratio
M = np.max(density_ratio)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples = np.greater(density_ratio/M, test_value)

# accepted values of posterior sample
updated_beta1_sample = beta1_sample_initial[accept_or_reject_samples]
updated_beta2_sample = beta2_sample_initial[accept_or_reject_samples]


In [None]:
update_fig_ADD_shift, ax = plt.subplots(1)

# target distribution: unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background
plt.scatter(beta1_sample_initial, beta2_sample_initial,
            label='Initial Sample',marker='o',zorder=0,
            color='xkcd:sky',alpha=1)

# accepted samples on top of them
plt.scatter(updated_beta1_sample,updated_beta2_sample,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')

# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')
plt.title('Data Consistent Update')
plt.legend(loc=3)

In [None]:
# update_fig_ADD_shift.savefig('wobbly-update-loc-noise-wrong-model2.png',bbox_inches='tight')

#### Computed with Location Noise Modeled

In [None]:
# Accept/reject update of the initial samples, predicting with the
# location-noise model.

# combine into a matrix of observations: one pair of slopes per row
beta_initial = np.stack((beta1_sample_initial,beta2_sample_initial),axis=1)

# calculate data values: push the initial samples through the location-noise
# model, with a noise scale 10% larger than the one used to generate the data
loc_sig = loc_noise_std*1.1
y_push_forward_LOC = np.array([ Y(beta_initial,locations[0],location_noise=True,loc_noise_pdf=norm(0,loc_sig))[0], 
                            Y(beta_initial,locations[1],location_noise=True,loc_noise_pdf=norm(0,loc_sig))[0]])

# calculate a gaussian kde for the push-forward of the sampled initial values
push_forward_kde_pdf_LOC = gaussian_kde(y_push_forward_LOC)

# ratio data_kde/push_kde at every push-forward sample; evaluated once and
# reused below, since each kde evaluation visits every sample
density_ratio_LOC = data_kde_pdf(y_push_forward_LOC)/push_forward_kde_pdf_LOC(y_push_forward_LOC)

# calculate maximum of the ratio
M = np.max(density_ratio_LOC)

# generate random numbers from uniform for accept-reject for each sample value
test_value = np.random.uniform(0,1,n_samples)

# accept a sample when data_kde/push_kde/M is greater than its test value
accept_or_reject_samples_LOC = np.greater(density_ratio_LOC/M, test_value)

# accepted values of posterior sample
updated_beta1_sample_LOC = beta1_sample_initial[accept_or_reject_samples_LOC]
updated_beta2_sample_LOC = beta2_sample_initial[accept_or_reject_samples_LOC]


In [None]:
update_fig_LOC_shift, ax = plt.subplots(1)

# target distribution: unfilled outline of the rectangle the true slopes were sampled from
target_pdf_plot = patches.Rectangle((lower_a1-0.15,lower_a2+0.6),0.6,0.25,
                                   label="Target Sample",
                                   edgecolor='xkcd:yellow',facecolor='C2',alpha=1,
                                   zorder=2,fill=False,linewidth=3.5)
ax.add_patch(target_pdf_plot)

# all initial samples in the background, accepted samples on top of them
plt.scatter(beta1_sample_initial, beta2_sample_initial,marker='o',zorder=0,
            color='xkcd:sky',alpha=1, label='Initial Sample')
plt.scatter(updated_beta1_sample_LOC,updated_beta2_sample_LOC,
            marker='x',s=140,
            color='xkcd:red orange',edgecolor='k',zorder=1,alpha=1,
            label='Update Sample')

#plt.xlim(-1.2,1.2)
#plt.ylim(-1.2,1.2)

# raw strings: the LaTeX backslash must not be parsed as a string escape
plt.xlabel(r'$\lambda_1$')
plt.ylabel(r'$\lambda_2$')
plt.title('Data Consistent Update')
plt.legend()

In [None]:
# update_fig_LOC_shift.savefig('wobbly-update-loc-noise-correct-model2.png',bbox_inches='tight')

In [None]:
# check predictability assumption: sample mean of the ratio
# data_kde/push_kde over the push-forward samples of each model
ratio_ADD = data_kde_pdf(y_push_forward)/push_forward_kde_pdf(y_push_forward)
ratio_LOC = data_kde_pdf(y_push_forward_LOC)/push_forward_kde_pdf_LOC(y_push_forward_LOC)
mean_r_ADD = ratio_ADD.mean()
mean_r_LOC = ratio_LOC.mean()

print("For Additive Model: ", mean_r_ADD)
print()
print("For Loc Model: ", mean_r_LOC)

## Plot Predicted vs. Observed Distribution

In [None]:
import matplotlib.colors as colors

In [None]:
# make data-space grid: a square whose side spans every predicted height
# from both push-forward samples
lower_lim = min(y_push_forward_LOC.min(),y_push_forward.min())
upper_lim = max(y_push_forward_LOC.max(),y_push_forward.max())
qx = np.linspace(lower_lim,upper_lim,150)
qy = qx.copy()  # same range on both axes
qX, qY = np.meshgrid(qx, qy)

# grid points as a 2-row array, the layout the kdes are evaluated on
eval_qXY = np.stack((qX.ravel(), qY.ravel()))

# eval points and reshape back onto the grid:
# Z1 = additive-model prediction, Z2 = location-model prediction, Z3 = data
Z1, Z2, Z3 = (
    pdf(eval_qXY).reshape(qX.shape)
    for pdf in (push_forward_kde_pdf, push_forward_kde_pdf_LOC, data_kde_pdf)
)

In [None]:
fig_contours_data, ax = plt.subplots(1)

level_sets = [0.001,0.01,0.1,0.5,0.75]

# ax.scatter(y_predict[i][0],y_predict[i][1],color='xkcd:sky',
#            marker='.',s=70,alpha=0.5,label='Predict sample')

# contours of the data kde on the data-space grid
contour_style = dict(levels=level_sets,
                     norm=colors.PowerNorm(gamma=0.3),
                     cmap='winter', alpha=0.7)
contours = ax.contour(qX, qY, Z3, **contour_style)
# ax.clabel(contours,contours.levels,inline=True)

# the simulated heights themselves (row 0 against row 1 of y_data)
ax.scatter(*y_data, color='xkcd:red orange',
           marker='x', s=70, label='Obs. data')

ax.set(title='GKDE of Sampled Height Data',
       xlabel='$y_A$',
       ylabel='$y_B$')
# ax.annotate('$\\widebar{{r}}={:0.3}$'.format(mean_r[i]), 
#             xy=(0.1,0.8), xycoords='axes fraction',
#             va='top')
ax.legend(loc='lower right')


In [None]:
# one figure per predictive model: additive first, location second
fig_contours_predict, ax_ADD = plt.subplots(1)
fig_contours_predict_LOC, ax_LOC = plt.subplots(1)

level_sets = [0.001,0.01,0.1,0.5,0.75]

# per-model inputs, in the same order as the axes
y_predict = [y_push_forward,y_push_forward_LOC]
Zs = [Z1,Z2]
title_str = ['Unsatisfactory', 'Satisfactory'] #['Add. Noise','Loc. Noise']
mean_r = [mean_r_ADD,mean_r_LOC]

for ax, predicted, density, verdict, r_bar in zip(
        [ax_ADD,ax_LOC], y_predict, Zs, title_str, mean_r):
    # predicted sample with labelled contours of its kde
    ax.scatter(predicted[0], predicted[1], color='xkcd:sky',
               marker='.', s=70, alpha=0.5, label='Predict sample')
    contours = ax.contour(qX, qY, density, levels=level_sets,
                          norm=colors.PowerNorm(gamma=0.3),
                          cmap='winter', alpha=0.7)
    ax.clabel(contours, contours.levels, inline=True)

    # simulated data overlaid for comparison
    ax.scatter(y_data[0], y_data[1], color='xkcd:red orange',
               marker='x', s=70, label='Obs. data')

    ax.set_title('{} Predictive Model'.format(verdict))
    ax.set_xlabel('$y_A$')
    ax.set_ylabel('$y_B$')
    # annotate with the mean ratio computed for this model
    ax.annotate('$\\widebar{{r}}={:0.3}$'.format(r_bar),
                xy=(0.1,0.8), xycoords='axes fraction',
                va='top')
    ax.legend(loc='lower right')
    

In [None]:
# Save Dissertation Figs
# target file names, in the same order as fig_list below
diss_labels = [
    'fig_dissertation_loc-noise-datagen.png',
    'fig_dissertation_add-noise-unsatisfactory.png',
    'fig_dissertation_loc-noise-satisfactory.png',
]

fig_list = [
    fig_contours_data,
    fig_contours_predict,
    fig_contours_predict_LOC,
]

# list each figure with its file name; the actual save stays disabled
for idx in range(min(len(fig_list), len(diss_labels))):
    this_fig, fig_name = fig_list[idx], diss_labels[idx]
    print(this_fig,fig_name)
#     this_fig.savefig(fig_name,bbox_inches='tight',dpi=250)

## Old Reflections and Plots

In [None]:
# beta_initial.shape

**Consistency Plot Eye-ball Checks**
> Note: the following cell is not up to date

In [None]:
# beta_update = np.stack((updated_beta1_sample,updated_beta2_sample),axis=1)

# plt.figure()
# #plt.hist(y_data_loc1,edgecolor='k',label="Data at Loc A",alpha=0.8,density=True)
# plt.hist(y_data_loc2,edgecolor='k',label="Data at Loc B",alpha=0.5,density=True)
# #plt.hist(y_push_forward[0],edgecolor='k',density=True,alpha=0.8,label='PF at Loc A')
# plt.hist(y_push_forward[1],edgecolor='k',density=True,alpha=0.5,label='PF at Loc B')
# #plt.hist(Y(beta_update,locations[0],location_noise=True)[0],
# #         edgecolor='k',label="UP-PF at Loc A",alpha=0.8,density=True)
# plt.hist(Y(beta_update,locations[1],location_noise=True)[0],
#          edgecolor='k',label="UP-PF at Loc B",alpha=0.5,density=True)
# plt.title("Data Distribution")
# plt.legend()

## Notes and Reflections

> **Notes from experimentation**: If you choose a location noise model, then we have a model which looks like:

>\begin{align}
Q(\beta_1,\beta_2)&=y_0+\beta_1x_1+\beta_2x_2+(\beta_1\xi_1+\beta_2\xi_2) \\
&=y_0+\beta_1x_1(\xi_1)+\beta_2x_2(\xi_2)
\end{align}

Observations:
* If there are not enough data points from $\pi_\mathcal{D}^{obs}$, then it is possible that the predictability assumption will be violated without us being able to detect this.
    * Test Case: If you use location errors and $\beta_1,\beta_2>1$, then the nuisance parameters $\xi_1$ and $\xi_2$ can have extreme values which cannot be predicted by an additive Gaussian noise model. However, this may not be detectable if only a small sample of $\xi_1,\xi_2$ is used.

* Choose a model that satisfies the predictability assumption, comparing it against other model-inadequacy models which also satisfy it.

# SAVE ALL FIGS FOR PAPER

In [None]:
# slope_fig.savefig('wobbly-target.png',bbox_inches='tight')
# data_fig.savefig('wobbly-data.png',bbox_inches='tight')
# update_fig.savefig('wobbly-update.png',bbox_inches='tight')
# update_fig_noise.savefig('wobbly-update-noise.png',bbox_inches='tight')
# loc_fig.savefig('wobbly-plate-loc-noise.png',bbox_inches='tight')
# height_fig_locnoise.savefig('wobbly-plate-loc-data.png',bbox_inches='tight')
# update_fig_ADD.savefig('wobbly-update-loc-noise-wrong-model.png',bbox_inches='tight')
# update_fig_LOC.savefig('wobbly-update-loc-noise-correct-model.png',bbox_inches='tight')
# update_fig_ADD_shift.savefig('wobbly-update-loc-noise-wrong-model2.png',bbox_inches='tight')
# update_fig_LOC_shift.savefig('wobbly-update-loc-noise-correct-model2.png',bbox_inches='tight')