### Inverse Problems and Parameter Estimation, GEOS 627/427, University of Alaska Fairbanks

- script ex2p1_ex2p2.ipynb

In [None]:
%matplotlib inline

In [None]:
%run lib_header.py

In [None]:
import scipy.io
import scipy.special
from numpy.linalg import inv

from lib_inverse import plot_ellipse
from lib_peip import chi2cdf
from lib_peip import chi2inv

In [None]:
# Load the observation table; columns 0, 1, 2 of data1 are used as t, y, sigma
dat   = scipy.io.loadmat('data/data1.mat')
data1 = dat['data1']
print(f"data1: {data1.shape}")

# Keep every quantity as an (ndata x 1) column so the matrix algebra below
# can stack and multiply them directly.
ndata = len(data1)
t, y, sigma = (data1[:, col].reshape(ndata, 1) for col in range(3))
ones  = np.ones((ndata, 1))

print('displaying t, y, sigma:')
showmat(np.hstack((t, y, sigma)), 2)

In [None]:
# Design matrix of the parabolic model: columns are 1, t and -t^2/2
G = np.concatenate((ones, t, -0.5 * t**2), axis=1)
showmat(G)

In [None]:
# Scale each observation, and the matching row of G, by 1/sigma
yw = y / sigma
# sigma is a single column, so it broadcasts across every column of G
Gw = G / sigma
showmat(Gw, 2)

In [None]:
# Weighted least-squares estimate via the normal equations
print('Least-squares solution, m =')
m = (inv(Gw.T @ Gw) @ Gw.T) @ yw
showmat(m, 1)
# m is a single column; pull its three entries out as scalars
m1, m2, m3 = m[:, 0]

In [None]:
# Matrix that maps the weighted data yw to the least-squares model:
# (Gw^T Gw)^-1 Gw^T
ginv = np.matmul(inv(np.matmul(Gw.T, Gw)), Gw.T)
showmat(ginv, 2)

In [None]:
# Covariance of the estimated parameters, formed as ginv @ ginv^T
print('Covariance matrix')
C = covm = ginv @ ginv.T   # C is a second name bound to the same array
showmat(covm, 2)

In [None]:
# see notes_tarantola.pdf
# DELTA is the number of standard deviations such that a normal variable
# lies within +/- DELTA of its mean with probability PCONF.
PCONF = 0.95
DELTA = np.sqrt(2) * scipy.special.erfinv(PCONF)
print(DELTA)

In [None]:
# Get the 1.96-sigma (95%) conf intervals
print('%.1f%% parameter confidence intervals (m-delm, mest, m+delm)' % (PCONF*100))
# half-width for each parameter: DELTA times its standard deviation, as a column
delm = DELTA * np.sqrt(np.diag(covm))[:, np.newaxis]
showmat(delm, 2)
# columns: lower bound, estimate, upper bound
showmat(np.concatenate((m - delm, m, m + delm), axis=1), 2)

In [None]:
# Because there are 3 parameters to estimate, we have ndata-3 degrees of freedom.
dof = ndata - 3
# chi-square misfit: squared norm of the sigma-normalized residuals
chi2 = np.linalg.norm((y - G @ m) / sigma) ** 2
print('chi-square misfit for %i dof = %.2f' % (dof, chi2))

# Find the p-value for this data set
p = 1 - chi2cdf(chi2, dof)
print('chi-square p-value = %.2f' % p)

# Find the parameter correlations:
# s holds the parameter standard deviations as a column
s = np.sqrt(np.diag(covm))[:, np.newaxis]

print('correlation matrix =')
# s * s.T broadcasts to the matrix of products s_i * s_j
r = covm / (s * s.T)
showmat(r, 2)

In [None]:
# Plot the data and model predicted data
# Evaluate the fitted parabola on a fine grid extending 1 unit beyond the data
xx = np.arange(t.min()-1, t.max()+1+0.05, 0.05)
mm = m1 + m2*xx - 0.5*m3*xx**2

plt.figure(figsize=(9, 7))
plt.plot(xx, mm, 'k')
# t, y and sigma are column arrays and are flattened to 1-D for errorbar()
marker_style = dict(ls='none', marker='o', mfc='none', capsize=3, mec='k', ms=3)
plt.errorbar(t.ravel(), y.ravel(), yerr=sigma.ravel(), **marker_style)
plt.xlabel('Time (s)')
plt.ylabel('Elevation (m)')
plt.show()

In [None]:
print('Displaying Data and Model Fit (fig 1)')

# Output covm and the eigenvalues/eigenvectors of inv(covm).
print('Covariance matrix for fitted parameters.')
showmat(covm,2)

# The decomposition is taken of the INVERSE covariance matrix. Its eigenvectors
# are shared with covm, but its eigenvalues are the reciprocals of covm's,
# so the eigenvalue label below says "inverse".
lam0,u0 = np.linalg.eig(inv(covm))
# we need to sort them to match Aster
# note: the sign of u1 is flipped
indices = np.argsort(lam0)
lam, u = lam0[indices], u0[:, indices]
print('Eigenvalues of the inverse covariance matrix:')
showmat([lam],4)
print('Eigenvectors of the covariance matrix:')
showmat(u,2)

# Semi-axis i of the confidence ellipsoid is sqrt(chi2inv(PCONF,3) / lam_i);
# 3 is the number of model parameters.
print('%.1f%% confidence ellipsoid semiaxis lengths:' % (PCONF*100))
semi_axes = np.sqrt(chi2inv(PCONF,3)/lam)
showmat([semi_axes],2)

In [None]:
# Monte Carlo Section
# Noise-free data predicted by the least-squares model; each realization
# perturbs these and re-estimates the model.
y0 = G @ m
nreal = 1000
mmc = np.zeros((3, nreal))     # column i holds the model from realization i
chimc = np.zeros(nreal)        # chi-square misfit of realization i
Ginv = np.linalg.pinv(Gw)      # computed once, reused for every realization

for i in range(nreal):
    # Draw noise scaled by each datum's sigma and add it to the predictions
    noise = sigma * np.random.randn(ndata, 1)
    ytrial = y0 + noise
    ywtrial = ytrial / sigma
    # KEY COMMAND: SOLVE FOR m FOR A SET OF DATA WITH ERRORS ADDED
    mmx = Ginv @ ywtrial
    mmc[:, i] = mmx[:, 0]
    # misfit of this realization's model against its own perturbed data
    chimc[i] = np.linalg.norm((G @ mmx - ytrial) / sigma) ** 2

In [None]:
# Histogram (30 bins) of the Monte Carlo chi-square values
plt.figure()
plt.hist(chimc, bins=30)
plt.xlabel(r'$\chi^2$')
plt.ylabel('N')
print('Displaying 1000 Monte-Carlo Chi-square Values (fig 2)')

In [None]:
# Plot the histograms of the model parameters, one panel per parameter
plt.figure(figsize=(10,5))

param_titles = (r'$m_1$ (m)', r'$m_2$ (m/s)', r'$m_3$ (m/s$^2$)')
for row, panel_title in enumerate(param_titles):
    plt.subplot(1, 3, row + 1)
    plt.hist(mmc[row, :])
    plt.title(panel_title)

print('Displaying Monte-Carlo Model Histograms (fig 3)')

In [None]:
# Plot the realizations of each pair of model parameters with the other
plt.figure(figsize=(10,5))

param_labels = (r'$m_1$ (m)', r'$m_2$ (m/s)', r'$m_3$ (m/s$^2$)')
# (x-parameter, y-parameter) row indices into mmc, one pair per panel
param_pairs = ((0, 1), (0, 2), (1, 2))
for panel, (ix, iy) in enumerate(param_pairs, start=1):
    plt.subplot(1, 3, panel)
    plt.plot(mmc[ix, :], mmc[iy, :], 'k*')
    plt.xlabel(param_labels[ix])
    plt.ylabel(param_labels[iy])

plt.subplots_adjust(wspace=0.5)
plt.tight_layout()

print('Displaying Projections of 1000 Monte-Carlo models (fig 4)')

In [None]:
# Chi-square threshold for the 95% error ellipses of each parameter pair.
# Pairs of parameters mean 2 degrees of freedom in the chi-square here, rather than 3.

deltachi2 = chi2inv(PCONF, 2)
delta = np.sqrt(deltachi2)
summary = (PCONF, deltachi2, delta)
print('PCONF = %.2f, deltachisq = %.2f, delta = %.2f' % summary)

In [None]:
# THESE NUMBERS DO NOT QUITE MATCH ASTER 2.56
print('%.1f%% parameter confidence intervals (m, delm)' % (PCONF*100))
# half-widths scaled by delta (from the 2-dof chi-square threshold);
# this rebinds delm
delm = delta * np.sqrt(np.diag(covm))[:, np.newaxis]
showmat(np.concatenate((m, delm), axis=1), 2)

In [None]:
# plot_ellipse() can be found in lib_inverse.py
def plot_ellipse_custom(inds,DELTA2,C,m):
    """Draw, in red on the current axes, the ellipse for one pair of parameters.

    Parameters
    ----------
    inds : sequence of two ints
        Indices of the two model parameters to plot against each other.
    DELTA2 : float
        Passed unchanged as the first argument of plot_ellipse(); callers in
        this notebook supply a chi-square threshold.
    C : ndarray
        Full parameter covariance matrix. Only the 2x2 sub-block selected by
        ``inds`` is passed on to plot_ellipse().
    m : ndarray
        Model vector; entries ``inds[0]`` and ``inds[1]`` are passed as the
        third argument of plot_ellipse() (presumably the ellipse center --
        confirm against lib_inverse.py).
    """
    sub_block = np.ix_(inds, inds)
    # Use the covariance passed by the caller. This previously read the global
    # covm and silently ignored the C argument.
    C_pair = C[sub_block]
    m_pair = np.array([m[inds[0]], m[inds[1]]])
    [x, y] = plot_ellipse(DELTA2, C_pair, m_pair)
    plt.plot(x, y, 'r')

In [None]:
# One panel per parameter pair, each showing that pair's error ellipse
plt.figure(figsize=(10,5))

axis_labels = (r'$m_1$ (m)', r'$m_2$ (m/s)', r'$m_3$ (m/s$^2$)')
for panel, pair in enumerate(([0, 1], [0, 2], [1, 2]), start=1):
    plt.subplot(1, 3, panel)
    plot_ellipse_custom(pair, deltachi2, C, m)
    plt.xlabel(axis_labels[pair[0]])
    plt.ylabel(axis_labels[pair[1]])

plt.subplots_adjust(wspace=0.5)
plt.tight_layout()
plt.show()