In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import math
from numpy.linalg import inv
import numpy.matlib
from scipy.stats.distributions import chi2
import warnings
warnings.filterwarnings('ignore')
import scipy.linalg as la

from lib_peip import *
from lib_geos import *

In [None]:
# genlsq = Generalized Least Squares
# Template script for the iterative quasi-Newton method for a 4-parameter
# inversion for epicenter (xs, ys), origin time (ts), and velocity (V).
# The algorithm employs generalized least squares, whereby both data
# covariances and model covariances are used.
#
# Background reading: Tarantola book (2005), Ch. 3 and Appendix 6.22
#
# calls forward_epicenter.ipynb, plot_histo()
#
# contributors: Carl Tape, Nealey Sims*, Amanda McPherson*

In [None]:
# ---------------- USER INPUT ----------------
# number of samples
nsamples = 1000

# option flags (allowed values listed per flag)
irandom_initial_model = 0   # 0 = fixed, 1 = random
irandom_target_model  = 0   # 0 = fixed, 1 = random
idata_errors          = 2   # 0 = none,  1 = random, 2 = fixed
ifig                  = 1   # 0 or 1

inormalization = 1

# text labels reused by later plots
stnsamples = '{} samples'.format(nsamples)
stlabS = tuple(['Sd(m^n)', 'Sm(m^n)', 'S(m^n) = Sd + Sm'])

# forward problem to test: 1 = default, 2 = Tarantola 7-1
iforward = 1

In [None]:
# FORWARD PROBLEM: EXPLORE THIS SEPARATE NOTEBOOK
# The IPython %run magic executes the selected forward-problem notebook.
# NOTE(review): later cells use names that are never defined in this notebook
# (e.g. d, mprior, minitial, dobs, icobs, icprior, nparm, ndata) -- presumably
# they are created by the notebook run here; confirm against that notebook.
if iforward==1:
    # default forward problem
    %run ./forward_epicenter.ipynb
else:
    # alternative forward problem (iforward=2 is labelled "Tarantola 7-1" in USER INPUT)
    %run ./forward_epicenter_crescent.ipynb

In [None]:
if ifig==1:
    # Histograms of the prior model covariance samples, one panel per model
    # parameter. Bin edges span +/- 4 sigma in steps of sigma/2.
    # The grid is 2 x 2 for up to 4 parameters and grows by rows beyond that
    # (a fixed 2 x 2 grid raises in plt.subplot as soon as nparm > 4).
    nc_m = 2
    nr_m = max(2, math.ceil(nparm/nc_m))
    fig = plt.figure(figsize=(9,9))
    for kk in range(nparm):
        sigma = sigma_prior[kk]
        edges = np.arange(-4*sigma,4*sigma,sigma/2)
        etemp = cov_samples_m[kk,:]
        plt.subplot(nr_m,nc_m,kk+1)
        plt.grid()
        plot_histo(etemp,edges)
        #plt.bar(etemp)
        plt.ylim([0, 0.4])
        plt.title('mprior samples: Model parameter '+str(kk+1)+' ('+str(mlabs[kk])+')\n'+'mean = %.5f; std = %.5f' % (np.mean(etemp),np.std(etemp)))
    # lay out the figure once, after all panels exist (not once per panel)
    plt.tight_layout()

    # Histograms of the data covariance samples, one panel per datum.
    # The grid is 4 x 3 for up to 12 data and grows by rows beyond that.
    nc_d = 3
    nr_d = max(4, math.ceil(ndata/nc_d))
    fig2 = plt.figure(figsize=(9,11))
    for ii in range(ndata):
        sigma = sigma_obs[ii]
        edges = np.arange(-4*sigma,4*sigma,sigma/2)
        etemp = cov_samples_d[ii,:]
        plt.subplot(nr_d,nc_d,ii+1)
        plot_histo(etemp,edges)
        plt.ylim([0, 0.4])
        plt.title('Data index ' + str(ii+1) +'\n'+'mean = %.5f; std = %.5f' % (np.mean(etemp),np.std(etemp)))
    plt.tight_layout()

In [None]:
# predictions for prior and initial models (not necessary)
dprior   = d(mprior)
dinitial = d(minitial)

if ifig==1:
    # Compare the predictions of the initial and target models with the
    # observations (target predictions plus errors), one point per datum.
    fig3 = plt.figure(figsize=(10,10))
    ivecd = np.arange(1,ndata+1)
    plt.plot(ivecd,dobs_samples,'.-')
    #p1 = plt.plot(ivecd,dprior,'bo-',linewidth=2,markersize=10,markerfacecolor='b',markeredgecolor='w')
    p2 = plt.plot(ivecd,dinitial,'ko-',linewidth=2,markersize=10,markerfacecolor='k',markeredgecolor='w')
    p3 = plt.plot(ivecd,dtarget,'ro--',linewidth=2,markersize=10,markerfacecolor='r',markeredgecolor='w')
    p4 = plt.plot(ivecd,dobs,'ro-',linewidth=2,markersize=10,markerfacecolor='r',markeredgecolor='w')
    # The legend position is the `loc` keyword in matplotlib. The MATLAB-style
    # 'location','northwest' pair used to sit inside the label list, where it
    # was treated as two extra labels and the position was never applied.
    #plt.legend([p1[0],p2[0],p3[0],p4[0]],['g(mprior)','g(minitial)','g(mtarget)','g(mtarget) + errors'],loc='upper left')
    plt.legend([p2[0],p3[0],p4[0]],['d(minitial)','d(mtarget)','d(mtarget) + errors'],loc='upper left')
    plt.title('BLACK = d(minitial); RED DASHED = d(mtarget); RED = d(mtarget) + errors');
    plt.xlim([0.5, ndata+0.5])  #set(gca,'xtick',[1:ndata]);
    plt.xlabel('Data index')
    plt.ylabel('Prediction value, d = g(m)')

In [None]:
# MISFIT FUNCTION: least squares, Tarantola (2005), Eq. 6.251
# This calls the function d(m) to compute the predictions.

# data misfit
def Sd(m,dobs,icobs):
    """Data misfit: 0.5 * (d(m)-dobs)^T @ icobs @ (d(m)-dobs).

    m      model vector, passed to the forward function d()
    dobs   observations that the predictions d(m) are compared with
    icobs  weighting matrix applied between the two residual vectors

    Returns the value of the quadratic form.
    """
    # evaluate the forward model once and reuse the residual for both factors
    res = d(m) - dobs
    sd = 0.5 * res.T @ icobs @ res
    return sd
# model misfit (related to regularization)
def Sm(m,mprior,icprior):
    """Model misfit: 0.5 * (m-mprior)^T @ icprior @ (m-mprior)."""
    dm = m - mprior
    return 0.5 * dm.T @ icprior @ dm
# total misfit
def S(m,dobs,mprior,icobs,icprior):
    """Total misfit: the data misfit Sd plus the model misfit Sm."""
    data_term = Sd(m,dobs,icobs)
    model_term = Sm(m,mprior,icprior)
    return data_term + model_term

# Starting model for the iterations
# (alternative: start from the prior model with  mnew = mprior)
mnew = minitial

# Misfit of the starting model: data part, model part, and their total
Sd_0 = Sd(m=mnew, dobs=dobs, icobs=icobs)
Sm_0 = Sm(m=mnew, mprior=mprior, icprior=icprior)
S_0  = S(m=mnew, dobs=dobs, mprior=mprior, icobs=icobs, icprior=icprior)

stS0 = 'S(m0) = %.3f = %.3f(D) + %.3f(M)' % (S_0, Sd_0, Sm_0)
print(stS0)

In [None]:
# number of iterations of the inversion loop
niter = 8
#niter = input(' Select the number of iterations (< 10) or 0 to exit: ')
#niter = int(niter)
if int(niter) == 0:
    # Raise SystemExit directly. This is the exception sys.exit() raises, and
    # it avoids relying on `sys`, which this notebook never imports itself.
    raise SystemExit('no iterations requested: exiting')

In [None]:
# (re)start from the initial model
mnew = minitial

# iteration indices 0..niter and one misfit value per iteration,
# each misfit array stored as an (niter+1) x 1 column
iter_vec = np.arange(0, niter+1).T
Sd_vec, Sm_vec, S_vec = (np.zeros((niter+1, 1)) for _ in range(3))

# entry 0 holds the misfit of the initial model
Sd_vec[0], Sm_vec[0], S_vec[0] = Sd_0, Sm_0, S_0

# preconditioner F (not Fhat!): identity
F0 = np.identity(nparm)

# optimization method, selected by index into the label tuple
imethod = 2    # quasi-Newton method (=2)
stlabels2 = (
    'none', 'newton', 'quasi', 'steepest', 'cg',
    'cgpoly', 'vmmatrix', 'vmvector', 'srvmmatrix', 'srvmvector',
)
stmethod = stlabels2[imethod]

In [None]:
# Iteration loop: nn runs from 1 to niter; index 0 of the misfit arrays
# already holds the values for the initial model.
# NOTE: this is a template -- the loop body below contains only comments, so
# Python will reject the cell until at least one statement is added.
for nn in range(1,niter+1):
    #///////////////////////////////
    # CODE HERE for quasi-Newton algorithm

    
    
    

    # misfit function for new model
    # note: book-keeping only -- not used within the algorithm above
    #Sd_vec[nn] = 
    #Sm_vec[nn] = 
    #S_vec[nn] = 

    #///////////////////////////////

# misfit function values
# One row per stored model: the initial model (iteration 0) plus every one of
# the niter updates, i.e. niter+1 rows. Looping over range(niter) dropped the
# final iteration from the table.
print('summary of misfit function:')
print('%8s%16s%16s%16s'% ('iter','Sd','Sm','S = Sm + Sd'))
for nn in range(niter+1):
    # .item() turns each one-element entry into a plain Python scalar for %f
    print('%8i%16.10f%16.10f%16.10f' % (iter_vec[nn],Sd_vec[nn].item(),Sm_vec[nn].item(),S_vec[nn].item()))

In [None]:
# Convergence curve: log10 of each misfit term against iteration number

# decade bounds enclosing all misfit values (only used by the disabled ylim below)
ya = math.floor(np.log10([Sm_vec, Sd_vec, S_vec]).min())
yb = math.ceil(np.log10([Sm_vec, Sd_vec, S_vec]).max())
ylims = [10**bound for bound in (ya, yb)]
stit = '{} iterations'.format(niter)

# one curve per term, drawn in the same order as the legend labels in stlabS
for misfit_vec, fmt in ((Sd_vec, 'r.-'), (Sm_vec, 'b.-'), (S_vec, 'k.-')):
    plt.plot(iter_vec, np.log10(misfit_vec), fmt, linewidth=2, markersize=20)
plt.legend(stlabS)
plt.xlim([-0.5, niter+0.5])
#plt.ylim([np.log10(ylims[0]),np.log10(ylims[0])])

# integer tick marks on the iteration axis
plt.locator_params(axis="x", integer=True, tight=True)
plt.xlabel('n, iteration')
plt.ylabel(' log10[ S(m^n) ], misfit function')
plt.title(stit)
plt.show()

In [None]:
# exit early
#raise SystemExit("stop here")

In [None]:
# calculate the correlation matrix from the covariance matrix
def corrcov(C):
    """Convert a covariance matrix into the corresponding correlation matrix.

    Each entry is scaled as C[i,j] / (sigma_i * sigma_j), where sigma is the
    square root of the diagonal of C.

    Parameters
    ----------
    C : array_like, shape (n, n)
        Covariance matrix; any array-like (ndarray, nested list) is accepted.

    Returns
    -------
    numpy.ndarray or None
        n x n correlation matrix. Entries whose covariance is exactly zero are
        set to 0, which also covers the 0/0 case of a zero variance.
        None is returned when C is not square.
    """
    C = np.asarray(C, dtype=float)
    nx,ny = C.shape
    if nx != ny:
        return None
    sigma = np.sqrt(np.diag(C))
    # a zero variance produces 0/0 here; that case is overwritten just below,
    # so keep the intentional division quiet
    with np.errstate(divide='ignore', invalid='ignore'):
        Crho = C / np.outer(sigma,sigma)
    Crho[C == 0] = 0
    return Crho

In [None]:
#///////////////////////////////
# COMPUTE THE FOLLOWING
# mpost       posterior model ("final" model)
# dpost       predictions for mpost
# Gpost       partial derivatives matrix at mpost
# cpost0      posterior covariance matrix (use icobs0 and icprior0)
# sigma_post  standard deviations of the posterior (square root of the diagonal of cpost0)
# rho_post    posterior correlation matrix (hint: see Tarantola, Section 3.3)
# CODE HERE



#///////////////////////////////

In [None]:
# prior model correlation matrix, kept for comparison with the posterior
rho_prior = corrcov(cprior0)

# posterior model, covariance and correlation
print('mpost \n', mpost, '\n',
      'cpost0 \n', cpost0, '\n',
      'rho_post \n', rho_post)
print('variances:')
print(np.diag(cpost0))
print('sigma_post \n', sigma_post)

# posterior data covariance matrix (e.g., Tarantola Eq. 3.44)
cpost0_d     = Gpost @ cpost0 @ Gpost.T
cpost0_d     = (cpost0_d + cpost0_d.T)/2   # symmetrize
sigma_post_d = np.sqrt(np.diag(cpost0_d))  # diagonal only: off-diagonal elements are ignored
rho_post_d   = corrcov(cpost0_d)           # posterior data correlation matrix
rho_prior_d  = corrcov(cobs0)              # prior data correlation matrix, for comparison

# MATLAB reference (not ported): sampling the posterior data covariance.
# probably cannot get a cholesky decomposition for Cpost_d
# Lpost_d = chol(cpost0_d,'lower');
# dpost_samples = zeros(ndata,nsamples);
# dcov_samples = zeros(ndata,nsamples);
# for xx=1:nsamples, randn_vecs_d(:,xx) = randn(ndata,1); end
# dcov_samples  = Lpost_d * randn_vecs_d;
# dpost_samples = repmat(dpost,1,nsamples) + dcov_samples;
# std_samples_d = std(dpost_samples');

# model table: one row per model parameter
fmt4 = '%16s' * 4
print('model summary (%i iterations):' % (niter))
print(fmt4 % ('prior', 'initial', 'posterior', 'target'))
for ii in range(nparm):
    print(fmt4 % tuple(str(vec[ii]) for vec in (mprior, minitial, mpost, mtarget)))

# data table: one row per observation
fmt5 = '%16s' * 5
print('data summary (%i observations):' % (ndata))
print(fmt5 % ('prior', 'initial', 'posterior', 'target', 'actual'))
for ii in range(ndata):
    print(fmt5 % tuple(str(vec[ii]) for vec in (dprior, dinitial, dpost, dtarget, dobs)))

In [None]:
# Cholesky decomposition to obtain the square-root of cpost0
# NOTE: for large problems, this is not possible due to poor
#       conditioning of cpost0 or the inability to compute cpost0
Lpost = np.linalg.cholesky(cpost0)

# samples of the posterior distribution
# Standard-normal draws, one column per sample. The array is created here
# explicitly: it used to be filled column by column without being allocated
# anywhere in this notebook. Drawing (nsamples, nparm) and transposing keeps
# the nparm draws of each sample contiguous, as in that column-wise loop.
randn_vecs_m  = np.random.randn(nsamples,nparm).T
mcov_samples  = Lpost @ randn_vecs_m
# Add the posterior model to every column by broadcasting. The reshape accepts
# mpost either as a 1-D vector or as an (nparm,1) column; repmat(mpost,1,nsamples)
# only produced the right shape for the column form.
mpost_samples = np.reshape(mpost,(nparm,1)) + mcov_samples

# compare the standard deviation with sigma_post
std_samples = np.std(mpost_samples.T, axis=0)

# compare posterior model distribution with prior
# Numbers are printed with '%13.5f'. The previous '%13.5s' cut the *string*
# form of each float to 5 characters, so e.g. 1.234e-05 appeared as '1.234'.
# np.ravel lets the vectors be 1-D or column-shaped.
sigma_ratio = np.divide(np.ravel(sigma_post),np.ravel(sigma_prior))
print('  ')
print(' Compare model uncertainties : ')
#print('              model parameter: ' + ''.join([f"{_:>13}" for _ in mlabs]))  
print('             model parameter :',''.join(['%13s' % m for m in mlabs]))
print('                       units :',''.join(['%13s' % u for u in ulabs]))
print('                 sigma_prior =',''.join(['%13.5f' % u for u in np.ravel(sigma_prior)]))
print('                  sigma_post =',''.join(['%13.5f' % u for u in np.ravel(sigma_post)]))
print('   std(%6.0f mpost_samples) =' % nsamples,''.join(['%13.5f' % u for u in np.ravel(std_samples)]))
print('      sigma_post/sigma_prior =',''.join(['%13.5f' % u for u in sigma_ratio]))
print('1 - (sigma_post/sigma_prior) =',''.join(['%13.5f' % u for u in 1 - sigma_ratio]))

# Predictions for every posterior sample (one column of d_samples each);
# std_d_samples is then compared with sigma_post_d in the table below.
d_samples = np.zeros((ndata,nsamples))
for xx in range(nsamples):
    ms = mpost_samples[:,xx]
    d_samples[:,xx] = d(ms)

# sample covariance, correlation and standard deviations of the predictions
covd_samples  = np.cov(d_samples)
rhod_samples  = corrcov(covd_samples)
std_d_samples = np.sqrt(np.diag(covd_samples))
#std_d_samples = std(d_samples')

print('  ')
print('Compare data uncertainties : ')
print('%16s %10s %10s %10s' % ('prior','post','samples','post/prior'))
# table columns: prior sigma, posterior sigma, sample std, posterior/prior ratio
call = [sigma_obs, sigma_post_d, std_d_samples, np.divide(sigma_post_d,sigma_obs)]
for ii in range(ndata):
    row = (ii+1,) + tuple(column[ii] for column in call)
    print('%6i%10.4f %10.4f %10.4f %10.4f' % row)
    

In [None]:
def plot_covsamples(msamples,rho,tlab,sym1,msamples2=None,rho2=None,tlab2=None,sym2=None,mlabs=None):
    """Plot correlation matrices and pairwise scatterplots for sets of samples.

    Python adaptation of the MATLAB routine plot_covsamples (Carl Tape, 2012-01-01).

    Two figures are made:
      1. correlation matrices: first the supplied 'analytical' matrices, then
         the matrices estimated from the samples with np.corrcoef. With a
         second set, the two sets are shown side by side.
      2. a scatterplot for every pair of variables (skipped when n > NMAX).

    INPUT
       msamples   n x p array of samples: n variables (rows), p samples (columns)
       rho        n x n 'analytical' correlation matrix
       tlab       label for plot
       sym1       plotting symbol (matplotlib format string, e.g. 'b.')
       msamples2  optional: 2nd set of samples (None or [] for none)
       rho2       optional: 2nd 'analytical' correlation matrix (None or [] for none)
       tlab2      optional: label for plot
       sym2       optional: plotting symbol
       mlabs      optional: labels for each variable (None or [] for 'i1','i2',...)

    The second set is drawn only when both msamples2 and rho2 are non-empty.

    EXAMPLE
       plot_covsamples(mpost_samples,rho_post,'mpost','b.',[],[],[],[],mlabs)

    NOTE: We could alternatively estimate the covariance matrix
    (and correlation matrix) directly from the input samples.
    """
    NMAX = 6   # max number of variables to make into scatterplots

    n = np.shape(msamples)[0]
    p = np.shape(msamples)[1]
    print('plot_covsamples: n = %i, p = %i'%(n,p))

    # default variable labels: i1, i2, ..., in
    if mlabs is None or len(mlabs)==0:
        mlabs = ['i'+str(ii+1) for ii in range(n)]

    def _given(arg):
        """True when an optional argument was supplied and is non-empty."""
        return arg is not None and len(arg)!=0

    # whether to plot a second set of samples
    iplot_second_set = 1 if (_given(msamples2) and _given(rho2)) else 0

    fig = plt.figure(figsize=(8,8))
    nr = 1+iplot_second_set
    nc = 2
    cbar_shrink = 0.3 if iplot_second_set==0 else 0.7

    def _show_matrix(ax,F,label,stag):
        """Draw one correlation matrix with colorbar, tick labels and title."""
        im = ax.imshow(F,cmap='jet',vmin=-1,vmax=1)
        ax.xaxis.tick_top()
        fig.colorbar(im,shrink=cbar_shrink,aspect=10,ticks=[-1,-0.5,0,0.5,1])
        ticks = [0,1,2,3] if len(mlabs)==4 else np.arange(0,n,1)
        ax.set_xticks(ticks)
        ax.set_xticklabels(mlabs)
        ax.set_yticks(ticks)
        ax.set_yticklabels(mlabs)
        ax.set_title(str(stag) + ' correlation matrix for ' + str(label))

    # kk=0: 'analytical' correlation matrices passed in by the caller
    # kk=1: correlation matrices estimated from the input SAMPLES
    for kk in [0,1]:
        if kk==0:
            F1 = rho
            F2 = rho2
            stag = ''
        else:
            F1 = np.corrcoef(msamples)
            # This used to test iplot_second_set==2, which never holds, so the
            # "sample" panel of the second set silently re-plotted rho2.
            F2 = np.corrcoef(msamples2) if iplot_second_set==1 else None
            stag = 'sample'

        # subplot index: 1,2 for a single set; 1,3 (with 2,4 alongside) for two sets
        pind = kk+1+iplot_second_set*(kk)
        _show_matrix(fig.add_subplot(nr,nc,pind),F1,tlab,stag)

        # second matrix plot (OPTIONAL)
        if iplot_second_set==1:
            _show_matrix(fig.add_subplot(nr,nc,pind+1),F2,tlab2,stag)
    plt.tight_layout()

    # scatterplots
    if n > NMAX:
        print('n = %i is > %i, so no scatterplots made'% (n,NMAX))
    else:
        fig = plt.figure(figsize=(8,8))
        nr = n-1
        nc = n-1
        for ii in range(n-1):
            for jj in range(ii+1,n):
                # 1-D vectors: each call draws one marker-only line holding all
                # p points (2-D 1 x p inputs create one Line2D per point)
                px = np.ravel(msamples[ii,:])
                py = np.ravel(msamples[jj,:])
                iplot = nc*(ii) + jj
                ax = fig.add_subplot(nr,nc,iplot)
                ax.plot(px,py,sym1,markersize=2)
                ax.xaxis.tick_bottom()
                ax.set_xlabel(mlabs[ii])
                ax.set_ylabel(mlabs[jj])
                # sample correlation, with the analytical value in parentheses
                # (np.corrcoef's ddof argument is deprecated and has no effect)
                st1 = 'corr(%s) = %.3f (%.3f)'% (tlab,np.corrcoef(px,py)[0,1],rho[ii,jj])
                if iplot_second_set==1:
                    px = np.ravel(msamples2[ii,:])
                    py = np.ravel(msamples2[jj,:])
                    ax.plot(px,py,sym2,markersize=2)
                    st2 = 'corr(%s) = %.3f (%.3f)'% (tlab2,np.corrcoef(px,py)[0,1],rho2[ii,jj])
                    ax.set_title(str(st1)+'\n'+ str(st2))
                else:
                    ax.set_title(str(st1))
    plt.tight_layout()
    plt.show()

In [None]:
# display distributions for each model parameter (nparm ROWS of mcov_samples)
# Bin edges span +/- 4 sigma_post in steps of sigma_post/2.
fig = plt.figure(figsize=(8,8))
nr = 2
nc = 2
for kk in range(nparm):
    sigma = sigma_post[kk]
    edges = np.arange(-4*sigma,4*sigma,sigma/2)
    etemp = mcov_samples[kk,:]
    plt.subplot(nr,nc,kk+1)
    plot_histo(etemp,edges)
    plt.ylim([0,0.4])
    # `plt.grid` without parentheses was a no-op; switch the grid on
    # explicitly (a bare plt.grid() would toggle the current state instead)
    plt.grid(True)
    stl1 = 'mpost samples'
    stl2 = 'Model parameter ' + str(kk+1) + ' (' + str(mlabs[kk]) + ')'
    # '%.5f' matches the prior-sample histograms; '%.5s' truncated the string
    # form of the numbers (e.g. a mean of 3.2e-05 showed as '3.2e-')
    stl3 = 'mean = %.5f; std = %.5f' % (np.mean(etemp),np.std(etemp))
    if kk==0: 
        plt.title(str(stl1) + ': ' + str(stl2) + '\n' + str(stl3))
    else: 
        plt.title(str(stl2) + '\n' + str(stl3))
plt.tight_layout()

In [None]:
# Correlation matrices and scatterplots of the model samples
# (prior-only variant, disabled:)
#plot_covsamples(mprior_samples,rho_prior,'mprior','b.',[],[],[],[],mlabs);

# posterior samples on their own
plot_covsamples(msamples=mpost_samples, rho=rho_post, tlab='mpost', sym1='r.',
                msamples2=[], rho2=[], tlab2=[], sym2=[], mlabs=mlabs)

# prior samples (blue) together with posterior samples (red)
plot_covsamples(msamples=mprior_samples, rho=rho_prior, tlab='mprior', sym1='b.',
                msamples2=mpost_samples, rho2=rho_post, tlab2='mpost', sym2='r.',
                mlabs=mlabs)

In [None]:
# Map view of the estimated posterior data uncertainties.
# Either sigma_post_d (from Cpost_d) or std_d_samples (from d(Cpost_samples))
# can be used to color the (xrec, yrec) markers; std_d_samples is used here.
plt.figure(figsize=(8,8))
plt.subplot(aspect=1)

# posterior samples (first two model parameters) and the posterior model
plt.plot(mpost_samples[0], mpost_samples[1], 'c.')
plt.plot(mpost[0], mpost[1], 'o',
         **dict(markersize=10, markerfacecolor='c', markeredgecolor='w'))
#plot(mprior(1),mprior(2),'o','markersize',10,'markerfacecolor','b','markeredgecolor','w');
#scatter(xrec,yrec,16^2,sigma_post_d,'filled','V'); title('estimated uncertainties for posterior predictions'); 

# markers at (xrec, yrec), colored by the sample standard deviation of the predictions
plt.scatter(xrec, yrec, s=16**2, c=std_d_samples,
            marker='v', edgecolors='k', cmap='hot')
plt.xlabel('X distance (km)')
plt.ylabel('Y distance (km)')
plt.colorbar(shrink=0.8)
plt.title('uncertainties for posterior predictions, computed from samples')
plt.show()

In [None]:
# Correlation matrices and scatterplots for the predictions of the posterior
# samples (no second set, default variable labels)
plot_covsamples(msamples=d_samples, rho=rho_post_d, tlab='dpost', sym1='k.',
                msamples2=[], rho2=[], tlab2=[], sym2=[], mlabs=[])

In [None]:
# Plot the solution: one figure per pass.
# (if iforward=2 there is a second pass that applies the axis limits axepi)
for ipass in range(iforward):
    plot_epicenters(mprior_samples,mprior,minitial,mtarget,[xrec,yrec,0,axepi],mpost)

    # overlay the cpost0 samples ...
    plt.plot(mpost_samples[0,:], mpost_samples[1,:], 'c.')
    # ... and re-plot the posterior, target and initial models on top
    for mvec, face in ((mpost, 'c'), (mtarget, 'r'), (minitial, 'k')):
        plt.plot(mvec[0], mvec[1], 'o', markersize=10,
                 markerfacecolor=face, markeredgecolor='w')

    plt.title('samples of prior (blue) and posterior (cyan)')
    #plt.xlim([0,100])
    #plt.ylim([0,100])
    #plt.axis('equal')
    if ipass == 1:
        plt.axis(axepi)
    #plt.tight_layout()