In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from scipy.optimize import curve_fit
from scipy.stats import norm, multivariate_normal

from lh_sampling.sampler import lhs, resample
from lh_sampling.util import ecdf
from lh_sampling.util import read_db
from lh_sampling.visualise import lhs_example_plot, plot_lhs, scatter_matrix_plot

## Drawing representative samples
To quantify the uncertainty of ashfall forecasts in the absence of observational constraints on eruption parameters, we have to define good prior distributions and then draw representative samples from these distributions. Since the number of samples we can draw is limited by the number of weather forecast ensemble members, we use Latin hypercube sampling (LHS). LHS is a type of stratified sampling that ensures that every section of the parameter space is sampled once. The figure below shows an example of a random uniform sample (green dots) and the corresponding LHS sample (black crosses).

In [None]:
# Draw the example figure comparing a random uniform sample with its LHS
# counterpart (project helper from lh_sampling.visualise, called with defaults).
plot_lhs()

The following figure demonstrates that the variance of the LHS estimates of the first and second moments decreases faster than that of standard Monte Carlo estimates. a) shows the 2D Gaussian distribution from which samples are drawn; b) and c) show the LHS and Monte Carlo estimates, respectively, from 20 random samples. d) to f) show the evolution of the estimates of the first and second moments for LHS and Monte Carlo samples with respect to the number of samples. We can see that LHS requires fewer samples to generate a representative eruption parameter ensemble.

In [None]:
# Build the LHS example figure with a fixed seed (42), save a PNG copy under
# plots/, and display the figure as the cell output.
fig = lhs_example_plot(seed=42)
fout = 'plots/lhs_example.png'
# NOTE(review): `write_image(..., scale=2)` looks like the plotly Figure export
# API — confirm that `lhs_example_plot` returns a plotly figure.
fig.write_image(fout, scale=2)
fig

## Eruption database
To construct the prior distribution we first assembled a database of eruption parameters (column height, mass eruption rate (MER), duration) from 213 historic eruptions. We then construct probability density functions (PDFs) for these parameters, either by approximating the cumulative distribution function (CDF) with a mathematical function or by fitting a Gaussian distribution to the eruption parameters.

The following figure shows the distribution of eruption parameters in the database coloured by magma type. The diagonal panels show CDFs, the upper panels show the data points, and the lower panels show kernel density estimates of the same data points.

In [None]:
# Load the eruption database; `df` is reused by the cells further down.
df = read_db()
# Scatter-matrix overview of the database, coloured by the 'Magma type' column.
fig = scatter_matrix_plot(df, hue='Magma type')
# Save a 300 dpi copy under plots/ and show the figure as the cell output.
fout = 'plots/db_overview.png'
fig.savefig(fout, dpi=300)
fig

The figure below shows in blue the original data points for eruptions with predominantly mafic magma. Shown in orange are the resampled data points, which were generated by approximating the first and second moments of the original distribution and then drawing 20 LHS samples.

In [None]:
# Select the log-transformed eruption parameters for mafic eruptions.
# `.where` keeps every row and masks the non-mafic ones instead of dropping them.
log_params = ['log Column height [km]', 'log MER [kg/s]', 'log Duration [h]']
is_mafic = df['Magma type'] == 'Mafic'
df_m = df[log_params].where(is_mafic)

# One log-space constraint per parameter, in the same order as `log_params`.
# NOTE(review): presumably upper limits on the resampled values — confirm
# against `resample`.
mafic_constraints = [np.log(limit) for limit in (25, 2e7, 24)]
dfr = resample(df_m, constraints=mafic_constraints, seed=42)

# Plot the resampling result split by the 'Category' column, save it, show it.
fout = 'plots/lhs_mafic.png'
fig = scatter_matrix_plot(dfr, hue='Category', log=True)
fig.savefig(fout, dpi=300)
fig

In [None]:
# Draw 30 samples for intermediate-magma eruptions and export them in
# non-log units to data/lh_sample_intermediate.csv.
log_cols = ['log Column height [km]', 'log MER [kg/s]', 'log Duration [h]']
df_i = df[log_cols].where(df['Magma type'] == 'Intermediate')
df_i_resampled = resample(
    df_i,
    nsamples=30,
    constraints=[np.log(25), np.log(2e9), np.log(24)],
    seed=42,
)

# Build the export table as one non-mutating chain. Assigning columns onto a
# boolean-filtered slice (as the previous version did) triggers pandas'
# SettingWithCopyWarning; `.assign` returns a new frame instead.
df_ir = (
    df_i_resampled
    .loc[df_i_resampled['Category'] == 'resampled']  # keep only the drawn samples
    .assign(**{
        # Undo the log transform; insertion order fixes the CSV column order.
        'MER [kg/s]': lambda d: np.exp(d['log MER [kg/s]']),
        'Column height [km]': lambda d: np.exp(d['log Column height [km]']),
        'Duration [h]': lambda d: np.exp(d['log Duration [h]']),
    })
    .drop(columns=['log MER [kg/s]', 'log Duration [h]', 'log Column height [km]', 'Category'])
    .reset_index(drop=True)
)
df_ir.to_csv('data/lh_sample_intermediate.csv', index=False)

In [None]:
# Pair plot of the exported sample table `df_ir`: scatter plots above the
# diagonal, kernel density estimates below it, histograms on the diagonal.
# `sns` is imported in the notebook's import cell rather than mid-notebook.
g = sns.PairGrid(df_ir, diag_sharey=False)
g.map_upper(sns.scatterplot, s=15)
g.map_lower(sns.kdeplot)
# Trailing semicolon suppresses the PairGrid repr; the figure still renders.
g.map_diag(sns.histplot);

While this is a purely statistical approach, the alternative would be to find functional forms of the PDFs. The following shows an example that describes the MER for eruptions at Etna volcano. The advantage of a functional form is that it may be transferred and scaled to other volcanoes by considering different physical constraints. The disadvantage is that it is more difficult to derive functional forms for all three eruption parameters that also take into account the covariance structure. The figure also shows the result of approximating the MER data with a Gaussian distribution.

In [None]:
# Compare two descriptions of the MER distribution at Etna: a functional fit
# to the complementary CDF, and the Gaussian approximation produced by `resample`.
df_etna = df[df['Volcano'] == 'Etna'].sort_values(axis=0, by='MER')
# Seeded like the other `resample` calls in this notebook so that a fresh
# Restart & Run All reproduces the same figure.
dfr = resample(
    df_etna[['log Column height [km]', 'log MER [kg/s]', 'log Duration [h]']].copy(),
    seed=42,
)

# Empirical CDF of the observed MER values.
x = df_etna['MER'].values
x_sorted, cdf = ecdf(x)
# Empirical CDF of the resampled log MER only. The `resample` output also
# carries the input rows (distinguished by the 'Category' column), which must
# not be mixed into the curve labelled "Gaussian".
xr = dfr.loc[dfr['Category'] == 'resampled', 'log MER [kg/s]'].values
x_sortedr, cdfr = ecdf(xr)


def fitfun(data, a, b):
    """Evaluate the model ``-a * log10(data) + b``.

    Parameters
    ----------
    data : array-like
        Values at which the model is evaluated (must be positive for log10).
    a : float
        Slope with respect to ``log10(data)``; enters with a negative sign.
    b : float
        Offset.

    Returns
    -------
    Model values with the same shape as ``data``.
    """
    return -a*np.log10(data)+b


# p0 is the initial guess for (a, b).
prms, _ = curve_fit(fitfun, x_sorted, 1-cdf, p0=[19, 100])

# The 'seaborn' style name was deprecated in matplotlib 3.6 in favour of
# 'seaborn-v0_8'; use whichever name the installed version provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
with plt.style.context(style_name):
    # Plot the fit against the same abscissa it is evaluated on (`x_sorted`),
    # so the x and y arrays always match in length and order.
    plt.loglog(x_sorted, fitfun(x_sorted, prms[0], prms[1]), label='Best fit')
    plt.loglog(x_sorted, 1-cdf, label='Complementary CDF')
    # Resampled values are log MER; exponentiate to share the MER axis.
    plt.semilogx(np.exp(x_sortedr), 1-cdfr, label='Complementary CDF (Gaussian)')
    plt.xlabel('MER [kg/s]')
    plt.ylabel('Complementary CDF')
    plt.legend()