In [1]:
# Import libraries
using Turing
using LinearAlgebra
using Distributions
using MultivariateStats
import MultivariateStats: reconstruct
using GaussianProcesses
using StatsBase
using Statistics
using Suppressor
using JLD2
using CSV
using DataFrames, DataFramesMeta
using SplitApplyCombine
using KernelFunctions
using MCMCChains
using PyCall
using PyPlot
using Printf
import PyCall.pyfunction

include("../Utils/scale_utils.jl")
using .ScaleUtils

# Python modules accessed through PyCall; each handle is bound to a global name.
matplotlib = pyimport("matplotlib")
seaborn = pyimport("seaborn")
os = pyimport("os")
# The returned module is discarded; presumably imported only so the "science" style
# requested below is registered with matplotlib — TODO confirm.
pyimport("scienceplots")
np = pyimport("numpy")
scipy = pyimport("scipy")
skl_model_selection = pyimport("sklearn.model_selection")
# Select the matplotlib style sheets (`plt` is expected to come from `using PyPlot`).
plt.style.use(["default","science","no-latex"])
using StatsPlots

# NOTE(review): the value returned by this call is discarded. If the intent was to change
# the default color cycle, `seaborn.set_palette` may have been meant — confirm.
seaborn.color_palette("colorblind")

# Ask PyCall to use the Tk GUI toolkit.
PyCall.pygui(:tk)

# Set a seed for reproducibility
using Random
Random.seed!(11);

#import Pkg
#Pkg.add("PrettyTables")
# Base font size; plot_all_Rs_KDE_pdf multiplies it by fixed factors for titles, tick
# labels, the colorbar and the legend.
FONTSIZE=20.5;

In [2]:
# Bind matplotlib's PdfPages class to a global name; the driver cell at the end of the
# file instantiates it to collect one figure per realization.
matplotlib_backends_pdf = pyimport("matplotlib.backends.backend_pdf")
PdfPages = matplotlib_backends_pdf.PdfPages


PyObject <class 'matplotlib.backends.backend_pdf.PdfPages'>

In [3]:
## PRIOR DISTRIBUTIONS

# Number of grid points on which each prior density is evaluated.
sample_size = 5000

# Evaluation grids, one per parameter. Each grid starts and ends a small offset inside
# the parameter's bounds.
vmThresh_vals = collect(np.linspace(80000+0.00001, 180000-0.00001, sample_size))
fricExp_vals = collect(np.linspace(0.1+0.00001, 0.333-0.00001, sample_size))
mu_scale_vals = collect(np.linspace(0.8+0.00001, 1.2-0.00001, sample_size))
stiff_scale_vals = collect(np.linspace(0.8+0.00001, 1.2-0.00001, sample_size))
gamma0_vals = collect(np.linspace(9620+0.0001, 471000-0.0001, sample_size))
melt_flux_vals = collect(np.linspace(12+0.0001, 58-0.0001, sample_size))


# Prior densities on those grids. `a` and `b` of scipy's truncnorm are given in units of
# `scale` relative to `loc`; the trapezoid's `c` and `d` are fractions of `scale`.
vmThresh_prior = collect(scipy.stats.truncnorm.pdf(vmThresh_vals, a=-3, b=3, loc=130000, scale=50000/3))
fricExp_prior = collect(scipy.stats.trapezoid.pdf(fricExp_vals, c=0.05/0.233, d=0.18/0.233, loc=0.1, scale=0.233))
mu_scale_prior = collect(scipy.stats.truncnorm.pdf(mu_scale_vals, a=-2, b=2, loc=1.0, scale=0.1))
stiff_scale_prior = collect(scipy.stats.truncnorm.pdf(stiff_scale_vals, a=-2, b=2, loc=1.0, scale=0.1))
# gamma0: the truncated normal is defined on log(gamma0). Dividing its density by gamma0
# expresses it as a density over gamma0 itself (change of variables).
trunc_norm_gamma0 = scipy.stats.truncnorm(a=np.log(9620)-10, b=np.log(471000)-10, loc=10, scale=1)
gamma0_prior = collect(trunc_norm_gamma0.pdf(np.log(gamma0_vals)) ./ gamma0_vals)
melt_flux_prior = collect(scipy.stats.truncnorm.pdf(melt_flux_vals, a=-2, b=2, loc=35, scale=11.5))

# Horizontal concatenation: one column per parameter, in the order
# vmThresh, fricExp, mu_scale, stiff_scale, gamma0, melt_flux.
θ_prior = [vmThresh_prior;; fricExp_prior;; mu_scale_prior;; 
     stiff_scale_prior;; gamma0_prior;; melt_flux_prior];

# Load the original parameter data; it is only used to obtain the scaling constants.
X_raw = CSV.read("../Data/Training_Data/Amery_Input_Parameters_Filtered.csv", DataFrame);
# 1) Grab all column names as Symbols
cols = Symbol.(names(X_raw))
# 2) Remove the index-column symbol
cols = filter(c -> c != :Column1, cols)
# 3) Now call get_scaled_matrix on the remaining columns.
#    X_mins and X_maxs are later indexed by parameter Symbol in unscale_params;
#    presumably they hold the per-column minima and maxima — confirm in ScaleUtils.
X_scaled_t, X_scalers, X_mins, X_maxs = ScaleUtils.get_scaled_matrix(X_raw, cols);

In [4]:
"""
    unscale_params(params)

Map the first six entries of `params` back to unscaled values.

Entry `i` is transformed as `params[i] * (X_maxs[k] - X_mins[k]) + X_mins[k]`, where `k`
is the i-th name in the order vmThresh, fricExp, mu_scale, stiff_scale, gamma0,
melt_flux. `X_mins` and `X_maxs` are globals defined elsewhere in this file.

Returns the six results concatenated vertically, in the same order.
"""
function unscale_params(params)
    # The position of a name in this tuple is the index of its entry in `params`.
    param_names = (:vmThresh, :fricExp, :mu_scale, :stiff_scale, :gamma0, :melt_flux)

    restored = map(enumerate(param_names)) do (idx, name)
        span = X_maxs[name] - X_mins[name]
        return (params[idx] * span) + X_mins[name]
    end

    return vcat(restored...)
end

unscale_params (generic function with 1 method)

In [5]:
using KernelDensity
"""
    rescale_kde(data, a, b)

Fit a kernel density estimate (KDE) to `data` and renormalise it on the interval `[a, b]`
so that the returned curve integrates to one over that interval.

# Arguments
- `data`: collection of real-valued samples.
- `a`, `b`: lower and upper bound of the interval of interest (`a <= b`).

# Returns
A tuple `(filtered_data, kde_values_rescaled)`:
- `filtered_data`: the sorted samples that lie inside `[a, b]`;
- `kde_values_rescaled`: the KDE evaluated at those samples, divided by the probability
  mass the KDE assigns to `[a, b]`.

Both are empty when no sample lies inside `[a, b]` (including empty `data`).

# Throws
- `ArgumentError` if `a > b`, or if the KDE assigns no mass to `[a, b]` on its grid.
"""
function rescale_kde(data, a, b)
    a <= b || throw(ArgumentError("lower bound a=$a must not exceed upper bound b=$b"))

    data_sorted = sort(data)
    filtered_data = data_sorted[a .<= data_sorted .<= b]
    # Nothing to draw; also avoids fitting a KDE to empty input.
    isempty(filtered_data) && return filtered_data, Float64[]

    # Fit the KDE to all samples, not only the ones inside [a, b].
    density_est = KernelDensity.kde(data_sorted)

    # Probability mass of the KDE inside [a, b], integrated on the KDE's own evenly
    # spaced grid (rectangle rule). Summing density values at the sample points instead
    # would weight the density by itself and would not yield a probability.
    grid = density_est.x
    mass_in_bounds = sum(density_est.density[a .<= grid .<= b]) * step(grid)
    mass_in_bounds > 0 || throw(ArgumentError(
        "the KDE assigns no probability mass to [$a, $b] on its evaluation grid"))

    # Dividing by the mass makes the curve a valid PDF on [a, b].
    kde_values_rescaled = pdf(density_est, filtered_data) ./ mass_in_bounds

    return filtered_data, kde_values_rescaled
end

rescale_kde (generic function with 1 method)

In [6]:
# Posterior samples from the previous paper's calibration, loaded from a NumPy file.
# Passed to plot_all_Rs_KDE_pdf as `prev`, where columns are selected by parameter index.
θ_previous = np.load("../Data/Training_Data/posterior_samples_All_Combined.npy");

In [7]:
"""
    tuple_to_idx(x, y)

Convert the position `(x, y)` of a panel in a grid with three columns into a single
index, counting along each row first: `(1, 1)` maps to 1 and `(2, 3)` maps to 6.
"""
tuple_to_idx(x, y) = y + (3 * x - 3)

"""
    plot_previous(previous_post, ax, var_idx)

Draw the rescaled KDE of one parameter of the previous calibration on `ax`.

# Arguments
- `previous_post`: matrix of posterior samples; column `var_idx` is plotted.
- `ax`: matplotlib axes to draw on.
- `var_idx`: parameter index from 1 to 6. Index 5 is log-transformed before the KDE is
  computed, and its bounds are stored in log space.

The curve is red, has line width 2 and is labelled "2015 Calibration" for every
parameter. Returns whatever `ax.plot` returns.
"""
function plot_previous(previous_post, ax, var_idx)
    # Interval passed to rescale_kde for each parameter index.
    uppers_and_lowers = Dict([
        1 => (80000, 180000), 2 => (0.1, 0.333),
        3 => (0.8, 1.2), 4 => (0.8, 1.2),
        5 => (np.log(9620), np.log(471000)), 6 => (12, 58)])
    lower, upper = uppers_and_lowers[var_idx]

    current_params = previous_post[:, var_idx]
    # Parameter 5 (gamma_0) is shown on a log scale.
    samples = var_idx == 5 ? np.log(current_params) : current_params

    x, y = rescale_kde(samples, lower, upper)
    return ax.plot(x, y, color="red", label="2015 Calibration", lw=2)
end
    
    
"""
    plot_one_var_posterior(post_dict, years, ax, var_idx, cmaplist)

Draw one rescaled KDE curve per entry of `years` for a single parameter.

# Arguments
- `post_dict`: container indexed by year; each value is indexed as `[:, var_idx]`.
- `years`: the years to plot, in drawing order.
- `ax`: matplotlib axes to draw on.
- `var_idx`: parameter index from 1 to 6. Index 5 is log-transformed before the KDE is
  computed, and its bounds are stored in log space.
- `cmaplist`: callable mapping a normalised year to a line color.

The year is normalised with `norm`, which is not an argument: it is read from the
enclosing (global) scope.
"""
function plot_one_var_posterior(post_dict, years, ax, var_idx, cmaplist)
    # Interval passed to rescale_kde for each parameter index.
    uppers_and_lowers = Dict([
        1 => (80000, 180000), 2 => (0.1, 0.333),
        3 => (0.8, 1.2), 4 => (0.8, 1.2),
        5 => (np.log(9620), np.log(471000)), 6 => (12, 58)])

    for year in years
        # All posterior samples of this parameter for the current year.
        current_params = post_dict[year][:, var_idx]
        # Parameter 5 (gamma_0) is shown on a log scale.
        samples = var_idx == 5 ? np.log(current_params) : current_params

        bounds = uppers_and_lowers[var_idx]
        x, y = rescale_kde(samples, bounds[1], bounds[2])
        ax.plot(x, y, lw=1.8, color=cmaplist(norm(year)))
    end
end

plot_one_var_posterior (generic function with 1 method)

In [8]:
"""
    plot_all_Rs_KDE_pdf(post_dict, years, cmaplist, prev, pdf, theta)

Build one figure with a 2x3 grid of panels (one per parameter), save it through `pdf`
and close it.

Each panel shows, in drawing order: one posterior KDE per year, the previous
calibration in red, the prior as a dashed black line and the value from `theta` as a
dash-dot vertical line. A colorbar keyed by year and a two-entry legend are added.

# Arguments
- `post_dict`: container indexed by year; each value is indexed as `[:, var_idx]`.
- `years`: years to plot; also used as the colorbar ticks and tick labels.
- `cmaplist`: colormap, used both for the line colors and for the colorbar.
- `prev`: matrix of previous posterior samples, one column per parameter.
- `pdf`: object providing `savefig(fig)`; the figure is saved through it.
- `theta`: six parameter values marked with vertical lines; entry 5 is drawn at its log.

Reads these names from the enclosing (global) scope: `norm`, `FONTSIZE`, `θ_prior` and
the six `*_vals` grids. Returns `nothing`.
"""
function plot_all_Rs_KDE_pdf(post_dict, years, cmaplist, prev, pdf, theta)
    fig, ax = PyPlot.subplots(nrows=2, ncols=3, figsize=(18, 11), dpi=300,
                              gridspec_kw=Dict("height_ratios" => [1, 1],
                                               "width_ratios" => [1, 1, 1],
                                               "wspace" => 0.175, "hspace" => 0.25))

    # Close the figure even if plotting or saving fails, so that a failure in one call
    # does not leave an open figure behind.
    try
        title_dict = Dict([
            1 => L"\sigma_{max}", 2 => L"q", 3 => L"C_{\mu}",
            4 => L"C_{\phi}", 5 => L"\log(\gamma_0)", 6 => L"\overline{m}"])

        tick_dict = Dict([
            1 => [80000, 100000, 120000, 140000, 160000, 180000],
            2 => [0.1, 0.15, 0.215, 0.28, 0.333], 3 => [0.8, 0.9, 1, 1.1, 1.2],
            4 => [0.8, 0.9, 1, 1.1, 1.2], 5 => [9.17, 10, 11, 12, 13.06],
            6 => [12, 20, 30, 40, 50, 58]])

        # Hand-tuned (x, y) axis limits. Currently unused: the set_xlim / set_ylim calls
        # in the loop below are disabled.
        lim_dict = Dict([
            1 => ([0.75e5, 1.85e5], [0.01e-5, 4.15e-5]), 2 => ([0.093, 0.34], [0.01, 8]),
            3 => ([0.79, 1.21], [0.01, 6.45]), 4 => ([0.79, 1.21], [0.01, 6.85]),
            5 => ([5, 13.2], [0.001, 1]), 6 => ([10.5, 59.5], [0.0001, 0.0465])])

        for i in 1:2, j in 1:3
            var_idx = tuple_to_idx(i, j)
            panel = ax[i, j]

            # Posteriors for every year, then the previous calibration.
            plot_one_var_posterior(post_dict, years, panel, var_idx, cmaplist)
            plot_previous(prev, panel, var_idx)

            # Panel formatting.
            panel.set_title(title_dict[var_idx], fontsize=1.2 * FONTSIZE, pad=10)
            panel.ticklabel_format(style="sci", scilimits=(-2, 2), useMathText=true)
            panel.tick_params(axis="both", which="major", labelsize=FONTSIZE * 0.8)
            panel.xaxis.offsetText.set_fontsize(FONTSIZE * 0.8)
            panel.yaxis.offsetText.set_fontsize(FONTSIZE * 0.8)
            panel.set_xticks(tick_dict[var_idx])
            panel.locator_params(tight=true, nbins=6)
            #panel.set_xlim(lim_dict[var_idx][1])
            #panel.set_ylim(lim_dict[var_idx][2])
            panel.grid(true, alpha=0.5, zorder=1)
        end

        # --- Priors --- #
        # Only the first panel carries a label; the figure legend is built from it.
        ax[1, 1].plot(vmThresh_vals, θ_prior[:, 1], lw=1.2, ls="dashed", label="prior",
                      color="black")
        ax[1, 2].plot(fricExp_vals, θ_prior[:, 2], lw=1.2, ls="dashed", color="black")
        ax[1, 3].plot(mu_scale_vals, θ_prior[:, 3], lw=1.2, ls="dashed", color="black")
        ax[2, 1].plot(stiff_scale_vals, θ_prior[:, 4], lw=1.2, ls="dashed", color="black")
        # gamma_0 is drawn against log(gamma_0); multiplying the density by gamma_0
        # converts it to a density over the log-transformed variable.
        ax[2, 2].plot(np.log(gamma0_vals), θ_prior[:, 5] .* gamma0_vals, lw=1.2,
                      linestyle="dashed", color="black")
        ax[2, 3].plot(melt_flux_vals, θ_prior[:, 6], lw=1.2, ls="dashed", color="black")

        # --- Values in `theta` --- #
        for i in 1:2, j in 1:3
            var_idx = tuple_to_idx(i, j)
            marker = var_idx == 5 ? np.log(theta[var_idx]) : theta[var_idx]
            ax[i, j].axvline(x=marker, lw=2, color="black", ls="dashdot")
        end

        # --- Colorbar --- #
        # Built from the function's own arguments so that it always matches the curves.
        scalar_mappable = PyPlot.matplotlib.cm.ScalarMappable(norm=norm, cmap=cmaplist)
        scalar_mappable.set_array(years)
        cbar_ax = fig.add_axes([0.95, 0.25, 0.02, 0.5])  # [left, bottom, width, height]
        color_bar = fig.colorbar(scalar_mappable, cax=cbar_ax, pad=0.1)

        color_bar.set_ticks(years)
        color_bar.ax.set_title("Max \n constraining \nyear", fontsize=FONTSIZE * 0.8,
                               pad=20)
        color_bar.set_ticklabels(["$(y)" for y in years])
        color_bar.ax.tick_params(axis="both", which="major", labelsize=FONTSIZE * 0.8)

        # --- Legend --- #
        # Labelled artists of the first panel, in drawing order: the previous
        # calibration, then the prior. At most two entries are shown.
        handles, labels = ax[1, 1].get_legend_handles_labels()
        fig.legend(first(handles, 2), first(labels, 2), loc="lower center", ncol=6,
                   fontsize=FONTSIZE, bbox_to_anchor=(0.512, -0.01),
                   bbox_transform=fig.transFigure, frameon=true, framealpha=1)

        pdf.savefig(fig)
    finally
        PyPlot.close(fig)
    end

    return nothing
end

plot_all_Rs_KDE_pdf (generic function with 1 method)

In [9]:
# Output PDF; one page is written per realization.
figure_save_path = "../Plots/KDE_Plots/All_Realizations_KDE_plots.pdf"
# Directory holding the posterior dictionaries.
posteriors_directory = "../Data/Posterior_Data"
# Directory holding the generating parameter sets.
future_obs_directory = "../Data/Future_Observation_Data/Generative_Parameters/Official_Constraining_Observations-metadata"

# 19 constraining years, 15 years apart, starting at 2030.
max_years = [2030 + 15 * k for k in 0:18]
yr_min, yr_max = extrema(max_years)
# NOTE: `norm` is read as a global by the plotting functions. LinearAlgebra, loaded at
# the top of the file, exports a function with the same name.
norm = PyPlot.matplotlib.colors.Normalize(vmin=yr_min, vmax=yr_max)
cmap = PyPlot.cm.get_cmap("cool")


pdf_pages = PdfPages(figure_save_path)

# Realization numbers 1 to 100 ...
Realizations = collect(1:100);
# ... minus the misconverged ones.
m = [4, 15, 20, 44, 47, 67, 81, 82, 89, 100]
filter!(!in(m), Realizations)
r_filtered = string.(Realizations);

try
    for realization in r_filtered
        posterior_file = "$(posteriors_directory)/R_$(realization)_Posterior_Dict.jld2"
        generative_file = "$(future_obs_directory)/$(realization)_emulator_data.jld2"

        chosen_dict = JLD2.load(posterior_file, "post_data")
        # The stored parameters are scaled; convert them back before plotting.
        θ_un = unscale_params(JLD2.load(generative_file, "θ"))

        plot_all_Rs_KDE_pdf(chosen_dict, max_years, cmap, θ_previous, pdf_pages, θ_un)
        println("Done with realization: $(realization) ")
    end
finally
    # Always close the PdfPages object, even after an error or interrupt, so the PDF
    # is finalized on disk.
    pdf_pages.close()
end

Done with realization: 1 
Done with realization: 2 
Done with realization: 3 
Done with realization: 5 
Done with realization: 6 
Done with realization: 7 
Done with realization: 8 
Done with realization: 9 
Done with realization: 10 


LoadError: InterruptException: