# Post-Processing Calibration Results

Use this script to load and plot calibration results.

## Setup

First, load the necessary libraries, set up your access to Google Drive, and load the necessary helper functions.



In [None]:
# Load necessary libraries
import collections
import sys
import textwrap
import math
import csv
import numpy as np 
import statistics as stat
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from scipy.optimize import curve_fit
import utils as ut

## Define Paths

`path` is the directory that contains your CSV files.

`path_itr` is the directory that contains your iteration files (only needed if you would like to look at the convergence of the error).

In [None]:
# Directory that holds the calibration csv files.
path = 'MD3/csv/'
# Directory that holds the iteration files (only needed for the
# error-convergence plot).
path_itr = 'MD3/final_res/'

## STEP 1 PLOTS

Be sure to update the variables below with your own CSV filenames.

In [None]:
# STEP 1 inputs -- edit these to point at your own calibration output.
# csv files read for the optimal parameters and the errors:
filenames = [
    'STEP1-MD3_CR.csv',
    'STEP1-MD3_CR2.csv',
]
# csv files read with type='model' for the match plots:
model_filenames = [
    'STEP1-MD3_CR_model.csv',
    'STEP1-MD3_CR2_model.csv',
]
# Iteration-results file (looked up under path_itr):
filename_itr = 'STEP1-MD3_CR2.iterationresults'
# Target name(s) handed to ut.match_plot:
targets = 'GPP'

In [None]:

# Load the parameter table from every STEP 1 csv file into one DataFrame.
df_params = ut.read_all_csv(path, filenames, type='params')

# The stacked histograms need a dictionary of optimal parameters rather than
# a DataFrame. Index by parameter name, transpose, keep rows 4 onward (the
# optimal parameter values) and convert to {parameter name: [values]}.
mparams = (
    df_params
    .set_index('parameters')
    .T
    .iloc[4:, :]
    .to_dict('list')
)
print(mparams)

# Plot columns 5 onward as points on a log-scaled y axis.
# NOTE(review): presumably these are the same optimal-value columns that feed
# mparams -- confirm against the csv layout.
df_params.iloc[:, 5:].plot(
    logy=True,
    xlabel="param_id",
    title="optimal parameters",
    style="o",
)

In [None]:
# Read the final error of each STEP 1 calibration run, echo the values,
# and plot them.
err = ut.read_all_csv_errors(path, filenames)
print(err)
ut.plot_err(err)

Note: 

`mparams` is a dictionary, with keys being parameters and values being the optimal values found for that key.

`err` is a list of the final errors for each calibration run.

The dictionary of parameters and the list of errors are used to plot histograms.

In [None]:
# Match plots (normal and log scale): load the STEP 1 model csv files and
# plot them together with the parameter table for the selected targets.
df_model = ut.read_all_csv(path, model_filenames, type='model')
ut.match_plot(df_model, df_params, targets)

In [None]:
# Convert the errors to floats and split them into clusters.
float_err = list(map(float, err))
# NOTE(review): rounded_err is not used in this cell; it is kept in case
# other cells rely on it.
rounded_err = list(np.round(float_err, decimals=7))
y_kmeans, centers = ut.get_err_clusters(float_err)

# Stacked histograms of the optimal parameter values, organised by the
# k-means error cluster each run belongs to.
ut.plot_stacked_histograms(mparams, centers, y_kmeans, std=0)

In [None]:
# Load the errors stored in the iteration file and plot them run by run.
rounded_err_itr, idx, err_by_run = ut.load_sort_itr_err(path_itr, filename_itr)
ut.plot_err_by_run(err_by_run, idx, deg=1)

## STEP 2 PLOTS

Be sure to update the variables below with your own CSV filenames where appropriate.

In [None]:
# STEP 2 inputs -- edit these to point at your own calibration output.
# csv files read for the optimal parameters and the errors:
filenames = [
    'STEP2-MD3_CR.csv',
]
# csv files read with type='model' for the match plots:
model_filenames = [
    'STEP2-MD3_CR_model.csv',
]
# Iteration-results file.
# NOTE(review): unlike the STEP 1 and STEP 3 names, this one contains '.csv'
# before '.iterationresults' -- confirm it matches the file on disk.
filename_itr = 'STEP2-MD3_CR.csv.iterationresults'
# Target name(s) handed to ut.match_plot:
targets = 'NPP'

In [None]:
# Load the parameter table from every STEP 2 csv file into one DataFrame.
df_params = ut.read_all_csv(path, filenames, type='params')

# Build the {parameter name: [values]} dictionary used by the stacked
# histograms: index by parameter name, transpose, keep rows 4 onward.
mparams = (
    df_params
    .set_index('parameters')
    .T
    .iloc[4:, :]
    .to_dict('list')
)

# Plot columns 5 onward as points on a log-scaled y axis.
df_params.iloc[:, 5:].plot(
    logy=True,
    xlabel="param_id",
    title="optimal parameters",
    style="o",
)

In [None]:
# Read the errors from the STEP 2 csv files, echo the values, and plot them.
err = ut.read_all_csv_errors(path, filenames)
print(err)
ut.plot_err(err)

In [None]:
# Match plots (normal and log scale): load the STEP 2 model csv files and
# plot them together with the parameter table for the selected targets.
df_model = ut.read_all_csv(path, model_filenames, type='model')
ut.match_plot(df_model, df_params, targets)

In [None]:
# Convert the errors to floats and split them into clusters.
float_err = list(map(float, err))
# NOTE(review): rounded_err is not used in this cell; it is kept in case
# other cells rely on it.
rounded_err = list(np.round(float_err, decimals=7))
y_kmeans, centers = ut.get_err_clusters(float_err)

# Stacked histograms of the optimal parameter values, organised by the
# k-means error cluster each run belongs to.
ut.plot_stacked_histograms(mparams, centers, y_kmeans, std=0)

## STEP 3C PLOTS

Be sure to update the variables below with your own CSV filenames where appropriate.

In [None]:
# STEP 3 inputs -- edit these to point at your own calibration output.
# csv files read for the optimal parameters and the errors:
filenames = [
    'STEP3-MD3_CR1.csv',
    'STEP3-MD3_CR2.csv',
    'STEP3-MD3_CR3.csv',
    'STEP3-MD3_CR4.csv',
    'STEP3-MD3_CR5.csv',
    'STEP3-MD3_CR6.csv',
]
# csv files read with type='model' for the match plots:
model_filenames = [
    'STEP3-MD3_CR1_model.csv',
    'STEP3-MD3_CR2_model.csv',
    'STEP3-MD3_CR3_model.csv',
    'STEP3-MD3_CR4_model.csv',
    'STEP3-MD3_CR5_model.csv',
    'STEP3-MD3_CR6_model.csv',
]
# Iteration-results file:
filename_itr = 'STEP3-MD3_CR1.iterationresults'
# Target name(s) handed to ut.match_plot:
targets = 'NPP/VEGC'

In [None]:
# Load the parameter table from every STEP 3 csv file into one DataFrame.
df_params = ut.read_all_csv(path, filenames, type='params')

# Build the {parameter name: [values]} dictionary used by the stacked
# histograms: index by parameter name, transpose, keep rows 4 onward.
mparams = (
    df_params
    .set_index('parameters')
    .T
    .iloc[4:, :]
    .to_dict('list')
)

# Plot columns 5 onward as points on a log-scaled y axis.
df_params.iloc[:, 5:].plot(
    logy=True,
    xlabel="param_id",
    title="optimal parameters",
    style="o",
)

In [None]:
# Read the errors from the STEP 3 csv files and plot them.
err = ut.read_all_csv_errors(path, filenames)
ut.plot_err(err)

In [None]:
# Match plots (normal and log scale): load the STEP 3 model csv files and
# plot them together with the parameter table for the selected targets.
df_model = ut.read_all_csv(path, model_filenames, type='model')
ut.match_plot(df_model, df_params, targets)

In [None]:
# Convert the errors to floats and split them into clusters.
float_err = list(map(float, err))
# NOTE(review): rounded_err is not used in this cell; it is kept in case
# other cells rely on it.
rounded_err = list(np.round(float_err, decimals=7))
y_kmeans, centers = ut.get_err_clusters(float_err)

# Stacked histograms of the optimal parameter values, organised by the
# k-means error cluster each run belongs to.
# NOTE(review): x/y look like the figure size and r/c the subplot grid
# (rows/columns) -- confirm against ut.plot_stacked_histograms.
ut.plot_stacked_histograms(
    mparams,
    centers,
    y_kmeans,
    x=24,
    y=20,
    r=4,
    c=4,
    std=0,
)