<h1>Performance Evaluation and Validation</h1>

**Strategy**
1. Gap length and temporal differences
    - small gaps vs. large gaps
    - seasonal differences
2. Validation metrics
    - correlation coefficient (R²) + scatter plot
    - direct visual comparison in time series plot
    - MSE
    - MAE
3. Types of configurations to compare
    - Baseline models
        - constant-filled
        - linearly filled
    - LSTM
        - Validate against test data vs. validate against in situ data

4. Time series splits wherever data contains causal effects?
    - Do we have causal effects in our data?

<h1>1. Comparison against ERA5 test data</h1>

<h3>Small gaps vs. large gaps</h3>

In [None]:
@widgets.interact(
    comp=widgets.Dropdown(
        options=['Correlation coefficient R² (with test data)', 'MAE', 'MSE'],
        description='Select evaluation metric:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='300px'),
    )
)
def performance_metrics(comp):
    """Compare the model columns of PREDICTION for different gap lengths.

    One metric value is computed per model column ('test' plus every column
    whose name starts with 'filled') and per subset of PREDICTION (all rows,
    rows in 1-day gaps, all remaining rows). The resulting table is displayed
    and plotted. A second figure shows, for every subset, each model scattered
    against 'test' (top row) and the model series themselves (bottom row).

    Parameters
    ----------
    comp : str
        Metric label chosen in the dropdown. Unknown labels leave the table
        empty and draw no metric points.
    """
    # Distinguish between 1-day-gaps and longer gaps.
    # count_gaps is defined elsewhere; it is used here as returning a frame
    # with a 'gapsize' column whose index can be looked up in PREDICTION.
    gaps = count_gaps(np.isnan(PREDICTION['train'].values), PREDICTION['train'].index.values)
    one_day_index = gaps[gaps['gapsize'] == 1].index
    PREDICTION_smallgaps = PREDICTION.loc[one_day_index.values]
    # NOTE(review): this keeps every row that is not part of a 1-day gap, so
    # it may also contain rows without any gap -- confirm against count_gaps.
    PREDICTION_largegaps = PREDICTION[~PREDICTION.index.isin(one_day_index)]

    datasets = [PREDICTION, PREDICTION_smallgaps, PREDICTION_largegaps]
    dataset_labels = ['All gaps', 'Small gaps (1 day)', 'Large gaps (> 1 day)']

    models = ['test'] + [c for c in PREDICTION.columns if c.startswith('filled')]
    # A flat index is required here: a nested list would create a one-level
    # MultiIndex and the legend entries would show up as tuples.
    plot_df = pd.DataFrame(columns=dataset_labels, index=models)

    if comp == 'Correlation coefficient R² (with test data)':
        # NOTE(review): DataFrame.corr() defaults to Pearson's r, which is not
        # squared -- confirm whether the "R²" in the label is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = dataset[models].corr()['test'].values
        ylabel = comp
    elif comp == 'MAE':
        # NOTE(review): the error metrics use 'original' as reference while
        # the correlation uses 'test' -- confirm this is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [abs(dataset[model] - dataset['original']).mean() for model in models]
        ylabel = 'Volumetric soil moisture [m³/m³]'
    elif comp == 'MSE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [((dataset[model] - dataset['original'])**2).mean() for model in models]
        # Squared errors carry squared units.
        ylabel = 'Mean squared error [(m³/m³)²]'
    else:
        ylabel = None

    if ylabel is not None:
        for model in models:
            plt.scatter(plot_df.columns, plot_df.loc[model], label=model)
        plt.ylabel(ylabel)

    display(plot_df)

    plt.legend()
    plt.title(f'{comp} of different (baseline and ML) models')
    plt.grid()
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(2, len(datasets), figsize=(12, 8))
    diagonal = np.arange(0.05, 0.5, 0.1)  # 1:1 reference line
    for i, (label, dataset) in enumerate(zip(dataset_labels, datasets)):
        ax[0, i].plot(diagonal, diagonal, c='black', linestyle='dashed')
        ax[0, i].set_title(label)
        for model in models:
            ax[0, i].scatter(dataset['test'], dataset[model], label=model)
            ax[1, i].plot(dataset[model], label=model)
    ax[0, 0].legend()
    ax[0, 0].set_ylabel('Modelled Volumetric soil moisture [m³/m³]')
    ax[0, 0].set_xlabel('True Volumetric soil moisture [m³/m³]')

<h3>Seasonal comparison</h3>

In [None]:
@widgets.interact(
    comp=widgets.Dropdown(
        options=['Correlation coefficient R² (with test data)', 'MAE', 'MSE'],
        description='Select evaluation metric:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='300px'),
    )
)
def performance_metrics(comp):
    """Compare the model columns of PREDICTION season by season.

    One metric value is computed per model column ('test' plus every column
    whose name starts with 'filled') and per subset of PREDICTION (all rows
    and the four meteorological seasons, selected by the month of the index).
    The resulting table is displayed and plotted. A second figure shows, for
    every subset, each model scattered against 'test' (top row) and the model
    series themselves (bottom row).

    Parameters
    ----------
    comp : str
        Metric label chosen in the dropdown. Unknown labels leave the table
        empty and draw no metric points.
    """
    # Distinguish between 4 different seasons (PREDICTION.index must expose
    # a .month attribute, i.e. be datetime-like).
    season_months = {
        'Spring': [3, 4, 5],
        'Summer': [6, 7, 8],
        'Fall': [9, 10, 11],
        'Winter': [12, 1, 2],
    }
    months = PREDICTION.index.month
    datasets = [PREDICTION] + [PREDICTION[months.isin(m)] for m in season_months.values()]
    dataset_labels = ['All seasons'] + list(season_months)

    models = ['test'] + [c for c in PREDICTION.columns if c.startswith('filled')]
    # A flat index is required here: a nested list would create a one-level
    # MultiIndex and the legend entries would show up as tuples.
    plot_df = pd.DataFrame(columns=dataset_labels, index=models)

    if comp == 'Correlation coefficient R² (with test data)':
        # NOTE(review): DataFrame.corr() defaults to Pearson's r, which is not
        # squared -- confirm whether the "R²" in the label is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = dataset[models].corr()['test'].values
        ylabel = comp
    elif comp == 'MAE':
        # NOTE(review): the error metrics use 'original' as reference while
        # the correlation uses 'test' -- confirm this is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [abs(dataset[model] - dataset['original']).mean() for model in models]
        ylabel = 'Volumetric soil moisture [m³/m³]'
    elif comp == 'MSE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [((dataset[model] - dataset['original'])**2).mean() for model in models]
        # Squared errors carry squared units.
        ylabel = 'Mean squared error [(m³/m³)²]'
    else:
        ylabel = None

    if ylabel is not None:
        for model in models:
            plt.scatter(plot_df.columns, plot_df.loc[model], label=model)
        plt.ylabel(ylabel)

    display(plot_df)

    plt.legend()
    plt.title(f'{comp} of different (baseline and ML) models')
    plt.grid()
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(2, len(datasets), figsize=(12, 8))
    diagonal = np.arange(0.05, 0.5, 0.1)  # 1:1 reference line
    for i, (label, dataset) in enumerate(zip(dataset_labels, datasets)):
        ax[0, i].plot(diagonal, diagonal, c='black', linestyle='dashed')
        ax[0, i].set_title(label)
        for model in models:
            ax[0, i].scatter(dataset['test'], dataset[model], label=model)
            ax[1, i].plot(dataset[model], label=model)
    ax[0, 0].legend()
    ax[0, 0].set_ylabel('Modelled Volumetric soil moisture [m³/m³]')
    ax[0, 0].set_xlabel('True Volumetric soil moisture [m³/m³]')

<h1>2. Comparison against in situ SM data</h1>

<h3>Bias correction</h3>

In [1]:
# Correct for bias between ERA5 and in situ or not?
bias_correction = True

# Always start from the raw in situ series stored in DATA. Computing the bias
# from a PREDICTION column that this cell itself overwrites would make a
# second run see an already corrected series (bias ~ 0) and silently undo the
# correction. Column assignment aligns the series on PREDICTION's index.
# NOTE(review): assumes PREDICTION['in situ SM'] originally equals this raw
# series -- confirm where the column is first created.
PREDICTION['in situ SM'] = DATA['in situ SM']

# Mean offset of the ERA5 'original' column relative to the raw in situ data.
bias = PREDICTION['original'].mean() - PREDICTION['in situ SM'].mean()

PREDICTION[['original', 'in situ SM']].plot()
plt.plot(PREDICTION['in situ SM'] + bias, label='Bias-corrected in situ SM')
plt.title(f'Bias of ERA5 original model data and in situ SM: {bias:.3f} [m³/m³]')
plt.legend()
plt.show()

# Shift the in situ series onto the ERA5 mean only when requested; otherwise
# the raw series assigned above is kept.
if bias_correction:
    PREDICTION['in situ SM'] = PREDICTION['in situ SM'] + bias

NameError: name 'PREDICTION' is not defined

<h3>Small gaps vs. large gaps</h3>

In [None]:
@widgets.interact(
    comp=widgets.Dropdown(
        options=['Correlation coefficient R² (with in situ data)', 'MAE', 'MSE'],
        description='Select evaluation metric:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='300px'),
    )
)
def performance_metrics(comp):
    """Compare the model columns of PREDICTION with in situ data by gap length.

    One metric value is computed per model column ('in situ SM', 'test' and
    every column whose name starts with 'filled') and per subset of PREDICTION
    (all rows, rows in 1-day gaps, all remaining rows), always relative to the
    'in situ SM' column. The resulting table is displayed and plotted. A
    second figure shows, for every subset, each model scattered against
    'in situ SM' (top row) and the model series themselves (bottom row).

    Parameters
    ----------
    comp : str
        Metric label chosen in the dropdown. Unknown labels leave the table
        empty and draw no metric points.
    """
    # Distinguish between 1-day-gaps and longer gaps.
    # count_gaps is defined elsewhere; it is used here as returning a frame
    # with a 'gapsize' column whose index can be looked up in PREDICTION.
    gaps = count_gaps(np.isnan(PREDICTION['train'].values), PREDICTION['train'].index.values)
    one_day_index = gaps[gaps['gapsize'] == 1].index
    PREDICTION_smallgaps = PREDICTION.loc[one_day_index.values]
    # NOTE(review): this keeps every row that is not part of a 1-day gap, so
    # it may also contain rows without any gap -- confirm against count_gaps.
    PREDICTION_largegaps = PREDICTION[~PREDICTION.index.isin(one_day_index)]

    datasets = [PREDICTION, PREDICTION_smallgaps, PREDICTION_largegaps]
    dataset_labels = ['All gaps', 'Small gaps (1 day)', 'Large gaps (> 1 day)']

    models = ['in situ SM', 'test'] + [c for c in PREDICTION.columns if c.startswith('filled')]
    # A flat index is required here: a nested list would create a one-level
    # MultiIndex and the legend entries would show up as tuples.
    plot_df = pd.DataFrame(columns=dataset_labels, index=models)

    if comp == 'Correlation coefficient R² (with in situ data)':
        # NOTE(review): DataFrame.corr() defaults to Pearson's r, which is not
        # squared -- confirm whether the "R²" in the label is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = dataset[models].corr()['in situ SM'].values
        ylabel = comp
    elif comp == 'MAE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [abs(dataset[model] - dataset['in situ SM']).mean() for model in models]
        ylabel = 'Volumetric soil moisture [m³/m³]'
    elif comp == 'MSE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [((dataset[model] - dataset['in situ SM'])**2).mean() for model in models]
        # Squared errors carry squared units.
        ylabel = 'Mean squared error [(m³/m³)²]'
    else:
        ylabel = None

    if ylabel is not None:
        for model in models:
            plt.scatter(plot_df.columns, plot_df.loc[model], label=model)
        plt.ylabel(ylabel)

    display(plot_df)

    plt.legend()
    plt.title(f'{comp} of different (baseline and ML) models')
    plt.grid()
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(2, len(datasets), figsize=(12, 8))
    diagonal = np.arange(0.05, 0.5, 0.1)  # 1:1 reference line
    for i, (label, dataset) in enumerate(zip(dataset_labels, datasets)):
        ax[0, i].plot(diagonal, diagonal, c='black', linestyle='dashed')
        ax[0, i].set_title(label)
        for model in models:
            ax[0, i].scatter(dataset['in situ SM'], dataset[model], label=model)
            ax[1, i].plot(dataset[model], label=model)
    ax[0, 0].legend()
    ax[0, 0].set_ylabel('Modelled Volumetric soil moisture [m³/m³]')
    ax[0, 0].set_xlabel('In situ Volumetric soil moisture [m³/m³]')

<h3>Seasonal comparison</h3>

In [None]:
@widgets.interact(
    comp=widgets.Dropdown(
        options=['Correlation coefficient R² (with in situ data)', 'MAE', 'MSE'],
        description='Select evaluation metric:',
        style={'description_width': '120px'},
        layout=widgets.Layout(width='300px'),
    )
)
def performance_metrics(comp):
    """Compare the model columns of PREDICTION with in situ data per season.

    One metric value is computed per model column ('in situ SM', 'test' and
    every column whose name starts with 'filled') and per subset of PREDICTION
    (all rows and the four meteorological seasons, selected by the month of
    the index), always relative to the 'in situ SM' column. The resulting
    table is displayed and plotted. A second figure shows, for every subset,
    each model scattered against 'in situ SM' (top row) and the model series
    themselves (bottom row).

    Parameters
    ----------
    comp : str
        Metric label chosen in the dropdown. Unknown labels leave the table
        empty and draw no metric points.
    """
    # Distinguish between 4 different seasons (PREDICTION.index must expose
    # a .month attribute, i.e. be datetime-like).
    season_months = {
        'Spring': [3, 4, 5],
        'Summer': [6, 7, 8],
        'Fall': [9, 10, 11],
        'Winter': [12, 1, 2],
    }
    months = PREDICTION.index.month
    datasets = [PREDICTION] + [PREDICTION[months.isin(m)] for m in season_months.values()]
    dataset_labels = ['All seasons'] + list(season_months)

    models = ['in situ SM', 'test'] + [c for c in PREDICTION.columns if c.startswith('filled')]
    # A flat index is required here: a nested list would create a one-level
    # MultiIndex and the legend entries would show up as tuples.
    plot_df = pd.DataFrame(columns=dataset_labels, index=models)

    if comp == 'Correlation coefficient R² (with in situ data)':
        # NOTE(review): DataFrame.corr() defaults to Pearson's r, which is not
        # squared -- confirm whether the "R²" in the label is intended.
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = dataset[models].corr()['in situ SM'].values
        ylabel = comp
    elif comp == 'MAE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [abs(dataset[model] - dataset['in situ SM']).mean() for model in models]
        ylabel = 'Volumetric soil moisture [m³/m³]'
    elif comp == 'MSE':
        for label, dataset in zip(dataset_labels, datasets):
            plot_df[label] = [((dataset[model] - dataset['in situ SM'])**2).mean() for model in models]
        # Squared errors carry squared units.
        ylabel = 'Mean squared error [(m³/m³)²]'
    else:
        ylabel = None

    if ylabel is not None:
        for model in models:
            plt.scatter(plot_df.columns, plot_df.loc[model], label=model)
        plt.ylabel(ylabel)

    display(plot_df)

    plt.legend()
    plt.title(f'{comp} of different (baseline and ML) models')
    plt.grid()
    plt.tight_layout()
    plt.show()

    fig, ax = plt.subplots(2, len(datasets), figsize=(12, 8))
    diagonal = np.arange(0.05, 0.5, 0.1)  # 1:1 reference line
    for i, (label, dataset) in enumerate(zip(dataset_labels, datasets)):
        ax[0, i].plot(diagonal, diagonal, c='black', linestyle='dashed')
        ax[0, i].set_title(label)
        for model in models:
            ax[0, i].scatter(dataset['in situ SM'], dataset[model], label=model)
            ax[1, i].plot(dataset[model], label=model)
    ax[0, 0].legend()
    ax[0, 0].set_ylabel('Modelled Volumetric soil moisture [m³/m³]')
    # The x values are the in situ measurements, so label them as such.
    ax[0, 0].set_xlabel('In situ Volumetric soil moisture [m³/m³]')