# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as patches
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import numpy as np

# Validation table; downstream code reads its 'stationid', 'date', 'qobs'
# and 'Q_est' columns.
df_val_all = pd.read_csv('3/q_kge_med.csv')

# W83 results; 'Q_est' is renamed so it does not collide with the 'Q_est'
# column of the validation table after merging.
w83_all = pd.read_csv('W83_results.csv').rename(columns={'Q_est': 'Q_est_w83'})

# Left join on (stationid, date): every validation row is kept, and rows with
# no W83 counterpart get NaN in 'Q_est_w83'.
w83_estimates = w83_all[['stationid', 'date', 'Q_est_w83']]
df = pd.merge(df_val_all, w83_estimates, how='left', on=['stationid', 'date'])

def KGE(y_true, y_pred):  # improved 2012
    """Return the Kling-Gupta efficiency of ``y_pred`` against ``y_true``.

    The score is ``1 - sqrt((r - 1)^2 + (gamma - 1)^2 + (beta - 1)^2)`` where

    * ``r`` is the linear correlation between the two series,
    * ``gamma`` is the ratio of their coefficients of variation
      (std / mean of the prediction over std / mean of the truth),
    * ``beta`` is the ratio of their means (prediction over truth).

    A perfect prediction scores 1. A zero mean in either series makes the
    ratios undefined (division by zero).
    """
    mean_true = np.mean(y_true)
    mean_pred = np.mean(y_pred)

    r = np.corrcoef(y_true, y_pred)[0, 1]
    gamma = (np.std(y_pred) / mean_pred) / (np.std(y_true) / mean_true)
    beta = mean_pred / mean_true

    squared_distance = (r - 1) ** 2 + (gamma - 1) ** 2 + (beta - 1) ** 2
    return 1 - np.sqrt(squared_distance)


# Nash-Sutcliffe efficiency (NSE)
def NSE(observed, simulated):
    """Return 1 minus the ratio of squared error to observed variance.

    The numerator is the sum of squared differences between ``observed`` and
    ``simulated``; the denominator is the sum of squared deviations of
    ``observed`` from its own mean. Inputs must support element-wise
    subtraction (e.g. numpy arrays or pandas Series).
    """
    residual = observed - simulated
    anomaly = observed - np.mean(observed)
    return 1 - np.sum(residual ** 2) / np.sum(anomaly ** 2)

# Normalised root-mean-square error (nRMSE)
def nRMSE(observed, simulated):
    """Return the RMSE of ``simulated`` divided by the mean of ``observed``.

    The mean squared error comes from ``sklearn.metrics.mean_squared_error``.
    """
    mse = mean_squared_error(observed, simulated)
    return np.sqrt(mse) / np.mean(observed)

# Correlation coefficient (CC)
def CC(observed, simulated):
    """Return the Pearson correlation coefficient of the two series.

    Only the coefficient from ``scipy.stats.pearsonr`` is returned; the
    accompanying p-value is discarded.
    """
    coefficient, _p_value = pearsonr(observed, simulated)
    return coefficient


def _station_metrics(observed, simulated):
    """Return the validation metrics of one simulated series for one station.

    Parameters
    ----------
    observed, simulated : pandas.Series
        Series sharing the same index (two columns of the same frame).

    Returns
    -------
    dict
        Keys 'KGE', 'NSE', 'NRMSE', 'CC', 'pBIAS', 'RV', in that order.
        'pBIAS' is the ratio of means minus 1 and 'RV' is the ratio of the
        coefficients of variation (simulated over observed).

    Rows where either value is missing are dropped first: the left merge that
    builds ``df`` can leave NaN in the W83 column, and the sklearn-based
    nRMSE raises on NaN input. With fewer than two valid pairs no correlation
    can be computed, so every metric is reported as NaN instead of raising.
    """
    both_present = observed.notna() & simulated.notna()
    observed = observed[both_present]
    simulated = simulated[both_present]

    if len(observed) < 2:
        return dict.fromkeys(['KGE', 'NSE', 'NRMSE', 'CC', 'pBIAS', 'RV'], np.nan)

    cv_observed = observed.std() / observed.mean()
    cv_simulated = simulated.std() / simulated.mean()
    return {
        'KGE': KGE(observed, simulated),
        'NSE': NSE(observed, simulated),
        'NRMSE': nRMSE(observed, simulated),
        'CC': CC(observed, simulated),
        'pBIAS': simulated.mean() / observed.mean() - 1,
        'RV': cv_simulated / cv_observed,
    }


# Not used by the loop below; retained in case later code refers to it.
stationids = df['stationid'].unique()
valid = []
valid_w83 = []

# sort=False keeps stations in order of first appearance, matching the order
# of ``stationids``; a single groupby pass replaces one full-frame boolean
# filter per station.
for s, df_station in df.groupby('stationid', sort=False):
    observed = df_station['qobs']
    # 'Q_est' feeds the SWAP table, 'Q_est_w83' the W83 table.
    valid.append({'stationid': s, **_station_metrics(observed, df_station['Q_est'])})
    valid_w83.append({'stationid': s, **_station_metrics(observed, df_station['Q_est_w83'])})


swap_df = pd.DataFrame(valid)
w83_df = pd.DataFrame(valid_w83)
swap_df.to_csv('validation_SWAP_datematch.csv')
w83_df.to_csv('validation_W83_datematch.csv')


# Metric columns, in the order in which they are plotted
columns = ['KGE', 'NSE', 'NRMSE', 'CC', 'pBIAS', 'RV']

# Drop rows (stations) with a missing value in any metric column
for _metric_table in (swap_df, w83_df):
    _metric_table.dropna(subset=columns, inplace=True)

# x-axis range for each metric's subplot
x_axis_ranges = dict(
    KGE=[-1, 1],
    NSE=[-1, 1],
    NRMSE=[0, 1],
    CC=[0, 1],
    pBIAS=[0, 1],
    RV=[0, 2],
)

# 2 x 3 grid of axes: one subplot per metric
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 10))

import numpy as np
import matplotlib.pyplot as plt

# Define the function to plot PDF using ax.hist
def plot_pdf(data, ax, label, x_range=None, color=None, bins=20, edge_alpha=1.0, fill_alpha=0.25):
    """Draw a density-normalised histogram of ``data`` on ``ax``.

    Parameters
    ----------
    data : array-like
        Values to bin.
    ax : matplotlib Axes
        Axes to draw on.
    label : str
        Legend label for the histogram.
    x_range : sequence of two numbers, optional
        Passed to ``ax.hist`` as ``range``.
    color : matplotlib colour, optional
        Colour of both bar edges and bar fill. When None, each bar keeps the
        face colour that ``ax.hist`` assigned to it.
    bins : int or sequence
        Passed to ``ax.hist`` as ``bins``.
    edge_alpha, fill_alpha : float
        Opacity of the bar outline and of the bar fill, respectively.

    Returns None.
    """
    # Local import: this module-level name is not imported elsewhere in the file.
    from matplotlib.colors import to_rgba

    # Distinct local names: the hist return values must not shadow the
    # ``bins`` parameter or the ``matplotlib.patches`` module import.
    _counts, _edges, bars = ax.hist(data, bins=bins, label=label, range=x_range, density=True)

    for bar in bars:
        base_color = bar.get_facecolor() if color is None else color

        # An artist-level alpha applies to edge and face alike, so calling
        # set_alpha twice only keeps the last value. Clear it and carry the
        # two opacities in the RGBA colours instead.
        bar.set_alpha(None)
        bar.set_linewidth(2)
        bar.set_edgecolor(to_rgba(base_color, edge_alpha))
        bar.set_facecolor(to_rgba(base_color, fill_alpha))



# Empirical CDF plot for one dataset
def plot_cdf(data, ax, label, x_range=None, color=None):
    """Plot the empirical cumulative distribution of ``data`` on ``ax``.

    The values are sorted ascending and the k-th smallest value (1-based) is
    plotted at height ``k / n``, where ``n`` is the number of values.

    Parameters
    ----------
    data : iterable of numbers
        Sample to plot.
    ax : matplotlib Axes
        Axes to draw on.
    label : str
        Legend label of the curve.
    x_range : sequence, optional
        x-axis limits; applied only when truthy.
    color : matplotlib colour, optional
        Line colour.
    """
    ordered = sorted(data)
    n_points = len(ordered)
    cumulative = [rank / n_points for rank, _ in enumerate(ordered, start=1)]
    ax.plot(ordered, cumulative, label=label, color=color)

    ax.set_ylabel('CDF')

    # Restrict the x-axis only when a range was supplied
    if x_range:
        ax.set_xlim(x_range)

# Line colour of each model's curve
colors = dict(W83='red', SWAP='orange')

# Transform applied to a metric column before its CDF is drawn. Metrics not
# listed here are plotted unchanged.
#   pBIAS -> |pBIAS|
#   RV    -> |RV - 1|
# NOTE: the subplot title is still the raw column name, not the transformed
# quantity.
cdf_transforms = {
    'pBIAS': np.abs,
    'RV': lambda values: np.abs(values - 1),
}

# One subplot per metric, filled row by row; every subplot shows the CDFs of
# both models (W83 first, then SWAP).
for i, col in enumerate(columns):
    grid_row, grid_col = divmod(i, 3)
    ax = axes[grid_row, grid_col]

    transform = cdf_transforms.get(col, lambda values: values)
    for model, metric_table in (('W83', w83_df), ('SWAP', swap_df)):
        plot_cdf(
            transform(metric_table[col]),
            ax,
            model,
            x_range=x_axis_ranges[col],
            color=colors[model],
        )

    ax.legend()
    ax.set_title(col)

# Adjust layout and display the figure (saving is currently disabled)
plt.tight_layout()
# plt.savefig('comparing-PDF-and-CDF.png', dpi=500)
plt.show()