In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

In [None]:
# Read the metrics CSV into a DataFrame; the bare name on the last line
# makes the notebook render the frame as the cell output.
data = pd.read_csv(
    '/Users/yubin/Library/CloudStorage/Box-Box/master thesis and what not/Geoglows V2 paper/Validation/difference 1990-2019/Geoglows V1,V2,BC,SFDC metrics.csv'
)
data

In [None]:
# Pull the four KGE columns out of the table, discarding missing values
# independently per column (so the resulting series may differ in length).
kge_v1, kge_v2, kge_bc, kge_saber = (
    data[column_name].dropna()
    for column_name in ('kge_2012_v1', 'kge_2012_v2', 'kge_2012_BC', 'kge_BC_SFDC')
)

In [None]:
# Function to calculate cumulative distribution
def calculate_cumulative_distribution(series):
    """Return the empirical cumulative distribution of ``series``.

    Parameters
    ----------
    series : pandas.Series
        Observed values. Missing values (NaN) are ignored.

    Returns
    -------
    pandas.Series
        Indexed by the distinct observed values in ascending order; each
        entry is the fraction of non-missing observations that are less than
        or equal to that value, so the last entry is 1.0. An empty (or
        all-NaN) input yields an empty Series.
    """
    # Sort by value so the running sum accumulates from smallest to largest.
    counts = series.value_counts().sort_index()
    cumulative_counts = counts.cumsum()
    # value_counts() drops NaN by default, so normalise by the number of
    # observations that were actually counted. Dividing by len(series) would
    # include the NaN rows and leave the curve topping out below 1.
    cumulative_distribution = cumulative_counts / counts.sum()
    return cumulative_distribution

# Calculate the cumulative distribution of each KGE series
cumulative_distribution_v1 = calculate_cumulative_distribution(kge_v1)
cumulative_distribution_v2 = calculate_cumulative_distribution(kge_v2)
cumulative_distribution_bc = calculate_cumulative_distribution(kge_bc)
cumulative_distribution_saber = calculate_cumulative_distribution(kge_saber)

# Build one shared, sorted x-grid: every KGE value seen in any series plus the
# threshold x_value, so each curve can be read off at exactly that point.
x_value = -0.41
common_index = (
    cumulative_distribution_v1.index
    .union(cumulative_distribution_v2.index)
    .union(cumulative_distribution_bc.index)
    .union(cumulative_distribution_saber.index)
    .union(pd.Index([x_value]))
)

# Reindex onto the shared grid and fill the gaps by interpolation.
# method='index' interpolates against the actual KGE values in the index.
# The default ('linear') ignores the index and treats rows as equally spaced,
# which on this irregular union grid distorts the value at x_value and the
# curve between observed points.
cumulative_distribution_v1 = cumulative_distribution_v1.reindex(common_index).interpolate(method='index')
cumulative_distribution_v2 = cumulative_distribution_v2.reindex(common_index).interpolate(method='index')
cumulative_distribution_bc = cumulative_distribution_bc.reindex(common_index).interpolate(method='index')
cumulative_distribution_saber = cumulative_distribution_saber.reindex(common_index).interpolate(method='index')

# Read each interpolated curve at the threshold
y_v1 = cumulative_distribution_v1.loc[x_value]
y_v2 = cumulative_distribution_v2.loc[x_value]
y_bc = cumulative_distribution_bc.loc[x_value]
y_saber = cumulative_distribution_saber.loc[x_value]

# Print interpolated values (fractions in [0, 1], not percentages)
print(f"GV1: {y_v1}, GV2: {y_v2}, BC: {y_bc}, SABER: {y_saber}")

# Draw the four cumulative KGE curves on a single 8x6-inch axes.
fig, ax = plt.subplots(figsize=(8, 6))

curves = (
    (cumulative_distribution_v1, 'GV1', '#a6611a'),
    (cumulative_distribution_v2, 'GV2', '#1f78b4'),
    (cumulative_distribution_bc, 'BC', '#7b3294'),
    (cumulative_distribution_saber, 'BC SFDC', '#33a02c'),
)
for curve, legend_label, colour in curves:
    ax.plot(curve.index, curve, label=legend_label, color=colour)

ax.set_ylabel('Percentage of Gauge')
ax.yaxis.set_label_coords(-0.05, 0.5)
ax.set_xlabel('KGE')
ax.set_title('Cumulative Distribution of KGE')
ax.legend()
ax.grid(True, linestyle='--', linewidth=0.5)

# Mark the threshold with a dashed red vertical line.
ax.axvline(x=x_value, color='r', linestyle='--')

# Call-outs where each curve crosses the threshold. The arrow tips follow the
# computed y_* values, with small text offsets so the labels do not overlap.
# NOTE(review): the displayed numbers are entered by hand, not derived from
# y_*. If the data or the interpolation changes they must be updated manually
# (y * 100 gives the percentage to show).
callouts = (
    (24, y_v1, (x_value - 0.4, y_v1), '#a6611a'),
    (17.5, y_v2, (x_value - 0.3, y_v2 - 0.07), '#1f78b4'),
    (6, y_bc, (x_value + 0.2, y_bc - 0.01), '#7b3294'),
    (17.3, y_saber, (x_value + 0.2, y_saber), '#33a02c'),
)
for shown_percent, y_at_threshold, text_position, colour in callouts:
    ax.annotate(f'{shown_percent}', xy=(x_value, y_at_threshold), xytext=text_position,
                arrowprops=dict(arrowstyle='->', lw=0.5), color=colour)

# Fix the visible window before reading the automatic tick positions.
ax.set_xlim(left=-5, right=1)
ax.set_ylim(bottom=0, top=1.05)

# Keep the automatic x ticks and add one explicitly at the threshold.
tick_positions = sorted(set(ax.get_xticks()) | {x_value})
ax.set_xticks(tick_positions)


def _format_kge_tick(value, _position):
    """Show whole-number ticks without decimals, all others with two."""
    if value.is_integer():
        return f'{int(value)}'
    return f'{value:.2f}'


def _format_fraction_as_percent(value, _position):
    """Show a 0-1 fraction as a whole-number percentage (no % sign)."""
    return f'{value * 100:.0f}'


ax.xaxis.set_major_formatter(FuncFormatter(_format_kge_tick))
ax.yaxis.set_major_formatter(FuncFormatter(_format_fraction_as_percent))

# Write the figure to disk at 1800 dpi with a tight bounding box.
# NOTE(review): the previous comment here said 300 dpi; the code uses 1800.
output_path = '/Users/yubin/Library/CloudStorage/Box-Box/master thesis and what not/Geoglows V2 paper/Plots/Patterson/cumulative_distribution_KGE.png'
fig.savefig(output_path, dpi=1800, bbox_inches='tight')

# Show the plot
plt.show()