In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr
import os


import warnings
warnings.filterwarnings('ignore')

# Flag that presumably toggles figure drawing; it is not read anywhere in the
# visible cells -- TODO confirm whether later cells still use it.
DRAW_FIGURE = False

# NSE (Nash-Sutcliffe Efficiency)
def nse(observed, simulated):
    """Return the Nash-Sutcliffe Efficiency of ``simulated`` against ``observed``.

    Computed as one minus the ratio of the sum of squared errors to the sum of
    squared deviations of ``observed`` from its own mean.

    Args:
        observed: Reference values (must support element-wise arithmetic,
            e.g. a NumPy array or pandas Series).
        simulated: Estimated values, element-wise comparable to ``observed``.

    Returns:
        The efficiency score; 1 means the two inputs are identical.
    """
    squared_error_sum = np.sum((observed - simulated) ** 2)
    squared_deviation_sum = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - (squared_error_sum / squared_deviation_sum)

# KGE (Kling-Gupta Efficiency)
def kge(observed, simulated):
    """Return the Kling-Gupta Efficiency of ``simulated`` against ``observed``.

    Three components are combined, each of which equals 1 for a perfect match:
    the linear correlation coefficient, the ratio of means
    (simulated / observed), and the ratio of coefficients of variation
    (std / mean, simulated over observed). The score is one minus the
    Euclidean distance of these components from (1, 1, 1).

    Args:
        observed: Reference values (array-like).
        simulated: Estimated values, same length as ``observed``.

    Returns:
        The efficiency score; 1 is the best attainable value.
    """
    correlation = np.corrcoef(observed, simulated)[0, 1]
    mean_ratio = np.mean(simulated) / np.mean(observed)
    cv_simulated = np.std(simulated) / np.mean(simulated)
    cv_observed = np.std(observed) / np.mean(observed)
    cv_ratio = cv_simulated / cv_observed
    squared_distance = (correlation - 1) ** 2 + (mean_ratio - 1) ** 2 + (cv_ratio - 1) ** 2
    return 1 - np.sqrt(squared_distance)

# Relative RMSE
def relative_rmse(observed, simulated):
    """Return the RMSE of ``simulated`` divided by the mean of ``observed``.

    Args:
        observed: Reference values (array-like).
        simulated: Estimated values, same length as ``observed``.

    Returns:
        Root-mean-square error expressed as a fraction of the observed mean.
    """
    mse = mean_squared_error(observed, simulated)
    root_mse = np.sqrt(mse)
    observed_mean = np.mean(observed)
    return root_mse / observed_mean

def fit_function(w,  z0, u1, s1):
    """Evaluate the power-law curve ``z0 + u1 * w ** s1``.

    Args:
        w: Independent variable (scalar or array-like supporting ``**``).
        z0: Additive offset.
        u1: Multiplier applied to the power term.
        s1: Exponent applied to ``w``.

    Returns:
        The value of the curve at ``w``.
    """
    power_term = w ** s1
    return z0 + u1 * power_term

folder_path = 'daily_long/daily_long'
# Names of the CSV files found in folder_path; not referenced again in the visible cells.
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

width_df = pd.read_csv('gages3000_glow_datemean_width_timeseries.csv')
# errors='coerce' turns unparseable dates into NaT instead of raising.
width_df['date'] = pd.to_datetime(width_df['date'], errors='coerce')

w50_df = pd.read_csv('1_3_w50.csv').drop_duplicates(subset=['stationid'])
q50_df = pd.read_csv('gages3000_GRFR_q50_slp.csv').drop_duplicates(subset=['stationid'])

stationids = sorted(w50_df['stationid'].unique())

# Split the width table by station once, instead of re-filtering the whole
# table on every loop iteration.
width_by_station = {sid: grp for sid, grp in width_df.groupby('stationid')}
no_width_rows = width_df.iloc[0:0]  # empty frame with the same columns, for stations without widths

# Empty DataFrame intended to hold all data; not referenced again in the visible cells.
pds = pd.DataFrame()

# Rows of each station file are dated one day apart, starting at 1979-01-01.
start_date = pd.to_datetime('1979-01-01')

result_columns = ['stationid', 'date', 'width', 'qobs', 'Q_est', 'kge', 'nse', 'nrmse']
station_frames = []  # one result frame per station that passes all checks
co, co_ori = 0, 0  # counters; never updated in the visible cells

for s in stationids:
    file_path = os.path.join(folder_path, s + '.csv')
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist. Skipping...")
        continue

    # stationids come from w50_df, so a station can be absent from q50_df;
    # taking .values[0] of an empty selection would raise IndexError.
    q50_match = q50_df.loc[q50_df['stationid'] == s, 'q50']
    if q50_match.empty:
        print(f"station {s} has no q50 value. Skipping...")
        continue
    q50 = q50_match.values[0]
    w50 = w50_df.loc[w50_df['stationid'] == s, 'w50'].values[0]

    df_val = pd.read_csv(file_path)
    # Build the date column by row position: the i-th row is start_date + i days.
    # NOTE(review): this assumes every station file starts on 1979-01-01 and has
    # no missing days -- confirm against the data source.
    df_val['date'] = pd.date_range(start=start_date, periods=len(df_val), freq='D')
    df_val['stationid'] = s
    df_val = df_val.dropna(subset=['qobs'])
    # Keep only the days that have both an observed discharge and a width.
    df_val = df_val.merge(
        width_by_station.get(s, no_width_rows),
        on=['stationid', 'date'],
        how='inner',
    )

    # Q_est = q50 * (width / w50) ** (8/3), written as a per-station coefficient.
    coefficient = q50 / (w50 ** (8.0 / 3.0))
    df_val['Q_est'] = df_val['width'] ** (8.0 / 3.0) * coefficient
    # Drops rows with a NaN in ANY column of the merged frame, not only the
    # columns that are exported below.
    df_val = df_val.dropna()
    if len(df_val) < 10:
        print(f"station {s} length < 10. Skipping...")
        continue

    # Each metric is a single per-station scalar, repeated on every row.
    df_val['kge'] = kge(df_val['qobs'], df_val['Q_est'])
    df_val['nse'] = nse(df_val['qobs'], df_val['Q_est'])
    df_val['nrmse'] = relative_rmse(df_val['qobs'], df_val['Q_est'])
    station_frames.append(df_val[result_columns])


# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when no station passed the checks.
if station_frames:
    df_res = pd.concat(station_frames, ignore_index=True)
else:
    df_res = pd.DataFrame(columns=result_columns)
df_res.to_csv('W83_results.csv', index=False)

station Brazil_10800000 length < 10. Skipping...
station Brazil_13310000 length < 10. Skipping...
station Brazil_15325000 length < 10. Skipping...
station Brazil_16080000 length < 10. Skipping...
station Brazil_18119000 length < 10. Skipping...
station Brazil_18870000 length < 10. Skipping...
station Brazil_37730000 length < 10. Skipping...
station Brazil_54002990 length < 10. Skipping...
station Brazil_56846080 length < 10. Skipping...
station Brazil_58636000 length < 10. Skipping...
station Brazil_61884002 length < 10. Skipping...
station Brazil_64517000 length < 10. Skipping...
station Brazil_65775901 length < 10. Skipping...
station Brazil_83461000 length < 10. Skipping...
station Brazil_86298000 length < 10. Skipping...
station Canada_03QC003 length < 10. Skipping...
station Chile_07354001 length < 10. Skipping...
station Chile_08319001 length < 10. Skipping...
station EWA_9110302 length < 10. Skipping...
station EWA_9114186 length < 10. Skipping...
station EWA_9114217 length < 10

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as patches
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import numpy as np

def KGE(y_true, y_pred):  # improved 2012
    """Return the Kling-Gupta Efficiency of ``y_pred`` against ``y_true``.

    Uses three components, each equal to 1 for a perfect match: the linear
    correlation coefficient, the ratio of coefficients of variation
    (std / mean, predicted over true), and the ratio of means
    (predicted / true). The score is one minus the Euclidean distance of
    these components from (1, 1, 1).

    Args:
        y_true: Reference values (array-like).
        y_pred: Estimated values, same length as ``y_true``.

    Returns:
        The efficiency score; 1 is the best attainable value.
    """
    r = np.corrcoef(y_true, y_pred)[0, 1]
    cv_pred = np.std(y_pred) / np.mean(y_pred)
    cv_true = np.std(y_true) / np.mean(y_true)
    variability_ratio = cv_pred / cv_true
    bias_ratio = np.mean(y_pred) / np.mean(y_true)
    distance_sq = (r - 1) ** 2 + (variability_ratio - 1) ** 2 + (bias_ratio - 1) ** 2
    return 1 - np.sqrt(distance_sq)


# Function that computes the NSE
def NSE(observed, simulated):
    """Return the Nash-Sutcliffe Efficiency of ``simulated`` against ``observed``.

    Computed as one minus the sum of squared errors divided by the sum of
    squared deviations of ``observed`` from its mean.

    Args:
        observed: Reference values (must support element-wise arithmetic).
        simulated: Estimated values, element-wise comparable to ``observed``.

    Returns:
        The efficiency score; 1 means the two inputs are identical.
    """
    error_ss = np.sum((observed - simulated) ** 2)
    deviation_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - error_ss / deviation_ss

# Function that computes the relative RMSE
def nRMSE(observed, simulated):
    """Return the RMSE of ``simulated`` normalised by the mean of ``observed``.

    Args:
        observed: Reference values (array-like).
        simulated: Estimated values, same length as ``observed``.

    Returns:
        Root-mean-square error divided by the observed mean.
    """
    squared_error_mean = mean_squared_error(observed, simulated)
    root_error = np.sqrt(squared_error_mean)
    return root_error / np.mean(observed)

# Function that computes the correlation coefficient (CC)
def CC(observed, simulated):
    """Return the Pearson correlation coefficient between the two inputs.

    Only the correlation statistic of ``scipy.stats.pearsonr`` is returned;
    the accompanying p-value is discarded.

    Args:
        observed: Reference values (array-like).
        simulated: Estimated values, same length as ``observed``.

    Returns:
        The Pearson correlation coefficient.
    """
    # NOTE(review): the original comment said Spearman, but the code calls
    # pearsonr -- confirm which coefficient is intended.
    coefficient, _p_value = pearsonr(observed, simulated)
    return coefficient

# Group the results by station once so that each lookup below does not
# rescan the whole of df_res.
results_by_station = {sid: grp for sid, grp in df_res.groupby('stationid')}

valid = []  # one dict of metrics per station
for s in stationids:
    df_station = results_by_station.get(s)
    # Stations with no rows, or with fewer than 10 rows, are not scored.
    if df_station is None or len(df_station) < 10:
        continue

    observed = df_station['qobs']
    simulated = df_station['Q_est']
    # The metric values use *_val names on purpose: binding them to `kge` or
    # `nse` would overwrite the metric functions of the same names defined
    # earlier in the notebook and break a re-run of the cell that calls them.
    cc_val = CC(observed, simulated)
    nrmse_val = nRMSE(observed, simulated)
    kge_val = KGE(observed, simulated)
    nse_val = NSE(observed, simulated)
    # Relative bias of the mean, as a fraction (0 means unbiased), not a percentage.
    pbias = (simulated.mean() / observed.mean() - 1)
    # Ratio of coefficients of variation (std / mean), simulated over observed.
    rv = simulated.std() / simulated.mean() / (observed.std() / observed.mean())
    valid.append({
        'stationid': s,
        'KGE': kge_val,
        'NSE': nse_val,
        'NRMSE': nrmse_val,
        'CC': cc_val,
        'pBIAS': pbias,
        'RV': rv,
    })


valid_df = pd.DataFrame(valid)
# The row index is written as the first (unnamed) CSV column.
valid_df.to_csv('validation_W83.csv')

In [3]:
# Print the per-station validation metrics table (plain-text repr of valid_df).
print(valid_df)

             stationid         KGE          NSE       NRMSE        CC  \
0     Australia_116006  -57.599752 -4896.136603   76.112521  0.658564   
1     Australia_121001    0.089217    -0.141457    1.601077  0.699918   
2     Australia_204007 -162.374104 -5405.266150  190.783133  0.490591   
3     Australia_208004  -10.908990  -266.547348   16.399229  0.447898   
4     Australia_710003 -185.049189 -1387.135318  251.117056  0.732697   
...                ...         ...          ...         ...       ...   
1477     USGS_14243000   -1.258430   -28.993931    2.857730  0.471227   
1478     USGS_14321000   -0.770388   -19.781651    3.603821  0.671250   
1479     USGS_15284000    0.322845    -0.161276    0.814663  0.605442   
1480     USGS_15292700    0.136718    -1.493127    0.791748  0.433433   
1481     USGS_15803000   -0.220857    -0.655247    1.266428  0.483098   

           pBIAS        RV  
0      58.598267  0.760283  
1      -0.831985  0.782567  
2     163.371514  0.233942  
3      