In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr
import os
import warnings
# Install a global filter that ignores every warning raised after this point.
warnings.filterwarnings('ignore')

# Presumably toggles figure drawing; it is not referenced in the visible part
# of this file -- TODO confirm where it is consumed.
DRAW_FIGURE = False

# NSE (Nash-Sutcliffe Efficiency)
def nse(observed, simulated):
    """Return the Nash-Sutcliffe Efficiency of *simulated* against *observed*.

    Computed as ``1 - sum((obs - sim)^2) / sum((obs - mean(obs))^2)``.

    Both inputs are converted to float NumPy arrays first, so any array-like
    (list, tuple, ndarray, pandas Series) is accepted and values are paired
    by position. A NaN in either input propagates to the result rather than
    being skipped.

    Args:
        observed: Array-like of observed values.
        simulated: Array-like of simulated values, broadcastable against
            ``observed``.

    Returns:
        The efficiency as a NumPy float. When ``observed`` has zero variance
        the denominator is 0 and the result is ``-inf`` or ``nan``.
    """
    obs = np.asarray(observed, dtype=float)
    sim = np.asarray(simulated, dtype=float)
    residual_ss = np.sum((obs - sim) ** 2)
    variance_ss = np.sum((obs - np.mean(obs)) ** 2)
    return 1 - (residual_ss / variance_ss)

# KGE (Kling-Gupta Efficiency)
def kge(observed, simulated):
    """Return the Kling-Gupta Efficiency of *simulated* against *observed*.

    Three components are combined as a Euclidean distance from the ideal
    point (1, 1, 1):

    * the linear correlation between the two series,
    * the ratio of the simulated mean to the observed mean,
    * the ratio of the simulated to the observed coefficient of variation
      (standard deviation divided by mean).

    Returns:
        ``1 - sqrt((r - 1)^2 + (mean_ratio - 1)^2 + (cv_ratio - 1)^2)``.
    """
    correlation = np.corrcoef(observed, simulated)[0, 1]

    mean_ratio = np.mean(simulated) / np.mean(observed)

    cv_simulated = np.std(simulated) / np.mean(simulated)
    cv_observed = np.std(observed) / np.mean(observed)
    cv_ratio = cv_simulated / cv_observed

    squared_distance = (correlation - 1) ** 2 + (mean_ratio - 1) ** 2 + (cv_ratio - 1) ** 2
    return 1 - np.sqrt(squared_distance)

# Relative RMSE
def relative_rmse(observed, simulated):
    """Return the RMSE between the two series divided by the observed mean.

    The mean squared error comes from ``mean_squared_error``; its square
    root is then normalised by ``np.mean(observed)``.
    """
    mse = mean_squared_error(observed, simulated)
    observed_mean = np.mean(observed)
    return np.sqrt(mse) / observed_mean

def fit_function(w,  z0, u1, s1):
    """Evaluate the power-law curve ``z0 + u1 * w ** s1``.

    Args:
        w: Value (or array of values) raised to the power ``s1``.
        z0: Additive offset.
        u1: Multiplier applied to the power term.
        s1: Exponent applied to ``w``.

    Returns:
        ``z0 + u1 * (w ** s1)``, with the same type/shape the operands yield.
    """
    power_term = w ** s1
    scaled_term = u1 * power_term
    return z0 + scaled_term

# Tables that are filtered by 'stationid' once per station further down.
df_fit_all = pd.read_csv('3/fit_proba_modified_q50.csv')
df_med_all = pd.read_csv('3/hypso_med_modified_q50.csv')

# Folder searched for "<stationid>.csv" files; also list every CSV in it.
folder_path = 'daily_long/daily_long'
csv_files = [
    entry
    for entry in os.listdir(folder_path)
    if entry.endswith('.csv')
]

# Width time series. Dates that cannot be parsed are coerced to NaT.
width_df = pd.read_csv('gages3000_glow_datemean_width_timeseries.csv')
width_df['date'] = pd.to_datetime(width_df['date'], errors='coerce')

# Station ids in sorted order, taken from the fit table.
stationids = sorted(df_fit_all['stationid'].unique())

# Empty DataFrame intended to hold all data.
pds = pd.DataFrame()

# Daily dates are generated starting from 1979-01-01, one day per row.
start_date = pd.to_datetime('1979-01-01')

# Accumulators: per-station result frames, and row counts after (co) and
# before (co_ori) the width-range filter.
df_res = list()
co = co_ori = 0

# Roughness coefficient used in the discharge formula for every station.
MANNING_N = 0.035
# Columns kept for each station and written to the output CSV.
RESULT_COLUMNS = ['stationid', 'date', 'width', 'area_hypso', 'qobs', 'Q_est',
                  'kge', 'nse', 'nrmse']

# For each station: pair observed discharge with widths by date, interpolate
# cross-section area from the hypsometric table, estimate discharge, and
# score the estimate against the observations.
for s in stationids:
    # f-string so that non-string station ids do not break the concatenation.
    file_path = os.path.join(folder_path, f'{s}.csv')
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist. Skipping...")
        continue

    df_val = pd.read_csv(file_path)
    # Dates are assigned by row position: one row per day from start_date.
    # NOTE(review): assumes the file has consecutive daily rows -- confirm.
    df_val['date'] = pd.date_range(start=start_date, periods=len(df_val), freq='D')
    df_val['stationid'] = s
    df_val = df_val.dropna(subset=['qobs'])
    df_val = df_val.merge(width_df[width_df['stationid'] == s],
                          on=['stationid', 'date'], how='inner')

    df_fit = df_fit_all[df_fit_all['stationid'] == s]
    df_med = df_med_all[df_med_all['stationid'] == s].reset_index(drop=True)

    w_low, w_high, slp = df_fit.iloc[0][['w_low', 'w_high', 'slp']]

    # Keep one row per date; count rows before and after the width filter.
    df_val_ori = df_val.drop_duplicates('date')
    in_range = (df_val['width'] >= w_low) & (df_val['width'] <= w_high)
    # .copy() so the column assignments below act on an independent frame.
    df_val = df_val[in_range].drop_duplicates('date').copy()
    co_ori += len(df_val_ori)
    co += len(df_val)

    # Interpolation needs a segment, i.e. at least two table rows.
    if len(df_med) < 2:
        print(f"station {s} has fewer than 2 hypsometric rows. Skipping...")
        continue

    med_width = df_med['width'].to_numpy()
    med_wse = df_med['wse'].to_numpy()
    med_area = df_med['area'].to_numpy()
    obs_width = df_val['width'].to_numpy()

    # Vectorised lookup of the table segment (upper row index) per width.
    # NOTE(review): searchsorted requires med_width to be ascending -- confirm
    # the hypsometric table is stored in that order.
    idx_w = np.searchsorted(med_width, obs_width)
    # searchsorted may return 0 (idx_w - 1 would wrap to the last row) or
    # len(table) (out of bounds); clamp so both neighbours are valid rows.
    idx_w = np.clip(idx_w, 1, len(med_width) - 1)
    lower = idx_w - 1

    df_val['idx_w'] = idx_w
    df_val['width_i-1'] = med_width[lower]
    df_val['width_i'] = med_width[idx_w]
    df_val['wse_i-1'] = med_wse[lower]
    df_val['wse_i'] = med_wse[idx_w]
    df_val['area_i-1'] = med_area[lower]

    # Vectorised area: area at the lower row plus the trapezoid between the
    # lower-row width and the observed width, whose height is the linearly
    # interpolated share of the segment's wse difference.
    width_diff = df_val['width'] - df_val['width_i-1']
    width_diff_i = df_val['width_i'] - df_val['width_i-1']
    area_hypso = df_val['area_i-1'] + 0.5 * (df_val['width_i-1'] + df_val['width']) * \
                 (df_val['wse_i'] - df_val['wse_i-1']) * width_diff * (1 / width_diff_i)
    # Widths outside the table are not extrapolated: they become NaN here and
    # are removed by the dropna() below.
    outside = (obs_width < med_width[0]) | (obs_width > med_width[-1])
    df_val['area_hypso'] = area_hypso.mask(outside)

    # Vectorised discharge estimate:
    # Q = A^(5/3) * W^(-2/3) * sqrt(slope) / MANNING_N
    df_val['Q_est'] = df_val['area_hypso'] ** (5.0 / 3.0) * df_val['width'] ** (-2.0 / 3.0) * \
                      slp ** 0.5 / MANNING_N
    df_val = df_val.dropna()
    if len(df_val)<10:
        print(f"station {s} length < 10. Skipping...")
        continue

    # One score per station, repeated on every row of that station.
    df_val['kge'] = kge(df_val['qobs'], df_val['Q_est'])
    df_val['nse'] = nse(df_val['qobs'], df_val['Q_est'])
    df_val['nrmse'] = relative_rmse(df_val['qobs'], df_val['Q_est'])
    df_res.append(df_val[RESULT_COLUMNS])


#print(co, co_ori, co/co_ori)
# pd.concat raises on an empty list, so fall back to a header-only table
# when every station was skipped.
if df_res:
    df_res = pd.concat(df_res, ignore_index=True)
else:
    df_res = pd.DataFrame(columns=RESULT_COLUMNS)
df_res.to_csv('3/q_kge_med_modified_q50.csv', index=False)

station Brazil_10800000 length < 10. Skipping...
station Brazil_56846080 length < 10. Skipping...
station Brazil_83461000 length < 10. Skipping...
station EWA_9114227 length < 10. Skipping...
station GRDC_1112100 length < 10. Skipping...
station GRDC_1134200 length < 10. Skipping...
station GRDC_1134400 length < 10. Skipping...
station GRDC_1134450 length < 10. Skipping...
station GRDC_1134460 length < 10. Skipping...
station GRDC_1496500 length < 10. Skipping...
station GRDC_1531600 length < 10. Skipping...
station GRDC_1537100 length < 10. Skipping...
station GRDC_1737150 length < 10. Skipping...
station GRDC_1748500 length < 10. Skipping...
station GRDC_1749500 length < 10. Skipping...
station GRDC_1835900 length < 10. Skipping...
station GRDC_2369905 length < 10. Skipping...
station GRDC_2469112 length < 10. Skipping...
station GRDC_2469140 length < 10. Skipping...
station GRDC_2587220 length < 10. Skipping...
station GRDC_2906880 length < 10. Skipping...
station GRDC_2910606 lengt