In [1]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# --- 评估指标 ---
def nse(observed, simulated):
    """Return the Nash-Sutcliffe efficiency of `simulated` against `observed`.

    The score is one minus the ratio of the sum of squared errors to the sum
    of squared deviations of the observations from their own mean. A perfect
    simulation scores 1; predicting the observed mean everywhere scores 0.

    Args:
        observed: Array-like of observed values.
        simulated: Array-like of simulated values, aligned with `observed`.

    Returns:
        The efficiency as a float (non-finite when `observed` is constant,
        because the denominator is then zero).
    """
    squared_error_sum = np.sum((observed - simulated) ** 2)
    squared_deviation_sum = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - (squared_error_sum / squared_deviation_sum)

def kge(observed, simulated):
    """Return a Kling-Gupta efficiency score of `simulated` against `observed`.

    Three terms are combined as a Euclidean distance from the ideal point
    (1, 1, 1), and that distance is subtracted from 1:

    * the Pearson correlation between the two series,
    * the ratio of the simulated mean to the observed mean,
    * the ratio of the coefficients of variation (std / mean) of the
      simulated and observed series.

    Args:
        observed: Array-like of observed values.
        simulated: Array-like of simulated values, aligned with `observed`.

    Returns:
        The efficiency as a float; 1 indicates a perfect match.
    """
    correlation = np.corrcoef(observed, simulated)[0, 1]

    mean_ratio = np.mean(simulated) / np.mean(observed)

    # Coefficient of variation of each series (population std over mean).
    cv_simulated = np.std(simulated) / np.mean(simulated)
    cv_observed = np.std(observed) / np.mean(observed)
    cv_ratio = cv_simulated / cv_observed

    distance = np.sqrt(
        (correlation - 1) ** 2 + (mean_ratio - 1) ** 2 + (cv_ratio - 1) ** 2
    )
    return 1 - distance

def relative_rmse(observed, simulated):
    """Return the RMSE of `simulated` against `observed`, relative to the observed mean.

    Args:
        observed: Array-like of observed values.
        simulated: Array-like of simulated values, aligned with `observed`.

    Returns:
        The root-mean-square error divided by the mean of `observed`.
    """
    mse = mean_squared_error(observed, simulated)
    return np.sqrt(mse) / np.mean(observed)


# --- Data preparation ---
# Station lookup table (COMID / station / STCD); exact duplicate rows are dropped
# before it is used as a join table.
sctd_df = pd.read_csv('pre-data/COMID-station-STCD.csv').drop_duplicates()

# Fit parameters and hypso_med table, each inner-joined to the lookup on STCD so
# only stations present in the lookup remain.
df_fit_all = pd.read_csv('3/fit_proba_modified.csv').merge(sctd_df, on='STCD', how='inner')
df_med_all = pd.read_csv('3/hypso_med_modified.csv').merge(sctd_df, on='STCD', how='inner')

# The width time series is joined on 'stnmpy', so the lookup's 'station' column
# is renamed to match.
sctd_df = sctd_df.rename(columns={'station': 'stnmpy'})

width_df = (
    pd.read_csv('pre-data/northchina_width_timeseries.csv')
    .merge(sctd_df, on='stnmpy', how='inner')
    # Keep only records whose width is greater than 30.
    .loc[lambda frame: frame['width'] > 30]
    # Average the width per station and date.
    .groupby(['stnmpy', 'date', 'COMID', 'STCD'], as_index=False)['width']
    .mean()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

stationids = sorted(df_fit_all['STCD'].unique())

# Absolute Windows directory that is searched for the per-station observation CSVs.
folder_path = r'H:\1-NorthRivers\2-Lin-data-NorthRivers\Observation'

# --- Main processing loop ---
# For every station: load the observed discharge, join it to the width series,
# keep widths inside the fitted [w_low, w_high] range, interpolate the
# cross-section area from the station's hypso_med table, estimate discharge and
# score the estimate with KGE / NSE / relative RMSE.
MANNING_N = 0.035  # roughness coefficient used in the discharge estimate for every station
RESULT_COLUMNS = ['stnmpy', 'date', 'width', 'area_hypso', 'Q', 'Q_est', 'kge', 'nse', 'nrmse']

df_res = []
co, co_ori = 0, 0

for s in stationids:
    # --- Load the observation file ---
    # glob returns matches in arbitrary order; sorting makes the chosen file deterministic.
    file_match = sorted(glob.glob(os.path.join(folder_path, f"*{int(s)}*.csv")))
    if not file_match:
        print(f"File for station {s} not found. Skipping...")
        continue
    file_path = file_match[0]
    df_val = pd.read_csv(file_path).rename(columns={'utctime': 'date'}).dropna(subset=['Q'])
    df_val['STCD'] = int(s)
    df_val['date'] = pd.to_datetime(df_val['date'])

    # --- Join with the width records of the same station and date ---
    merged_df = df_val.merge(width_df, on=['STCD', 'date'], how='inner')

    # --- Fitted parameters and hypso_med table for this station ---
    df_fit = df_fit_all[df_fit_all['STCD'] == s]
    df_med = df_med_all[df_med_all['STCD'] == s].reset_index(drop=True)
    w_low, w_high, slp = df_fit.iloc[0][['w_low', 'w_high', 'slp']]

    # --- Restrict to the fitted width range ---
    # NOTE(review): co_ori counts unique dates while co counts rows, so the two
    # totals are only comparable when each date occurs once — confirm intent.
    merged_df_ori = merged_df.drop_duplicates('date')
    in_fit_range = (merged_df['width'] >= w_low) & (merged_df['width'] <= w_high)
    # Explicit copy: new columns are assigned below and must not target a slice
    # of the unfiltered frame.
    merged_df = merged_df[in_fit_range].copy()
    co_ori += len(merged_df_ori)
    co += len(merged_df)

    if len(merged_df) < 10:
        print(f"Station {s}: insufficient data after filtering. Skipping...")
        continue

    # --- Area interpolation ---
    n_med = len(df_med)
    if n_med < 2:
        # At least two table rows are needed to form an interpolation interval.
        print(f"Station {s}: hypsometric table has fewer than 2 rows. Skipping...")
        continue

    # Assumes df_med['width'] is sorted ascending, as np.searchsorted requires — TODO confirm.
    med_width = df_med['width'].to_numpy()
    med_wse = df_med['wse'].to_numpy()
    med_area = df_med['area'].to_numpy()
    obs_width = merged_df['width'].to_numpy()

    raw_idx = np.searchsorted(med_width, obs_width)
    # Widths below the first or above the last tabulated width cannot be
    # interpolated; they are flagged here and receive a NaN area further down.
    outside_table = (obs_width < med_width[0]) | (raw_idx >= n_med)
    # Clip so that `idx_w - 1` never becomes -1 (which would wrap around to the
    # last table row) and `idx_w` never runs past the end of the table. A width
    # equal to the first tabulated width lands in the first interval.
    idx_w = np.clip(raw_idx, 1, n_med - 1)

    merged_df['width_i-1'] = med_width[idx_w - 1]
    merged_df['width_i'] = med_width[idx_w]
    merged_df['wse_i-1'] = med_wse[idx_w - 1]
    merged_df['wse_i'] = med_wse[idx_w]
    merged_df['area_i-1'] = med_area[idx_w - 1]

    # Trapezoidal increment on top of the area at the lower table row.
    width_diff = merged_df['width'] - merged_df['width_i-1']
    width_interval = merged_df['width_i'] - merged_df['width_i-1']
    delta_h = merged_df['wse_i'] - merged_df['wse_i-1']

    area_hypso = merged_df['area_i-1'] + \
        0.5 * (merged_df['width_i-1'] + merged_df['width']) * delta_h * (width_diff / width_interval)
    # Rows outside the table become NaN and are removed by the dropna() below.
    merged_df['area_hypso'] = area_hypso.mask(outside_table)

    # --- Discharge estimate from Manning's formula ---
    merged_df['Q_est'] = merged_df['area_hypso'] ** (5.0 / 3.0) * \
                         merged_df['width'] ** (-2.0 / 3.0) * slp ** 0.5 / MANNING_N

    # Drops every row that has a NaN in any column.
    merged_df = merged_df.dropna()
    if merged_df.empty:
        # The metric functions cannot score an empty series.
        print(f"Station {s}: no complete rows left after interpolation. Skipping...")
        continue

    # --- Score the estimate (one value per station, repeated on every row) ---
    merged_df['kge'] = kge(merged_df['Q'], merged_df['Q_est'])
    merged_df['nse'] = nse(merged_df['Q'], merged_df['Q_est'])
    merged_df['nrmse'] = relative_rmse(merged_df['Q'], merged_df['Q_est'])

    df_res.append(merged_df[RESULT_COLUMNS])

# --- Collect the results ---
if df_res:
    df_res = pd.concat(df_res, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep df_res a DataFrame for later cells.
    df_res = pd.DataFrame(columns=RESULT_COLUMNS)

# Guard against division by zero when no observation matched at all.
ratio = co / co_ori if co_ori else float('nan')
print(f"\n有效观测总数: {co} / 原始: {co_ori} ≈ {ratio:.2%}")

if df_res.empty:
    # Do not overwrite an existing results file with an empty table.
    print("No station produced results; '3/q_kge_med_modified.csv' was not written.")
else:
    df_res.to_csv('3/q_kge_med_modified.csv')


有效观测总数: 1412 / 原始: 1666 ≈ 84.75%


In [9]:
# Distinct NSE values in the results, and how many of them are positive.
nse_values = df_res['nse'].unique()
print(nse_values)
print(len(nse_values[nse_values > 0]))

# Same for KGE, preceded by the number of distinct (non-null) KGE values.
print(df_res['kge'].nunique())
kge_values = df_res['kge'].unique()
print(kge_values)
print(len(kge_values[kge_values > 0]))

[-9.07499268e-02 -1.24252966e+00  3.21973161e-03 -1.54646102e+00
 -6.12398985e-01 -6.01426381e+00 -6.57625604e-01 -7.87544011e-01
 -2.62857109e-02 -3.11401102e-01 -2.34204652e-01 -2.28877074e-01
  3.27000493e-02 -2.37492965e+00 -9.49017232e-02 -7.50990128e-02
  3.52753791e-01]
3
17
[ 1.51996711e-01 -3.38855697e-01  3.30977857e-01  1.11783068e-01
 -4.62632089e-02  1.53447068e-01 -1.66676214e-01  1.69818112e-04
  2.54824550e-01  1.97624132e-01 -1.51563098e-01 -6.74734057e-02
 -5.39132999e-02 -8.02984162e-01 -2.19915266e-01 -2.41281280e-01
  4.74086553e-01]
8


In [3]:
# Number of distinct STCD values in the hypso_med table and in the fit table.
print(df_med_all['STCD'].nunique())
print(df_fit_all['STCD'].nunique())

17
17


In [4]:
# Print the aggregated width table (pandas truncates the middle rows).
print(width_df)

             stnmpy       date     COMID      STCD       width
0           baimasi 1986-06-10  43027826  41643400   96.204661
1           baimasi 1987-03-16  43027826  41643400  111.810379
2           baimasi 1987-07-06  43027826  41643400  112.402024
3           baimasi 1988-04-12  43027826  41643400   85.678452
4           baimasi 1989-02-10  43027826  41643400   71.576796
...             ...        ...       ...       ...         ...
4720  xingjiawopeng 2020-07-22  42061756  21101000   55.620608
4721  xingjiawopeng 2020-08-14  42061756  21101000   55.620608
4722  xingjiawopeng 2020-08-23  42061756  21101000   85.357167
4723  xingjiawopeng 2020-10-10  42061756  21101000   63.295530
4724  xingjiawopeng 2020-10-17  42061756  21101000   55.620608

[4725 rows x 5 columns]


In [5]:
# Width records whose 'stnmpy' value is 'humaqiao'.
print(width_df[width_df['stnmpy']=='humaqiao'])

        stnmpy       date     COMID      STCD       width
1106  humaqiao 1985-06-04  42012937  10400070  169.145506
1107  humaqiao 1986-06-07  42012937  10400070  162.353142
1108  humaqiao 1986-07-25  42012937  10400070  203.697088
1109  humaqiao 1987-06-10  42012937  10400070  203.697088
1110  humaqiao 1987-09-14  42012937  10400070  212.264973
...        ...        ...       ...       ...         ...
1335  humaqiao 2020-05-03  42012937  10400070  144.347137
1336  humaqiao 2020-05-26  42012937  10400070  124.989362
1337  humaqiao 2020-07-13  42012937  10400070  214.942742
1338  humaqiao 2020-07-22  42012937  10400070  131.605347
1339  humaqiao 2020-09-15  42012937  10400070  244.757891

[234 rows x 5 columns]


In [6]:
# Number of distinct 'stnmpy' values in the width table.
print(width_df['stnmpy'].nunique())

20


In [7]:
# Number of distinct STCD values in the hypso_med table and in the fit table.
print(df_med_all['STCD'].nunique())
print(df_fit_all['STCD'].nunique())

17
17
