In [1]:
import pandas as pd
import glob
import os

# Collect every file in the current working directory matching 5.*.csv.
# sorted() pins the processing order (and therefore the row order of the
# summary built from these files); glob itself returns matches in an
# arbitrary, filesystem-dependent order.
csv_files = sorted(glob.glob('5.*.csv'))
if not csv_files:
    # Raise instead of calling exit(): inside a Jupyter kernel exit() asks the
    # kernel to shut down, whereas an exception stops this cell (and any
    # "Run All") with a visible error and leaves the kernel usable.
    raise FileNotFoundError("未找到5.*.csv文件,请确保文件在当前目录下")

print(f"找到 {len(csv_files)} 个文件: {csv_files}")

# Metric columns that every input file must provide next to 'stationid'.
metric_cols = ['kge', 'nrmse', 'nse']

# One cleaned DataFrame per input file; concatenated after the loop.
all_data = []

for file in csv_files:
    # Keep only the id and metric columns and drop exact duplicate rows.
    # The explicit .copy() detaches the result from the frame it was sliced
    # from, so the column assignments below operate on an independent frame
    # and cannot trigger pandas' SettingWithCopyWarning.
    df = pd.read_csv(file)[['stationid'] + metric_cols].drop_duplicates().copy()

    # Force the metrics to numeric; anything unparseable becomes NaN.
    for col in metric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Label each row with the last (up to) five '_'-separated pieces of the
    # file name, taken without its extension.
    filename = os.path.splitext(file)[0]
    parts = filename.split('_')
    label = '_'.join(parts[-5:])
    df['source'] = label
    all_data.append(df)

# Stack all per-file frames into one table with a fresh 0..n-1 index.
combined_df = pd.concat(all_data, ignore_index=True)

# Treat +/-inf as missing so the values are removed by later dropna() calls.
for col in metric_cols:
    combined_df[col] = combined_df[col].replace([float('inf'), float('-inf')], float('nan'))

# Distinct source labels, in order of first appearance.
sources = combined_df['source'].unique()

# ============== Build the summary table ==============

def _metric_stats(values, name, pass_label, passes):
    """Summarise one metric column for one source.

    Parameters
    ----------
    values : pandas.Series
        Raw metric values; NaN entries are dropped before any statistic.
    name : str
        Metric name, used as the prefix of every returned key.
    pass_label : str
        Text describing the pass criterion; it is embedded in the pass-rate
        key as ``f'{name}_{pass_label}(%)'``.
    passes : callable
        Takes the NaN-free Series and returns a boolean Series marking the
        values that satisfy the pass criterion.

    Returns
    -------
    dict
        Keys ``<name>_median``, ``<name>_mean`` (both rounded to 4 decimals),
        ``<name>_count`` (number of non-NaN values) and
        ``<name>_<pass_label>(%)`` (percentage of non-NaN values that pass,
        rounded to 2 decimals; 0 when there are no non-NaN values).
    """
    valid = values.dropna()
    count = len(valid)
    # Guard the division: with no valid values the pass rate is reported as 0.
    pass_rate = passes(valid).sum() / count * 100 if count > 0 else 0
    return {
        f'{name}_median': round(valid.median(), 4),
        f'{name}_mean': round(valid.mean(), 4),
        f'{name}_count': count,
        f'{name}_{pass_label}(%)': round(pass_rate, 2),
    }


# (metric column, label used in the pass-rate column name, pass criterion).
# The order here fixes the column order of the summary table.
metric_rules = [
    ('kge', '>0', lambda s: s > 0),
    ('nrmse', '<0.6', lambda s: s < 0.6),
    ('nse', '>0', lambda s: s > 0),
]

summary_data = []

for src in sources:
    src_data = combined_df[combined_df['source'] == src]

    # One output row per source: the label followed by each metric's stats.
    row = {'source': src}
    for name, pass_label, passes in metric_rules:
        row.update(_metric_stats(src_data[name], name, pass_label, passes))
    summary_data.append(row)

# Turn the per-source records into the final summary table.
summary_df = pd.DataFrame(summary_data)

# Print the table as plain text (no index column) under a framed title.
banner = "=" * 120
print(
    "\n" + banner,
    "【汇总统计表】",
    banner,
    summary_df.to_string(index=False),
    sep="\n",
)

# Persist the same table as CSV in the current working directory.
output_file = '7.metrics_full_summary.csv'
summary_df.to_csv(output_file, index=False)
print(f"\n表格已保存为: {output_file}")

Using 32 workers for parallel processing
SWOT WIDTH-WSE VISUALIZATION

[1/8] Loading data...
[2/8] Calculating width statistics...
[3/8] Processing ORI data - selecting best nodes...
Original nodes: 441, Selected nodes: 280
[4/8] Applying QC to ORI data...
[5/8] Fitting ORI curves...
[6/8] Processing SMOOTH data...
[7/8] Fitting SMOOTH curves...
[8/8] Generating station visualizations...
Total stations to visualize: 197
  Processing station 50/197...
  Processing station 100/197...
  Processing station 150/197...

Visualization complete!
Output directory: station_figures/
Total time: 172.83s
