# In [1]:  (Jupyter cell marker left over from the notebook export)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress, spearmanr
from scipy.optimize import least_squares
from sklearn.metrics import mean_squared_error
import os
import warnings
import multiprocessing as mp
from functools import partial
import itertools
from numba import jit, prange
import gc

warnings.filterwarnings('ignore')

# ============================================================================
# 全局配置
# ============================================================================
# Leave one core free for the parent process and cap the pool at 64 workers.
# The outer max() keeps the value at >= 1: on a single-core machine
# cpu_count() - 1 is 0, and mp.Pool rejects a process count below 1.
N_WORKERS = max(1, min(mp.cpu_count() - 1, 64))
print(f"Using {N_WORKERS} workers for parallel processing")

# ============================================================================
# 【新增】不确定性传播配置
# ============================================================================
# Pixel resolution of the width imagery, in metres.
PIXEL_RESOLUTION = 30.0
# Width jitter attributed to one bank: +/- 1 pixel. (The original note adds
# that two banks together would affect the width by +/- 2 pixels.)
# NOTE(review): _validate_station_wrapper passes this per-bank value directly
# as the sampling half-range, so widths are perturbed by at most +/- 30 m,
# not +/- 60 m -- confirm this is the intended magnitude.
WIDTH_JITTER_PER_BANK = PIXEL_RESOLUTION  # 30m per bank
# Number of Monte Carlo draws per observation.
N_MC_SAMPLES = 100

# ============================================================================
# 全局变量（用于多进程共享数据）
# ============================================================================
# Module-level slots that the parent process fills right before starting a
# worker pool and that the module-level worker functions read.
# node_id -> {'width', 'wse', 'stationid'}; set by NodeSelector.select_best_nodes,
# read by _compute_node_corr.
_GLOBAL_NODE_DATA = {}
# Width statistics indexed by stationid; set by DataQualityControl.apply_qc,
# read by _process_qc_station.
_GLOBAL_WIDTH_STATS = None
# Observation table being quality-controlled; set by DataQualityControl.apply_qc,
# read by _process_qc_station.
_GLOBAL_SWOT_DATA = None
# The HydraulicCurveFitter instance in use; set by fit_all_stations,
# read by _fit_station_wrapper.
_GLOBAL_FITTER = None
# Quality-controlled observations; set by fit_all_stations,
# read by _fit_station_wrapper.
_GLOBAL_QC_DATA = None

# ============================================================================
# Numba加速的核心计算函数
# ============================================================================
@jit(nopython=True, parallel=True, cache=True)
def compute_inconsistency_matrix(w, h):
    """Count, for every observation, how many others it is discordant with.

    Observation ``i`` is discordant with ``j`` when the width difference and
    the elevation difference have opposite signs (strictly negative product),
    so ties in either variable are never counted.

    Despite the name, the result is a vector, not a matrix.

    Parameters
    ----------
    w, h : 1-D arrays indexed in lockstep (width and water-surface elevation).

    Returns
    -------
    np.ndarray of int64, one discordance count per observation.
    """
    size = len(w)
    discordant = np.zeros(size, dtype=np.int64)
    # Outer loop is parallelised by numba; each iteration writes only its own slot.
    for a in prange(size):
        wa = w[a]
        ha = h[a]
        tally = 0
        for b in range(size):
            if (wa - w[b]) * (ha - h[b]) < 0:
                tally += 1
        discordant[a] = tally
    return discordant

@jit(nopython=True, cache=True)
def calculate_areas_numba(w_list, h_list, w50, a50):
    """Integrate a width-elevation curve into cross-sectional areas.

    The area is anchored to ``a50`` at width ``w50`` and extended in both
    directions with the trapezoid rule ``dA = 0.5 * (w_a + w_b) * dh``.

    Parameters
    ----------
    w_list : 1-D array of widths (searched with ``np.searchsorted``, so it is
        expected to be sorted ascending).
    h_list : 1-D array of elevations matching ``w_list``.
    w50 : reference width.
    a50 : area assigned to the reference width.

    Returns
    -------
    np.ndarray of float, same length as ``w_list``; all NaN when fewer than
    two curve points are supplied.
    """
    count = len(w_list)
    areas = np.full(count, np.nan)

    # Nothing to integrate with fewer than two points.
    if count < 2:
        return areas

    # Index of the curve node at/above w50, clamped so pivot-1 is always valid.
    pivot = np.searchsorted(w_list, w50)
    if pivot >= count:
        pivot = count - 1
    elif pivot < 1:
        pivot = 1

    w_below = w_list[pivot - 1]
    w_above = w_list[pivot]
    h_below = h_list[pivot - 1]
    h_above = h_list[pivot]

    # Elevation at w50 by linear interpolation between the bracketing nodes;
    # fall back to their midpoint when the nodes (nearly) coincide in width.
    span = w_above - w_below
    if abs(span) < 1e-10:
        h50 = (h_below + h_above) / 2.0
    else:
        h50 = (h_below * (w_above - w50) + h_above * (w50 - w_below)) / span

    areas[pivot] = a50 + 0.5 * (w50 + w_above) * (h_above - h50)

    # Accumulate upward from the pivot.
    for k in range(pivot + 1, count):
        areas[k] = areas[k - 1] + 0.5 * (w_list[k - 1] + w_list[k]) * \
                   (h_list[k] - h_list[k - 1])

    # Accumulate downward from the pivot.
    for step in range(pivot):
        k = pivot - 1 - step
        areas[k] = areas[k + 1] - 0.5 * (w_list[k + 1] + w_list[k]) * \
                   (h_list[k + 1] - h_list[k])

    return areas

@jit(nopython=True, cache=True)
def nse_numba(observed, simulated):
    """Return the Nash-Sutcliffe efficiency of ``simulated`` against ``observed``.

    Computed as ``1 - SS_residual / SS_total``. Returns NaN when the
    observations have zero variance (``SS_total == 0``).
    """
    residual_ss = np.sum((observed - simulated) ** 2)
    total_ss = np.sum((observed - np.mean(observed)) ** 2)
    if total_ss == 0:
        return np.nan
    return 1 - residual_ss / total_ss

@jit(nopython=True, cache=True)
def kge_numba(observed, simulated):
    """Return the Kling-Gupta efficiency of ``simulated`` against ``observed``.

    Combines three terms: the linear correlation, the ratio of means and the
    ratio of (population) standard deviations, as
    ``1 - sqrt((r-1)^2 + (mean_ratio-1)^2 + (std_ratio-1)^2)``.

    Returns NaN when either series has zero mean or zero standard deviation,
    because one of the ratios (or the correlation) would be undefined.
    """
    mu_obs = np.mean(observed)
    mu_sim = np.mean(simulated)
    sd_obs = np.std(observed)
    sd_sim = np.std(simulated)

    degenerate = sd_obs == 0 or sd_sim == 0 or mu_obs == 0 or mu_sim == 0
    if degenerate:
        return np.nan

    # Population covariance (divide by n) to match np.std's default ddof=0.
    count = len(observed)
    covariance = np.sum((observed - mu_obs) * (simulated - mu_sim)) / count
    r = covariance / (sd_obs * sd_sim)

    mean_ratio = mu_sim / mu_obs
    std_ratio = sd_sim / sd_obs

    return 1 - np.sqrt((r - 1)**2 + (mean_ratio - 1)**2 + (std_ratio - 1)**2)


# ============================================================================
# 【新增】蒙特卡洛不确定性传播函数
# ============================================================================
@jit(nopython=True, cache=True)
def monte_carlo_discharge_numba(width_obs, curve_width, curve_area, slope,
                                 n_samples, width_jitter_range):
    """Sample discharge under a uniform perturbation of the observed width.

    Each draw perturbs ``width_obs`` by one uniform random offset in
    ``[-width_jitter_range, +width_jitter_range]``, clamps the result to the
    curve's width range, looks up the area by linear interpolation on
    ``(curve_width, curve_area)`` and evaluates
    ``Q = A^(5/3) * W^(-2/3) * sqrt(slope) / 0.035``.

    Parameters
    ----------
    width_obs : float
        Observed width.
    curve_width, curve_area : np.ndarray
        Width and area nodes of the curve (``curve_width`` is searched with
        ``np.searchsorted``, so it is expected to be ascending).
    slope : float
        Channel slope.
    n_samples : int
        Number of Monte Carlo draws.
    width_jitter_range : float
        Half-range of the uniform width perturbation, same unit as the widths.

    Returns
    -------
    np.ndarray of length ``n_samples``; an entry is NaN when the area, the
    perturbed width or the slope is not strictly positive.
    """
    n_nodes = len(curve_width)
    w_floor = curve_width[0]
    w_ceiling = curve_width[-1]

    # Start from NaN and overwrite only the draws that yield a valid discharge.
    q_samples = np.full(n_samples, np.nan)

    for k in range(n_samples):
        # One uniform offset in [-range, +range] per draw.
        w = width_obs + (np.random.random() * 2.0 - 1.0) * width_jitter_range

        # Keep the perturbed width inside the curve's domain.
        if w < w_floor:
            w = w_floor
        elif w > w_ceiling:
            w = w_ceiling

        # Piecewise-linear area lookup.
        pos = np.searchsorted(curve_width, w)
        if pos == 0:
            area = curve_area[0]
        elif pos >= n_nodes:
            area = curve_area[-1]
        else:
            w_left = curve_width[pos - 1]
            w_right = curve_width[pos]
            a_left = curve_area[pos - 1]
            a_right = curve_area[pos]
            if w_right - w_left > 0:
                area = a_left + (a_right - a_left) * (w - w_left) / (w_right - w_left)
            else:
                area = a_left

        # Manning-type discharge with a fixed roughness of 0.035.
        if area > 0 and w > 0 and slope > 0:
            q_samples[k] = (area ** (5.0/3.0)) * (w ** (-2.0/3.0)) * (slope ** 0.5) / 0.035

    return q_samples


@jit(nopython=True, cache=True)
def monte_carlo_area_numba(width_obs, curve_width, curve_area, n_samples, width_jitter_range):
    """Sample cross-sectional area under a uniform perturbation of the width.

    Each draw perturbs ``width_obs`` by one uniform random offset in
    ``[-width_jitter_range, +width_jitter_range]``, clamps the result to the
    curve's width range and looks up the area by linear interpolation on
    ``(curve_width, curve_area)``.

    Parameters
    ----------
    width_obs : float
        Observed width.
    curve_width, curve_area : np.ndarray
        Width and area nodes of the curve (``curve_width`` is searched with
        ``np.searchsorted``, so it is expected to be ascending).
    n_samples : int
        Number of Monte Carlo draws.
    width_jitter_range : float
        Half-range of the uniform width perturbation, same unit as the widths.

    Returns
    -------
    np.ndarray of length ``n_samples``. A draw whose perturbed width is NaN
    yields NaN.
    """
    area_samples = np.zeros(n_samples)

    for i in range(n_samples):
        jitter = (np.random.random() * 2.0 - 1.0) * width_jitter_range
        w_jittered = width_obs + jitter

        # A NaN width fails both clamp comparisons and is sorted past the last
        # node by searchsorted, which used to return the largest area on the
        # curve. Propagate NaN instead so a missing width stays missing.
        if np.isnan(w_jittered):
            area_samples[i] = np.nan
            continue

        # Keep the perturbed width inside the curve's domain.
        if w_jittered < curve_width[0]:
            w_jittered = curve_width[0]
        elif w_jittered > curve_width[-1]:
            w_jittered = curve_width[-1]

        # Piecewise-linear area lookup.
        idx = np.searchsorted(curve_width, w_jittered)
        if idx == 0:
            area_samples[i] = curve_area[0]
        elif idx >= len(curve_width):
            area_samples[i] = curve_area[-1]
        else:
            w0, w1 = curve_width[idx-1], curve_width[idx]
            a0, a1 = curve_area[idx-1], curve_area[idx]
            if w1 - w0 > 0:
                area_samples[i] = a0 + (a1 - a0) * (w_jittered - w0) / (w1 - w0)
            else:
                area_samples[i] = a0

    return area_samples


# ============================================================================
# 全局并行处理函数（必须在模块级别定义才能被pickle）
# ============================================================================
def _compute_node_corr(node_id):
    """Return ``(node_id, stationid, rank_corr)`` for one node.

    Reads the node's arrays from the module-level ``_GLOBAL_NODE_DATA``.
    ``rank_corr`` is the Spearman correlation between width and elevation;
    it is 0.0 when the node is unknown, has fewer than 5 observations, the
    correlation is NaN, or the computation fails. ``stationid`` is ``None``
    for an unknown node.
    """
    data = _GLOBAL_NODE_DATA.get(node_id)
    if data is None:
        return (node_id, None, 0.0)

    stationid = data['stationid']
    if len(data['width']) < 5:
        return (node_id, stationid, 0.0)

    try:
        corr, _ = spearmanr(data['width'], data['wse'])
    except Exception:
        # Best effort: a node that cannot be scored simply ranks lowest.
        # (Not a bare ``except`` so Ctrl-C / SystemExit still reach the worker.)
        return (node_id, stationid, 0.0)

    if np.isnan(corr):
        corr = 0.0
    return (node_id, stationid, corr)

def _process_qc_station(stationid):
    """Run the three-step quality control for one station.

    Reads ``_GLOBAL_WIDTH_STATS`` (indexed by stationid, with a ``w_high``
    column) and ``_GLOBAL_SWOT_DATA`` (columns ``stationid``, ``width``,
    ``width_u``, ``wse``, ``wse_u``).

    Steps:
      1. keep rows with ``wse_u <= 0.4`` and ``width_u / width <= 0.1``;
      2. iteratively drop order-inconsistent points;
      3. keep rows whose ``wse`` lies within one bankfull depth of the median,
         with bankfull depth ``0.27 * (w_high / 7.2) ** 0.6``.

    Returns the surviving rows as a DataFrame, or ``None`` when the shared
    data is not set, the station has no width statistics, or fewer than 5
    rows remain after any step.
    """
    if _GLOBAL_WIDTH_STATS is None or _GLOBAL_SWOT_DATA is None:
        return None
    if stationid not in _GLOBAL_WIDTH_STATS.index:
        return None

    df = _GLOBAL_SWOT_DATA[_GLOBAL_SWOT_DATA['stationid'] == stationid].copy()
    if len(df) < 5:
        return None

    # Step 1: uncertainty screening (relative width uncertainty, absolute wse uncertainty).
    df['width_u_r'] = df['width_u'] / df['width']
    df1 = df[(df['wse_u'] <= 0.4) & (df['width_u_r'] <= 0.1)]
    if len(df1) < 5:
        return None

    # Step 2: remove points that contradict the width/elevation ordering.
    df2 = _remove_inconsistent_points(df1)
    if len(df2) < 5:
        return None

    # Step 3: outlier removal around the median elevation.
    w_high = _GLOBAL_WIDTH_STATS.loc[stationid, 'w_high']
    d_bankfull = 0.27 * (w_high / 7.2) ** 0.6
    h50 = df2['wse'].median()

    df3 = df2[(df2['wse'] <= h50 + d_bankfull) & (df2['wse'] >= h50 - d_bankfull)]

    return df3 if len(df3) >= 5 else None

def _remove_inconsistent_points(df, inverse_ratio_thresh=0.5):
    """Iteratively drop the most order-inconsistent observation.

    In each round the discordance count of every remaining row is computed
    with ``compute_inconsistency_matrix``; the row with the highest count is
    removed while ``count / n_remaining >= inverse_ratio_thresh``. The loop
    also stops once fewer than 5 rows remain.

    Rows are tracked by position rather than by index label, so the result is
    correct even when ``df`` carries duplicate index labels (label-based
    ``.loc`` selection would return extra rows in that case).

    Parameters
    ----------
    df : DataFrame with ``width`` and ``wse`` columns.
    inverse_ratio_thresh : float
        Minimum discordant fraction that triggers removal.

    Returns
    -------
    DataFrame containing the retained rows of ``df`` in their original order.
    """
    w_all = df['width'].values.astype(np.float64)
    h_all = df['wse'].values.astype(np.float64)
    keep = list(range(len(df)))

    while len(keep) >= 5:
        inverse = compute_inconsistency_matrix(w_all[keep], h_all[keep])

        worst = int(np.argmax(inverse))
        if inverse[worst] / len(keep) < inverse_ratio_thresh:
            break

        keep.pop(worst)

    return df.iloc[keep]

def _fit_station_wrapper(stationid):
    """Fit one station using the module-level fitter and QC table.

    Selects the station's rows from ``_GLOBAL_QC_DATA``, takes ``COMID`` from
    the first row and delegates to ``_GLOBAL_FITTER.fit_station``. Returns
    ``None`` when the station has no rows.
    """
    station_rows = _GLOBAL_QC_DATA[_GLOBAL_QC_DATA['stationid'] == stationid]
    if station_rows.shape[0] == 0:
        return None

    first_row = station_rows.iloc[0]
    return _GLOBAL_FITTER.fit_station(station_rows, stationid, first_row['COMID'])


# ============================================================================
# 【修改】验证函数 - 添加不确定性传播
# ============================================================================
def _validate_station_wrapper(args):
    """Validate one station against gauge discharge, with width-uncertainty propagation.

    Parameters
    ----------
    args : tuple
        ``(s, df_hypso, df_width, df_val_folder, df_fit, start_date,
        skip_width_filter)`` where

        * ``s`` is the station id; ``<df_val_folder>/<s>.csv`` must hold a
          ``qobs`` column with one row per day starting at ``start_date``;
        * ``df_hypso`` holds the width-area curve (``stationid``, ``width``,
          ``area``);
        * ``df_width`` holds observed widths (``stationid``, ``date``,
          ``width``);
        * ``df_fit`` holds fit parameters (``stationid``, ``w_low``,
          ``w_high``, ``slp``);
        * ``skip_width_filter`` disables the ``[w_low, w_high]`` width filter.

    Returns
    -------
    DataFrame with one row per validated date carrying the point estimate
    (``Q_est``), Monte Carlo median and 50 % / 95 % bounds for discharge and
    area, and station-level metrics repeated on every row; or ``None`` when
    the file is missing, the station has no curve/fit, fewer than 10 usable
    rows remain, or any error occurs (the error is printed).
    """
    s, df_hypso, df_width, df_val_folder, df_fit, start_date, skip_width_filter = args

    file_path = os.path.join(df_val_folder, f'{s}.csv')
    if not os.path.exists(file_path):
        return None

    try:
        df_val = pd.read_csv(file_path)
        # The gauge file has no date column: rows are consecutive days.
        df_val['date'] = pd.date_range(start=start_date, periods=len(df_val), freq='D')
        df_val['stationid'] = s
        df_val = df_val.dropna(subset=['qobs'])

        df_width_s = df_width[df_width['stationid'] == s]
        df_val = df_val.merge(df_width_s, on=['stationid', 'date'], how='inner')

        df_curve = df_hypso[df_hypso['stationid'] == s].reset_index(drop=True)
        station_fit = df_fit[df_fit['stationid'] == s]
        if station_fit.empty or df_curve.empty:
            return None

        row = station_fit.iloc[0]
        w_low, w_high, slp = row['w_low'], row['w_high'], row['slp']

        # Node mode restricts widths to the fitted range; datemean mode
        # (skip_width_filter) keeps every width and only de-duplicates dates.
        if not skip_width_filter:
            df_val = df_val[
                (df_val['width'] >= w_low) &
                (df_val['width'] <= w_high)
            ]
        # Explicit copy: columns are added below, and writing to a filtered
        # slice is a chained assignment.
        df_val = df_val.drop_duplicates('date').copy()

        if len(df_val) < 10:
            return None

        curve_width = df_curve['width'].values.astype(np.float64)
        curve_area = df_curve['area'].values.astype(np.float64)
        val_width = df_val['width'].values.astype(np.float64)

        # ------------------------------------------------------------
        # Uncertainty propagation: Monte Carlo over the observed width.
        # The sampler draws uniformly from +/- this half-range, i.e.
        # +/- WIDTH_JITTER_PER_BANK metres in total.
        # ------------------------------------------------------------
        width_jitter_total = WIDTH_JITTER_PER_BANK

        n_obs = len(df_val)

        # Columns: lower 95, upper 95, lower 50, upper 50 (NaN when no valid draw).
        q_bound_levels = [2.5, 97.5, 25, 75]
        q_median = np.full(n_obs, np.nan)
        q_bounds = np.full((n_obs, len(q_bound_levels)), np.nan)
        # Columns: lower 95, upper 95.
        area_bound_levels = [2.5, 97.5]
        area_median = np.full(n_obs, np.nan)
        area_bounds = np.full((n_obs, len(area_bound_levels)), np.nan)

        for i in range(n_obs):
            w_obs = val_width[i]

            q_samples = monte_carlo_discharge_numba(
                w_obs, curve_width, curve_area, slp,
                N_MC_SAMPLES, width_jitter_total
            )
            area_samples = monte_carlo_area_numba(
                w_obs, curve_width, curve_area,
                N_MC_SAMPLES, width_jitter_total
            )

            q_valid = q_samples[~np.isnan(q_samples)]
            if len(q_valid) > 0:
                q_median[i] = np.median(q_valid)
                q_bounds[i] = np.percentile(q_valid, q_bound_levels)

            area_valid = area_samples[~np.isnan(area_samples)]
            if len(area_valid) > 0:
                area_median[i] = np.median(area_valid)
                area_bounds[i] = np.percentile(area_valid, area_bound_levels)

        # Deterministic point estimate (no width perturbation).
        area_hypso = np.interp(val_width, curve_width, curve_area)
        q_est_point = (area_hypso**(5/3) * val_width**(-2/3) * slp**0.5 / 0.035)

        df_val['area_hypso'] = area_hypso
        df_val['Q_est'] = q_est_point
        df_val['Q_est_median'] = q_median
        df_val['Q_est_lower_95'] = q_bounds[:, 0]
        df_val['Q_est_upper_95'] = q_bounds[:, 1]
        df_val['Q_est_lower_50'] = q_bounds[:, 2]
        df_val['Q_est_upper_50'] = q_bounds[:, 3]
        df_val['area_median'] = area_median
        df_val['area_lower_95'] = area_bounds[:, 0]
        df_val['area_upper_95'] = area_bounds[:, 1]

        df_val = df_val.dropna(subset=['Q_est', 'qobs'])
        if len(df_val) < 10:
            return None

        obs = df_val['qobs'].values.astype(np.float64)
        sim = df_val['Q_est'].values.astype(np.float64)
        sim_median = df_val['Q_est_median'].values.astype(np.float64)
        mean_obs = np.mean(obs)

        # Skill of the point estimate.
        kge_val = kge_numba(obs, sim)
        nse_val = nse_numba(obs, sim)
        rmse = np.sqrt(np.mean((obs - sim)**2))
        # Guard the normalisation the same way relative_ci_width_95 is guarded.
        nrmse_val = rmse / mean_obs if mean_obs != 0 else np.nan

        # Skill of the Monte Carlo median.
        kge_val_median = kge_numba(obs, sim_median)
        nse_val_median = nse_numba(obs, sim_median)

        # Coverage: fraction of observations inside each interval
        # (NaN bounds compare False and therefore count as misses).
        lower_95 = df_val['Q_est_lower_95'].values
        upper_95 = df_val['Q_est_upper_95'].values
        coverage_95 = np.mean((obs >= lower_95) & (obs <= upper_95))

        lower_50 = df_val['Q_est_lower_50'].values
        upper_50 = df_val['Q_est_upper_50'].values
        coverage_50 = np.mean((obs >= lower_50) & (obs <= upper_50))

        # Mean interval widths, absolute and relative to mean observed flow.
        ci_width_95 = np.mean(upper_95 - lower_95)
        ci_width_50 = np.mean(upper_50 - lower_50)
        if mean_obs > 0 and np.isfinite(ci_width_95):
            relative_ci_width_95 = ci_width_95 / mean_obs
        else:
            relative_ci_width_95 = np.nan

        # Station-level metrics are broadcast onto every row.
        df_val['kge'] = kge_val
        df_val['nse'] = nse_val
        df_val['nrmse'] = nrmse_val
        df_val['kge_median'] = kge_val_median
        df_val['nse_median'] = nse_val_median
        df_val['coverage_95'] = coverage_95
        df_val['coverage_50'] = coverage_50
        df_val['ci_width_95'] = ci_width_95
        df_val['ci_width_50'] = ci_width_50
        df_val['relative_ci_width_95'] = relative_ci_width_95

        return df_val[['stationid', 'date', 'width',
                       'area_hypso', 'area_median', 'area_lower_95', 'area_upper_95',
                       'qobs', 'Q_est', 'Q_est_median',
                       'Q_est_lower_95', 'Q_est_upper_95',
                       'Q_est_lower_50', 'Q_est_upper_50',
                       'kge', 'nse', 'nrmse',
                       'kge_median', 'nse_median',
                       'coverage_95', 'coverage_50',
                       'ci_width_95', 'ci_width_50', 'relative_ci_width_95']]
    except Exception as e:
        # Worker boundary: report and skip the station instead of killing the pool.
        print(f"Error processing station {s}: {e}")
        import traceback
        traceback.print_exc()
        return None


def _rolling_median_group(group):
    """Smooth ``width`` and ``wse`` with a centred 5-sample rolling median.

    The rows are first ordered by ``date``; windows at the edges shrink to the
    samples available (``min_periods=1``). Returns the date-sorted frame with
    both columns replaced by their smoothed values.
    """
    ordered = group.sort_values('date')
    for column in ('width', 'wse'):
        window = ordered[column].rolling(window=5, center=True, min_periods=1)
        ordered[column] = window.median()
    return ordered

# ============================================================================
# 模块1: 数据统计工具 (修改为IQR方法)
# ============================================================================
class WidthStatistics:
    """Per-station river-width statistics based on the interquartile range."""

    # Option key -> IQR multiplier k, used as
    #   w_low  = Q1 - k * IQR   (floored at min_width)
    #   w_high = Q3 + k * IQR
    IQR_CONFIG = {
        '1.0': 1.0,
        '1.5': 1.5,
        '2.0': 2.0,
        '2.5': 2.5,
        '3.0': 3.0,
        '4.0': 4.0
    }

    @staticmethod
    def calculate_width_iqr(df, min_width=30, valid_ratio=0.95, min_iqr=5):
        """Compute median, quartiles and IQR-based width bounds per station.

        Parameters
        ----------
        df : DataFrame
            Must contain ``stationid`` and ``width`` columns.
        min_width : float
            Widths below this value are treated as invalid; it is also the
            floor applied to every ``w_low``.
        valid_ratio : float
            A station is dropped when the share of its non-null widths that
            are ``>= min_width`` falls below this ratio.
        min_iqr : float
            A station is dropped (and reported) when its IQR is below this.

        Returns
        -------
        DataFrame with one row per retained station: ``stationid``, ``w50``,
        ``Q1``, ``Q3``, ``IQR`` and a ``w_low_iqr<key>`` / ``w_high_iqr<key>``
        pair for every key in ``IQR_CONFIG``. Stations with 10 or fewer valid
        widths are dropped silently. Empty (no columns) when nothing is kept.
        """
        result_data = []
        skipped_stations = []

        # One pass over the table; sort=False keeps first-appearance order.
        for stationid, widths in df.groupby('stationid', sort=False)['width']:
            station_data_all = widths.dropna()
            if len(station_data_all) == 0:
                continue

            station_data = station_data_all[station_data_all >= min_width]
            if len(station_data) / len(station_data_all) < valid_ratio:
                continue
            if len(station_data) <= 10:
                continue

            w50 = station_data.median()
            q1 = station_data.quantile(0.25)
            q3 = station_data.quantile(0.75)
            iqr = q3 - q1

            # Quartiles too close together: the station carries no usable spread.
            if iqr < min_iqr:
                skipped_stations.append((stationid, q1, q3, iqr))
                continue

            row_data = {
                'stationid': stationid,
                'w50': w50,
                'Q1': q1,
                'Q3': q3,
                'IQR': iqr
            }

            for key, k in WidthStatistics.IQR_CONFIG.items():
                row_data[f'w_low_iqr{key}'] = max(q1 - k * iqr, min_width)
                row_data[f'w_high_iqr{key}'] = q3 + k * iqr

            result_data.append(row_data)

        # Report skipped stations (first 10 only).
        if skipped_stations:
            print(f"Skipped {len(skipped_stations)} stations due to small IQR (< {min_iqr}):")
            for sid, q1, q3, iqr in skipped_stations[:10]:
                print(f"  Station {sid}: Q1={q1:.2f}, Q3={q3:.2f}, IQR={iqr:.2f}")
            if len(skipped_stations) > 10:
                print(f"  ... and {len(skipped_stations) - 10} more stations")

        return pd.DataFrame(result_data)

    @staticmethod
    def get_iqr_columns(b_option):
        """Return the ``(w_low, w_high)`` column names for an IQR option key.

        Raises
        ------
        ValueError
            If ``b_option`` is not a key of ``IQR_CONFIG``.
        """
        if b_option not in WidthStatistics.IQR_CONFIG:
            raise ValueError(f"Invalid B option: {b_option}. Valid options: {list(WidthStatistics.IQR_CONFIG.keys())}")

        return f'w_low_iqr{b_option}', f'w_high_iqr{b_option}'

    # Legacy name kept for backward compatibility.
    @staticmethod
    def calculate_width_percentiles(df, min_width=30, valid_ratio=0.95, min_iqr=5):
        """Alias of :meth:`calculate_width_iqr` kept for older callers."""
        return WidthStatistics.calculate_width_iqr(df, min_width, valid_ratio, min_iqr)

# ============================================================================
# 模块2: 节点选择
# ============================================================================
class NodeSelector:
    """Pick, for every station, the node whose width tracks elevation best."""

    @staticmethod
    def select_best_nodes(df_swot, min_data_points=10):
        """Keep only the node with the highest width/elevation rank correlation per station.

        Parameters
        ----------
        df_swot : DataFrame
            Needs ``node_id``, ``stationid``, ``width`` and ``wse`` columns.
        min_data_points : int
            Nodes with fewer rows than this are discarded up front.

        Returns
        -------
        (df_filtered, df_node_rmax)
            ``df_filtered`` holds the rows of the selected nodes;
            ``df_node_rmax`` holds one row per station with ``node_id``,
            ``stationid`` and ``rank_corr``.

        Notes
        -----
        Per-node arrays are handed to the workers through the module-level
        ``_GLOBAL_NODE_DATA``, which is assigned here at run time.
        NOTE(review): workers only see that assignment when the pool is
        started with the ``fork`` start method; under ``spawn`` they would
        re-import the module with an empty cache -- confirm the platform.
        """
        global _GLOBAL_NODE_DATA

        # Drop nodes with too few observations.
        node_counts = df_swot.groupby('node_id').size()
        valid_nodes = node_counts[node_counts >= min_data_points].index
        df_swot = df_swot[df_swot['node_id'].isin(valid_nodes)].copy()

        # Cache plain arrays per node so workers do not touch the DataFrame.
        _GLOBAL_NODE_DATA = {
            node_id: {
                'width': group['width'].values,
                'wse': group['wse'].values,
                'stationid': group['stationid'].iloc[0]
            }
            for node_id, group in df_swot.groupby('node_id')
        }

        try:
            node_ids = list(_GLOBAL_NODE_DATA.keys())
            with mp.Pool(processes=N_WORKERS) as pool:
                results = pool.map(_compute_node_corr, node_ids)
        finally:
            # Release the cache even when the pool raises.
            _GLOBAL_NODE_DATA = {}

        df_node = pd.DataFrame(results, columns=['node_id', 'stationid', 'rank_corr'])
        df_node = df_node.dropna(subset=['stationid'])

        # Best node per station.
        max_idx = df_node.groupby('stationid')['rank_corr'].idxmax()
        df_node_rmax = df_node.loc[max_idx]

        df_filtered = df_swot[df_swot['node_id'].isin(df_node_rmax['node_id'])]

        print(f"Original nodes: {len(df_node)}, Selected nodes: {len(df_node_rmax)}")

        return df_filtered, df_node_rmax

# ============================================================================
# 模块3: 数据质控
# ============================================================================
class DataQualityControl:
    """Quality control of SWOT width / elevation observations."""

    def __init__(self, width_stats):
        """Store ``width_stats`` indexed by its ``stationid`` column."""
        self.width_stats = width_stats.set_index('stationid')

    def apply_qc(self, df_swot, draw_figure=False, output_folder=None):
        """Run ``_process_qc_station`` for every station in parallel.

        Parameters
        ----------
        df_swot : DataFrame
            Observations; needs at least a ``stationid`` column here, plus the
            ``node_id`` and ``date`` columns used for de-duplication.
        draw_figure, output_folder
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        DataFrame of rows that passed QC, de-duplicated on
        ``(node_id, date, stationid)`` with a fresh integer index; an empty
        DataFrame when no station passes.
        """
        global _GLOBAL_WIDTH_STATS, _GLOBAL_SWOT_DATA

        # Workers read these module-level references.
        _GLOBAL_WIDTH_STATS = self.width_stats
        _GLOBAL_SWOT_DATA = df_swot

        try:
            stationids = df_swot['stationid'].unique()
            with mp.Pool(processes=N_WORKERS) as pool:
                results = pool.map(_process_qc_station, stationids)
        finally:
            # Drop the references even when the pool raises.
            _GLOBAL_WIDTH_STATS = None
            _GLOBAL_SWOT_DATA = None

        passed = [df for df in results if df is not None]
        if not passed:
            return pd.DataFrame()

        result = pd.concat(passed)
        result = result.drop_duplicates(subset=['node_id', 'date', 'stationid'])
        return result.reset_index(drop=True)

# ============================================================================
# 模块4: 水位-面积曲线拟合
# ============================================================================
class HydraulicCurveFitter:
    """Fit power-law stage-width curves ``wse = wse0 + a * width**b`` per station."""

    def __init__(self, width_stats, river_attrs, skip_width_filter=False):
        """
        Parameters
        ----------
        width_stats : DataFrame
            Width statistics with a ``stationid`` column (becomes the index)
            and ``w50``, ``w_low``, ``w_high`` columns.
        river_attrs : DataFrame
            River attributes with a ``COMID`` column (becomes the index) and
            ``q50_weighted``, ``slope`` columns.
        skip_width_filter : bool
            When True, observations are not restricted to ``[w_low, w_high]``
            before fitting (used for the datemean mode).
        """
        self.width_stats = width_stats.set_index('stationid')
        self.river_attrs = river_attrs.set_index('COMID')
        self.skip_width_filter = skip_width_filter

        # Hyper-parameter grid explored for every station.
        self.R_list = np.array([0.5, 1, 2, 4, 8])
        self.GAP_list = np.array([-0.3, -0.1, 0, 0.1, 0.3])
        self.W_list = np.array([0.3, 0.5, 0.7])

    @staticmethod
    def power_function(params, X, y):
        """Residuals ``y - (wse0 + a * X**b)`` for ``params = (wse0, a, b)``."""
        wse0, a, b = params
        return y - (wse0 + a * X**b)

    def loss_function(self, z, weight, n_swot):
        """Custom robust loss for ``scipy.optimize.least_squares``.

        Returns a ``(3, len(z))`` array holding the loss, its first and its
        second derivative with respect to ``z``. The first two columns are
        additionally scaled by ``(n_swot - 2) / weight * (1 - weight) / 2``;
        in ``_fit_single_config`` those two residuals are the synthetic anchor
        points placed at the front of the data arrays.
        """
        rho = np.zeros((3, len(z)))
        rho[0] = 2 * ((1 + z)**0.5 - 1)
        rho[1] = (1 + z)**(-0.5)
        rho[2] = -0.5 * (1 + z)**(-1.5)

        factor = (n_swot - 2) / weight * (1 - weight) / 2
        rho[:, 0] *= factor
        rho[:, 1] *= factor

        return rho

    def calculate_h50(self, df, w50):
        """Estimate the elevation at width ``w50`` from the 5 nearest observations.

        Uses a linear regression of ``wse`` on ``width`` over the 5 rows whose
        width is closest to ``w50``. Falls back to the mean ``wse`` of those
        rows when they share a single width or the fitted slope is negative.
        """
        df = df.copy()
        df['w50_diff'] = np.abs(df['width'] - w50)
        nearest = df.sort_values('w50_diff').iloc[:5]

        xdata = nearest['width'].values
        ydata = nearest['wse'].values

        if len(np.unique(xdata)) < 2:
            return nearest['wse'].mean()

        res = linregress(xdata, ydata)
        if res.slope >= 0:
            return res.slope * w50 + res.intercept
        return nearest['wse'].mean()

    def fit_station(self, df_station, stationid, comid):
        """Fit every hyper-parameter combination for one station.

        Returns a DataFrame with one row per successful ``(R, GAP, W)``
        configuration, or ``None`` when the station/COMID is unknown, fewer
        than 3 observations remain, no configuration converges, or any error
        occurs while preparing the fit.
        """
        if stationid not in self.width_stats.index:
            return None
        if comid not in self.river_attrs.index:
            return None

        try:
            q50 = self.river_attrs.loc[comid, 'q50_weighted']
            slp = self.river_attrs.loc[comid, 'slope']
            w50, w_low, w_high = self.width_stats.loc[stationid, ['w50', 'w_low', 'w_high']]
            d_bankfull = 0.27 * (w_high / 7.2)**0.6

            h50 = self.calculate_h50(df_station, w50)
            # Area at median flow from the Manning-type relation used elsewhere
            # in this file (roughness 0.035), solved for area.
            a50 = (q50 * 0.035 / slp**0.5 * w50**(2/3))**(3/5)

            if self.skip_width_filter:
                # datemean mode: keep every width.
                df_filtered = df_station.copy()
            else:
                # node mode: restrict to the station's width range.
                df_filtered = df_station[
                    (df_station['width'] >= w_low) &
                    (df_station['width'] <= w_high)
                ]

            if len(df_filtered) < 3:
                return None

            # Observation with the highest elevation anchors the upper bound.
            swot_wsemax = df_filtered.sort_values('wse', ascending=False).iloc[0]
            d_wsemax = 0.27 * (swot_wsemax['width'] / 7.2)**0.6

            results = []
            for r_low in self.R_list:
                for gap in self.GAP_list:
                    for weight in self.W_list:
                        result = self._fit_single_config(
                            df_filtered, r_low, gap, weight,
                            w_low, w_high, w50, h50, a50,
                            swot_wsemax, d_bankfull, d_wsemax, slp, q50
                        )
                        if result is not None:
                            result.update({
                                'stationid': stationid,
                                'COMID': comid,
                                'R': r_low,
                                'GAP': gap,
                                'W': weight
                            })
                            results.append(result)

            return pd.DataFrame(results) if results else None
        except Exception:
            # Best effort per station: a station that cannot be fitted is
            # skipped so the remaining stations in the batch still run.
            return None

    def _fit_single_config(self, df, r_low, gap, weight, w_low, w_high, w50,
                          h50, a50, swot_wsemax, d_bankfull, d_wsemax, slp, q50):
        """Fit one ``(r_low, gap, weight)`` configuration.

        Two synthetic anchor points ``(w_low, h_low)`` and ``(w_high, h_high)``
        are prepended to the observations before the robust least-squares fit.
        Returns a dict of fit parameters, or ``None`` when the solver fails,
        does not converge (``status <= 0``) or yields ``a * b < 0``.
        """
        a_low = a50 * (r_low + 1) / r_low / w50**(r_low + 1)
        h0 = h50 - a_low * w50**r_low
        h_low = h0 + a_low * w_low**r_low
        h_high = swot_wsemax['wse'] + (d_bankfull - d_wsemax) + gap * d_bankfull

        # Anchors occupy positions 0 and 1; loss_function re-weights exactly those.
        xdata = np.insert(df['width'].values, 0, [w_low, w_high])
        ydata = np.insert(df['wse'].values, 0, [h_low, h_high])
        a_default = (h_high - h0) / w_high**2

        n_swot = len(df)

        def loss_wrapper(z):
            return self.loss_function(z, weight, n_swot)

        try:
            ls = least_squares(
                self.power_function,
                x0=[h0, a_default, 2],
                loss=loss_wrapper,
                args=(xdata, ydata),
                max_nfev=100
            )
        except Exception:
            # A failed solve just drops this configuration. Not a bare
            # ``except`` so KeyboardInterrupt / SystemExit still propagate.
            return None

        if ls.status <= 0:
            return None

        wse0, a, b = ls.x
        if a * b < 0:
            return None

        return {
            'wse0': wse0, 'a': a, 'b': b,
            'a50': a50, 'w50': w50, 'q50': q50,
            'w_low': w_low, 'w_high': w_high,
            'h_low': h_low, 'h_high': h_high,
            'slp': slp
        }

    def fit_all_stations(self, df_qc):
        """Fit all stations in ``df_qc`` in parallel.

        Returns the concatenated per-station results, or ``None`` when no
        station produced a fit.
        """
        global _GLOBAL_FITTER, _GLOBAL_QC_DATA

        # Workers read these module-level references.
        _GLOBAL_FITTER = self
        _GLOBAL_QC_DATA = df_qc

        try:
            unique_stations = df_qc['stationid'].unique()
            with mp.Pool(processes=N_WORKERS) as pool:
                results = pool.map(_fit_station_wrapper, unique_stations)
        finally:
            # Drop the references even when the pool raises.
            _GLOBAL_FITTER = None
            _GLOBAL_QC_DATA = None

        results = [df for df in results if df is not None]

        if results:
            return pd.concat(results, ignore_index=True)
        return None

# ============================================================================
# 模块5: 水位-面积曲线生成
# ============================================================================
class HypsometricCurveGenerator:
    """Build width-elevation-area curves from the fitted parameter ensembles."""

    @staticmethod
    def generate_curves(df_fit, n_points=100):
        """Generate the median stage curve and its area for every station.

        For each station the fitted curves ``wse0 + a * w**b`` are evaluated on
        ``n_points`` widths evenly spaced over ``[w_low, w_high]``; the
        per-width median, maximum and minimum elevation are kept and the
        median curve is integrated into areas with ``calculate_areas_numba``.
        Stations whose width range is empty or narrower than 1e-6 are skipped
        with a printed warning.

        Returns
        -------
        DataFrame with columns ``stationid``, ``width``, ``wse``, ``wse_max``,
        ``wse_min``, ``area``; an empty DataFrame when no station qualifies.
        """
        curves = []

        for s in sorted(df_fit['stationid'].unique()):
            fits = df_fit[df_fit['stationid'] == s]
            w_low, w_high, w50, a50 = fits.iloc[0][
                ['w_low', 'w_high', 'w50', 'a50']
            ]

            # Guard against a degenerate width range.
            if w_high <= w_low or abs(w_high - w_low) < 1e-6:
                print(f"Warning: Skipping station {s} due to invalid width range (w_low={w_low}, w_high={w_high})")
                continue

            w_list = np.linspace(w_low, w_high, n_points)

            # One row per fitted configuration, one column per width.
            offsets = fits['wse0'].values[:, np.newaxis]
            coefs = fits['a'].values[:, np.newaxis]
            exponents = fits['b'].values[:, np.newaxis]
            heights_all = offsets + coefs * w_list**exponents

            h_list = np.median(heights_all, axis=0)
            upper = np.max(heights_all, axis=0)
            lower = np.min(heights_all, axis=0)

            areas = calculate_areas_numba(w_list, h_list, w50, a50)

            curves.append(pd.DataFrame({
                'stationid': s,
                'width': w_list,
                'wse': h_list,
                'wse_max': upper,
                'wse_min': lower,
                'area': areas
            }))

        if not curves:
            return pd.DataFrame()
        return pd.concat(curves, ignore_index=True)

# ============================================================================
# 模块6: 验证与评估
# ============================================================================
class ModelValidator:
    """Model validation and performance evaluation."""

    @staticmethod
    def relative_rmse(observed, simulated):
        """Return the RMSE of ``simulated`` against ``observed`` divided by
        the mean of ``observed``.

        No guard is applied to the denominator, so the result is only
        meaningful when the observed mean is non-zero.
        """
        rmse = np.sqrt(mean_squared_error(observed, simulated))
        return rmse / np.mean(observed)

    def validate(self, df_hypso, df_width, df_val_folder, df_fit, skip_width_filter=False):
        """
        Validate every station found in ``df_hypso`` using a process pool.

        One argument tuple per station is handed to
        ``_validate_station_wrapper``; results that come back as ``None`` are
        discarded and the remaining frames are concatenated.

        Parameters:
        -----------
        df_hypso : pandas.DataFrame
            Curves to validate; its ``stationid`` column defines the stations.
        df_width, df_val_folder, df_fit :
            Forwarded unchanged to ``_validate_station_wrapper``.
        skip_width_filter : bool
            Whether to skip the width filter (used for the datemean mode).

        Returns:
        --------
        pandas.DataFrame
            Concatenated per-station results, or an empty DataFrame when
            there is no station or no station produced a result.
        """
        stationids = sorted(df_hypso['stationid'].unique())
        if not stationids:
            # Nothing to validate: do not pay for spawning a worker pool.
            return pd.DataFrame()

        start_date = pd.to_datetime('1979-01-01')

        args_list = [
            (s, df_hypso, df_width, df_val_folder, df_fit, start_date, skip_width_filter)
            for s in stationids
        ]

        # Pool() rejects processes < 1, which N_WORKERS can be on a
        # single-core machine; there is also no point in starting more
        # workers than there are stations.
        n_procs = max(1, min(N_WORKERS, len(stationids)))
        with mp.Pool(processes=n_procs) as pool:
            results = pool.map(_validate_station_wrapper, args_list)

        results = [df for df in results if df is not None]

        return pd.concat(results, ignore_index=True) if results else pd.DataFrame()


# ============================================================================
# 【新增】模块7: 不确定性可视化
# ============================================================================
class UncertaintyVisualizer:
    """Plots and tabular reports for the uncertainty-propagation results."""

    @staticmethod
    def _summary_path(output_path):
        """Return ``output_path`` with ``_summary`` inserted before its extension.

        ``report.csv`` becomes ``report_summary.csv``. Working on the
        extension (instead of replacing the substring ``.csv``) guarantees
        the summary never shares a path with the station table.
        """
        root, ext = os.path.splitext(output_path)
        return f"{root}_summary{ext}"

    @staticmethod
    def _coverage_histogram(ax, values, level):
        """Draw one coverage histogram panel for the ``level``% interval.

        The expected-coverage and mean-coverage markers are only drawn when
        ``values`` is non-empty; axis labels and title are always set.
        """
        if len(values) > 0:
            mean_coverage = values.mean()
            ax.hist(values, bins=20, edgecolor='black', alpha=0.7)
            ax.axvline(x=level / 100, color='red', linestyle='--',
                       label=f'Expected ({level}%)')
            ax.axvline(x=mean_coverage, color='blue',
                       linestyle='-', label=f"Mean ({mean_coverage:.2f})")
            ax.legend()
        ax.set_xlabel(f'{level}% CI Coverage')
        ax.set_ylabel('Number of Stations')
        ax.set_title(f'{level}% Credible Interval Coverage')

    @staticmethod
    def plot_coverage_summary(df_results, output_path):
        """Save a 2x2 summary figure of interval coverage across stations.

        Panels: histogram of 95% coverage, histogram of 50% coverage,
        95% coverage against KGE (coloured by relative CI width), and a
        histogram of the relative 95% CI width.

        Parameters
        ----------
        df_results : pandas.DataFrame
            Needs ``stationid``, ``coverage_95``, ``coverage_50``, ``kge``,
            ``nse`` and ``relative_ci_width_95``; the first value per station
            is used for each metric.
        output_path : str
            Destination of the figure (saved at 150 dpi).

        Returns
        -------
        pandas.DataFrame
            The per-station summary with +/-inf replaced by NaN. Rows with
            NaN are kept in the returned table but left out of every panel.
        """
        station_summary = df_results.groupby('stationid').agg({
            'coverage_95': 'first',
            'coverage_50': 'first',
            'kge': 'first',
            'nse': 'first',
            'relative_ci_width_95': 'first'
        }).reset_index()

        # Treat infinities as missing, then plot only stations for which
        # every summarised metric is available. After this step no further
        # NaN/inf filtering is needed per panel.
        station_summary = station_summary.replace([np.inf, -np.inf], np.nan)
        station_summary_clean = station_summary.dropna()

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        try:
            # 1. / 2. Coverage distributions
            UncertaintyVisualizer._coverage_histogram(
                axes[0, 0], station_summary_clean['coverage_95'], 95)
            UncertaintyVisualizer._coverage_histogram(
                axes[0, 1], station_summary_clean['coverage_50'], 50)

            # 3. Coverage vs KGE
            ax3 = axes[1, 0]
            if len(station_summary_clean) > 0:
                scatter = ax3.scatter(station_summary_clean['kge'],
                                      station_summary_clean['coverage_95'],
                                      c=station_summary_clean['relative_ci_width_95'],
                                      cmap='viridis', alpha=0.6)
                ax3.axhline(y=0.95, color='red', linestyle='--', alpha=0.5)
                fig.colorbar(scatter, ax=ax3, label='Relative CI Width')
            ax3.set_xlabel('KGE')
            ax3.set_ylabel('95% CI Coverage')
            ax3.set_title('Coverage vs Performance')

            # 4. CI width distribution
            ax4 = axes[1, 1]
            ci_width_valid = station_summary_clean['relative_ci_width_95']
            if len(ci_width_valid) > 0:
                ax4.hist(ci_width_valid, bins=20, edgecolor='black', alpha=0.7)
            ax4.set_xlabel('Relative 95% CI Width (CI/mean Q)')
            ax4.set_ylabel('Number of Stations')
            ax4.set_title('Uncertainty Width Distribution')

            fig.tight_layout()
            fig.savefig(output_path, dpi=150)
        finally:
            # Close this specific figure even if plotting or saving failed,
            # so figures do not accumulate in pyplot's registry.
            plt.close(fig)

        return station_summary

    @staticmethod
    def plot_station_timeseries(df_station, stationid, output_path):
        """Save a discharge time series for one station with its intervals.

        Draws the 95% and 50% bands (``Q_est_lower_*`` / ``Q_est_upper_*``),
        the median estimate (``Q_est_median``) and the observations
        (``qobs``) against ``date``. The title reports ``kge``,
        ``coverage_95`` and ``coverage_50`` taken from the first row of
        ``df_station``.
        """
        fig, ax = plt.subplots(figsize=(14, 6))
        try:
            dates = pd.to_datetime(df_station['date'])

            # 95% interval
            ax.fill_between(dates, df_station['Q_est_lower_95'], df_station['Q_est_upper_95'],
                            alpha=0.2, color='blue', label='95% CI')

            # 50% interval
            ax.fill_between(dates, df_station['Q_est_lower_50'], df_station['Q_est_upper_50'],
                            alpha=0.4, color='blue', label='50% CI')

            # Median estimate
            ax.plot(dates, df_station['Q_est_median'], 'b-', linewidth=1, label='Estimated Q (median)')

            # Observations
            ax.plot(dates, df_station['qobs'], 'ko', markersize=3, alpha=0.6, label='Observed Q')

            # Station-level scores shown in the title
            coverage_95 = df_station['coverage_95'].iloc[0]
            coverage_50 = df_station['coverage_50'].iloc[0]
            kge = df_station['kge'].iloc[0]

            ax.set_xlabel('Date')
            ax.set_ylabel('Discharge (m³/s)')
            ax.set_title(f'Station {stationid}\nKGE={kge:.2f}, 95% Coverage={coverage_95:.2f}, 50% Coverage={coverage_50:.2f}')
            ax.legend(loc='upper right')
            ax.grid(True, alpha=0.3)

            fig.tight_layout()
            fig.savefig(output_path, dpi=150)
        finally:
            plt.close(fig)

    @staticmethod
    def create_coverage_report(df_results, output_path):
        """Write and print the coverage report.

        Two CSV files are written: the per-station table at ``output_path``
        and a one-row summary next to it, with ``_summary`` inserted before
        the file extension (``x.csv`` -> ``x_summary.csv``).

        Parameters
        ----------
        df_results : pandas.DataFrame
            Needs ``stationid`` plus ``coverage_95``, ``coverage_50``,
            ``kge``, ``nse``, ``nrmse``, ``kge_median``, ``nse_median``,
            ``ci_width_95``, ``ci_width_50``, ``relative_ci_width_95`` (first
            value per station is used) and ``qobs`` (averaged per station).
        output_path : str
            Destination of the per-station table.

        Returns
        -------
        tuple
            ``(station_summary, report)``: the per-station DataFrame (with
            +/-inf replaced by NaN and ``qobs`` renamed to ``mean_qobs``) and
            the dict of summary statistics.
        """
        station_summary = df_results.groupby('stationid').agg({
            'coverage_95': 'first',
            'coverage_50': 'first',
            'kge': 'first',
            'nse': 'first',
            'nrmse': 'first',
            'kge_median': 'first',
            'nse_median': 'first',
            'ci_width_95': 'first',
            'ci_width_50': 'first',
            'relative_ci_width_95': 'first',
            'qobs': 'mean'
        }).reset_index().rename(columns={'qobs': 'mean_qobs'})

        # Replace inf with NaN so the statistics below can skip them.
        station_summary = station_summary.replace([np.inf, -np.inf], np.nan)

        # Summary statistics (NaN ignored).
        report = {
            'Total Stations': len(station_summary),
            'Valid Stations (no nan)': station_summary.dropna().shape[0],
            'Mean 95% Coverage': station_summary['coverage_95'].mean(skipna=True),
            'Std 95% Coverage': station_summary['coverage_95'].std(skipna=True),
            'Stations with 95% Coverage >= 0.90': (station_summary['coverage_95'] >= 0.90).sum(),
            'Stations with 95% Coverage >= 0.95': (station_summary['coverage_95'] >= 0.95).sum(),
            'Mean 50% Coverage': station_summary['coverage_50'].mean(skipna=True),
            'Std 50% Coverage': station_summary['coverage_50'].std(skipna=True),
            'Mean KGE': station_summary['kge'].mean(skipna=True),
            'Mean NSE': station_summary['nse'].mean(skipna=True),
            'Mean KGE (median)': station_summary['kge_median'].mean(skipna=True),
            'Mean NSE (median)': station_summary['nse_median'].mean(skipna=True),
            'Mean Relative CI Width (95%)': station_summary['relative_ci_width_95'].mean(skipna=True),
        }

        # Persist both tables. The summary path is derived from the file
        # extension so it can never coincide with output_path.
        report_df = pd.DataFrame([report])
        report_df.to_csv(UncertaintyVisualizer._summary_path(output_path), index=False)
        station_summary.to_csv(output_path, index=False)

        print("\n" + "="*60)
        print("UNCERTAINTY ANALYSIS REPORT")
        print("="*60)
        for key, value in report.items():
            if isinstance(value, float):
                print(f"{key}: {value:.4f}")
            else:
                print(f"{key}: {value}")
        print("="*60)

        return station_summary, report


# ============================================================================
# 配置运行函数
# ============================================================================
def run_configuration(a, b, c, d, common_data):
    """
    Run the full pipeline for one configuration and write its outputs.

    Steps: width statistics (IQR) -> SWOT point selection / smoothing ->
    hydraulic curve fitting -> hypsometric curves -> validation with
    uncertainty propagation -> coverage report and example plots. Each step
    writes a CSV/PNG whose name ends in ``{a}_{b}_{c}_{d}``. The function
    returns early (after printing a message) as soon as a step yields no data.

    Parameters:
    -----------
    a : str
        Processing mode: 'node' or 'datemean'.
    b : str
        IQR multiplier option: '1.0', '1.5', '2.0', '2.5', '3.0', '4.0'.
    c : str
        QA option: 'noqa', 'qaloose', 'qastrict'.
    d : str
        Version option: 'VersionD', 'VersionC'.
    common_data : dict
        Shared inputs. Required keys: 'df_l8', 'df_comid', 'df_attrs',
        'df_width'. Optional key 'val_folder' overrides the folder passed to
        the validator (default '/home/xj/device5/data/daily_Q').

    Raises:
    -------
    ValueError
        If ``a`` is not one of the supported processing modes.
    """
    # Fail fast on an unsupported mode: otherwise the quality-controlled
    # table is never assigned and the run dies later with an
    # UnboundLocalError, after output files have already been written.
    if a not in ('node', 'datemean'):
        raise ValueError(
            f"Unknown processing mode a={a!r}; expected 'node' or 'datemean'"
        )

    tag = f'{a}_{b}_{c}_{d}'

    print(f"\n{'='*60}")
    print(f"Running configuration: A={a}, B={b}, C={c}, D={d}")
    print(f"{'='*60}")
    # Uncertainty-propagation settings in effect for this run
    print("Uncertainty settings:")
    print(f"  - Pixel resolution: {PIXEL_RESOLUTION}m")
    print(f"  - Width jitter per bank: ±{WIDTH_JITTER_PER_BANK}m")
    print(f"  - Monte Carlo samples: {N_MC_SAMPLES}")

    gc.collect()

    # Step 1: width statistics (IQR method)
    print("Step 1: Calculating width statistics using IQR method...")
    df_l8 = common_data['df_l8']
    df_w_stats = WidthStatistics.calculate_width_iqr(df_l8)

    # Select the bound columns that belong to the requested IQR multiplier.
    low_col, high_col = WidthStatistics.get_iqr_columns(b)
    df_w_stats['w_low'] = df_w_stats[low_col]
    df_w_stats['w_high'] = df_w_stats[high_col]

    df_w_stats.to_csv(f'1.width_statistic_iqr_{tag}.csv', index=False)

    df_comid = common_data['df_comid']
    df_attrs = common_data['df_attrs']

    # Load the SWOT data that matches the QA option.
    if c == 'noqa':
        # Without QA the file name depends on the version only.
        df_swot = pd.read_csv(f'1.all_matched_points_{d}.csv')
    else:
        # With qaloose / qastrict the file name also depends on the mode.
        df_swot = pd.read_csv(f'2.swot_{a}_{c}_{d}.csv')

    df_swot = df_swot.merge(df_comid, on='stationid', how='inner')

    if a == 'node':
        print("Step 2: Selecting best nodes...")
        df_swot_filtered, _ = NodeSelector.select_best_nodes(df_swot, min_data_points=10)

        print("Step 3: Applying quality control...")
        qc = DataQualityControl(df_w_stats)
        df_qc = qc.apply_qc(df_swot_filtered, draw_figure=False)

    else:  # a == 'datemean'
        print("Using smoothed data (skipping width filter)...")
        # Rolling median per station, computed in a process pool.
        groups = [group for _, group in df_swot.groupby('stationid')]
        if not groups:
            # pd.concat([]) would raise; treat it like the other empty steps.
            print(f"No SWOT data for {tag}")
            return

        # Pool() rejects processes < 1 (N_WORKERS is 0 on a one-core host).
        n_procs = max(1, min(N_WORKERS, len(groups)))
        with mp.Pool(processes=n_procs) as pool:
            results = pool.map(_rolling_median_group, groups)

        df_qc = pd.concat(results)

    if 'COMID' not in df_qc.columns:
        df_qc = df_qc.merge(df_comid, on='stationid', how='left')

    # Put COMID first in the exported table.
    cols = ['COMID'] + [col for col in df_qc.columns if col != 'COMID']
    df_qc = df_qc[cols]
    df_qc.to_csv(f'2.swot-points-selection_iqr_{tag}.csv', index=False)

    # Step 4: fitting
    print("Step 4: Fitting hydraulic curves...")
    # The datemean mode skips the width filter during fitting and validation.
    skip_width_filter = (a == 'datemean')
    fitter = HydraulicCurveFitter(df_w_stats, df_attrs, skip_width_filter=skip_width_filter)
    df_fit_all = fitter.fit_all_stations(df_qc)

    if df_fit_all is None or len(df_fit_all) == 0:
        print(f"No fit data for {tag}")
        return

    df_fit_all.to_csv(f'3.fit_proba_modified_q50_iqr_{tag}.csv', index=False)

    # Step 5: hypsometric curves
    print("Step 5: Generating hypsometric curves...")
    df_hypso = HypsometricCurveGenerator.generate_curves(df_fit_all)

    if df_hypso is None or len(df_hypso) == 0:
        print(f"No hypsometric curves generated for {tag}")
        return

    df_hypso.to_csv(f'4.hypso_med_modified_q50_iqr_{tag}.csv', index=False)

    # Step 6: validation (with uncertainty propagation)
    print("Step 6: Validating model with uncertainty propagation...")
    validator = ModelValidator()
    df_width = common_data['df_width']
    val_folder = common_data.get('val_folder', '/home/xj/device5/data/daily_Q')

    df_results = validator.validate(
        df_hypso, df_width,
        val_folder,
        df_fit_all,
        skip_width_filter=skip_width_filter
    )

    if df_results is None or len(df_results) == 0:
        print(f"No validation results for {tag}")
        return

    df_results.to_csv(f'5.q_kge_med_modified_q50_iqr_{tag}.csv', index=False)

    # Step 7: uncertainty report and figures
    print("Step 7: Generating uncertainty analysis report...")
    visualizer = UncertaintyVisualizer()

    # Coverage summary figure
    visualizer.plot_coverage_summary(
        df_results,
        f'6.coverage_summary_{tag}.png'
    )

    # Coverage report (per-station table plus one-row summary)
    visualizer.create_coverage_report(
        df_results,
        f'6.coverage_report_{tag}.csv'
    )

    # Example time-series plots for the first five stations
    sample_stations = df_results['stationid'].unique()[:5]
    os.makedirs(f'timeseries_{tag}', exist_ok=True)
    for s in sample_stations:
        df_station = df_results[df_results['stationid'] == s]
        visualizer.plot_station_timeseries(
            df_station, s,
            f'timeseries_{tag}/station_{s}.png'
        )

    print(f"Configuration {tag} completed!")
    gc.collect()

# ============================================================================
# 配置生成函数
# ============================================================================
def generate_configs():
    """
    Enumerate every valid (a, b, c, d) configuration tuple.

    Rule: 'noqa' is only paired with the 'node' mode; 'qaloose' and
    'qastrict' may be paired with any mode.

    The b option is the IQR multiplier ('1.0', '1.5', '2.0', '2.5', '3.0',
    '4.0'); only the values listed below are currently enabled.
    Notes:
    - With a='node' every b option is visited and the width filter applies.
    - With a='datemean' every b option is visited as well (for validation),
      but fitting does not apply the width filter.
    """
    processing_modes = ['node']
    iqr_multipliers = ['1.5']  # IQR multiplier
    qa_levels = ['noqa']
    versions = ['VersionD']

    combos = itertools.product(processing_modes, iqr_multipliers, qa_levels, versions)

    # Keep a combination unless it pairs 'noqa' with a mode other than 'node'.
    return [
        (mode, multiplier, qa, version)
        for mode, multiplier, qa, version in combos
        if qa != 'noqa' or mode == 'node'
    ]

# ============================================================================
# 主程序
# ============================================================================
def main():
    """Run every configuration, then build the cross-configuration outputs.

    Loads the shared input tables, runs ``run_configuration`` for each tuple
    from ``generate_configs``, draws one boxplot per metric
    (``boxplot_<metric>.png``) from the per-configuration result files, and
    merges the per-configuration report summaries into
    ``7.all_configs_summary.csv``.
    """
    import time
    total_start = time.time()

    # Uncertainty-analysis settings
    print("="*60)
    print("WIDTH UNCERTAINTY PROPAGATION ANALYSIS")
    print("="*60)
    print(f"Pixel resolution: {PIXEL_RESOLUTION} m")
    print(f"Width uncertainty: ±{2*WIDTH_JITTER_PER_BANK} m (±1 pixel per bank)")
    print(f"Monte Carlo samples: {N_MC_SAMPLES}")
    print("="*60)

    print("\nLoading common data...")
    # Each input file is parsed once; the derived tables are copies, so
    # converting df_width['date'] does not alter df_l8.
    df_l8 = pd.read_csv('../2-preprocess/1.gages3000_glow_datemean_width_timeseries.csv')
    df_attrs = pd.read_csv('../2-preprocess/4.q50_weighted_slp.csv')
    df_comid = df_attrs[['stationid', 'COMID']].copy()
    df_width = df_l8.copy()
    df_width['date'] = pd.to_datetime(df_width['date'])

    common_data = {
        'df_l8': df_l8,
        'df_comid': df_comid,
        'df_attrs': df_attrs,
        'df_width': df_width
    }

    # Valid configurations
    configs = generate_configs()

    print(f"\nTotal configurations to run: {len(configs)}")
    print("Configurations:")
    for cfg in configs:
        print(f"  {cfg}")

    # Run every configuration
    for a, b, c, d in configs:
        start = time.time()
        run_configuration(a, b, c, d, common_data)
        print(f"Time for ({a}, {b}, {c}, {d}): {time.time() - start:.2f}s")

    # Boxplots (including the coverage metrics)
    print("\nGenerating boxplot comparisons...")
    metrics = ['kge', 'nse', 'nrmse', 'coverage_95', 'coverage_50']
    data_dict = {metric: [] for metric in metrics}
    # Labels are tracked per metric so they always line up with the data,
    # even when a result file lacks one of the metric columns.
    labels_dict = {metric: [] for metric in metrics}

    for a, b, c, d in configs:
        file = f'5.q_kge_med_modified_q50_iqr_{a}_{b}_{c}_{d}.csv'
        if not os.path.exists(file):
            continue
        df = pd.read_csv(file)
        label = f'{a}_{b}_{c}_{d}'
        for metric in metrics:
            if metric not in df.columns:
                continue
            station_metrics = df.groupby('stationid')[metric].first().to_numpy(dtype=float)
            # Drop NaN/inf so boxplot statistics are computed on real values.
            data_dict[metric].append(station_metrics[np.isfinite(station_metrics)])
            labels_dict[metric].append(label)

    for metric in metrics:
        if not data_dict[metric]:
            continue
        fig, ax = plt.subplots(figsize=(14, 6))
        try:
            ax.boxplot(data_dict[metric])
            # Set tick labels explicitly instead of boxplot(labels=...),
            # which newer matplotlib releases deprecate.
            ax.set_xticklabels(labels_dict[metric], rotation=45, ha='right')

            # Expected-coverage reference line
            if metric == 'coverage_95':
                ax.axhline(y=0.95, color='red', linestyle='--', label='Expected (95%)')
                ax.legend()
            elif metric == 'coverage_50':
                ax.axhline(y=0.50, color='red', linestyle='--', label='Expected (50%)')
                ax.legend()

            ax.set_title(f'{metric.upper()} Boxplot Comparison')
            ax.set_xlabel('Configuration (A_B_C_D)')
            ax.set_ylabel(metric.upper())
            fig.tight_layout()
            fig.savefig(f'boxplot_{metric}.png', dpi=150)
        finally:
            plt.close(fig)

    # Cross-configuration summary table
    print("\nGenerating summary comparison table...")
    summary_data = []
    for a, b, c, d in configs:
        report_file = f'6.coverage_report_{a}_{b}_{c}_{d}_summary.csv'
        if os.path.exists(report_file):
            report = pd.read_csv(report_file)
            report['config_a'] = a
            report['config_b'] = b
            report['config_c'] = c
            report['config_d'] = d
            summary_data.append(report)

    if summary_data:
        summary_df = pd.concat(summary_data, ignore_index=True)
        summary_df.to_csv('7.all_configs_summary.csv', index=False)
        print("\nAll configurations summary saved to: 7.all_configs_summary.csv")

    print(f"\nTotal time: {time.time() - total_start:.2f}s")

# Entry point: run the whole pipeline only when this module is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()

Using 64 workers for parallel processing
WIDTH UNCERTAINTY PROPAGATION ANALYSIS
Pixel resolution: 30.0 m
Width uncertainty: ±60.0 m (±1 pixel per bank)
Monte Carlo samples: 100

Loading common data...

Total configurations to run: 1
Configurations:
  ('node', '1.5', 'noqa', 'VersionD')

Running configuration: A=node, B=1.5, C=noqa, D=VersionD
Uncertainty settings:
  - Pixel resolution: 30.0m
  - Width jitter per bank: ±30.0m
  - Monte Carlo samples: 100
Step 1: Calculating width statistics using IQR method...
Skipped 28 stations due to small IQR (< 5):
  Station Brazil_26800000: Q1=182.72, Q3=182.72, IQR=0.00
  Station Brazil_31700000: Q1=111.73, Q3=115.16, IQR=3.43
  Station Brazil_64795000: Q1=142.96, Q3=146.77, IQR=3.81
  Station Canada_05BH004: Q1=98.55, Q3=102.17, IQR=3.62
  Station Canada_05EF001: Q1=197.20, Q3=198.18, IQR=0.98
  Station Canada_07EE007: Q1=103.99, Q3=108.82, IQR=4.83
  Station Canada_07FB006: Q1=60.08, Q3=60.08, IQR=0.00
  Station Canada_10EB001: Q1=119.95, Q3=12