# 读取保存的历代 .npz 文件（每代包含全部 X 与 F），预备进行相关性分析
**独立代码块，需要自定义chkpt_dir与n_gen**

In [None]:
%matplotlib widget

import numpy as np
import math
import os
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
matplotlib.rcParams["font.sans-serif"] = ["SimHei"]  
matplotlib.rcParams["axes.unicode_minus"] = False    # 正常显示负号
from pymoo.indicators.hv import HV

# ---- User configuration ----
# Directory that holds the per-generation checkpoint archives.
chkpt_dir = r"F:\ResearchMainStream\0.ResearchBySection\C.动力学模型\参数优化\参数优化实现\ParallelSweepSimpack\结果分析组\0210早-nsga2-180群150代-收敛"

n_gen = 88  # number of generations to load (generation_1 .. generation_<n_gen>)
n_obj = 3   # number of objectives
n_var = 12  # number of design variables (parameters)

# ---- Locate one archive per generation ----
# Generations are numbered from 1 and stored as "generation_<k>.npz".
checkpoint_paths = [
    os.path.join(chkpt_dir, f"generation_{idx}.npz")
    for idx in range(1, n_gen + 1)
]

# ---- Read the "X" and "F" arrays out of every archive ----
# Per the original author's notes, X is (pop_size, n_var) and F is
# (pop_size, n_obj) for each generation.
x_blocks, f_blocks = [], []
for path in checkpoint_paths:
    # The arrays are pulled out while the archive is still open.
    with np.load(path) as archive:
        x_blocks.append(archive["X"])
        f_blocks.append(archive["F"])

# ---- Stack all generations row-wise into two big 2D matrices ----
all_X_history = np.vstack(x_blocks)
all_F_history = np.vstack(f_blocks)

print("all_X_history shape:", all_X_history.shape)
print("all_F_history shape:", all_F_history.shape)

# 可视化 {X1,…,X12}与 {Y1,Y2,Y3}之间的相关系数子矩阵
**依赖于X与F的 .npz 数据导入**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Column labels: X1..X12 for the design variables, Y1..Y3 for the objectives.
param_names = [f"X{k}" for k in range(1, 13)]
target_names = [f"Y{k}" for k in range(1, 4)]

# Wrap the pooled history arrays in labelled DataFrames.
df_X = pd.DataFrame(all_X_history, columns=param_names)
df_F = pd.DataFrame(all_F_history, columns=target_names)

# Place parameters and objectives side by side, compute the full Pearson
# correlation matrix over all columns, then keep only the
# parameter-vs-objective rectangle (rows = X*, columns = Y*).
df_all = pd.concat([df_X, df_F], axis=1)
corr_matrix = df_all.corr(method="pearson")
sub_corr = corr_matrix.loc[param_names, target_names]

print("子矩阵 shape:", sub_corr.shape)
print(sub_corr)

# Heatmap with objectives on the x axis and parameters on the y axis;
# the colour map is centred on zero so the sign of a correlation is visible.
fig, ax = plt.subplots(figsize=(6, 8))
sns.heatmap(sub_corr, annot=True, cmap="coolwarm", center=0, ax=ax)
ax.set_title("Correlation of X1..X12 vs. Y1..Y3")
plt.show()


# 计算 Sobol 主效应 (S1) 和全效应 (ST)，并绘制指数热力图
**依赖于X与F的 .npz 数据导入**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# SALib 1.5.1+ 官方建议使用 `sobol.sample` 而不是 `saltelli.sample`
from SALib.sample import sobol
from SALib.analyze import sobol as sobol_analyze

# ============ 1. Load the existing data ============
# Pooled optimisation history produced by the data-import cell.
X = all_X_history
Y = all_F_history
print("X shape:", X.shape)  # (n_samples, n_params)
print("Y shape:", Y.shape)  # (n_samples, n_targets)

# Dimensions and labels are derived from the data and defined up front:
# `param_names` is needed by the SALib problem definition below, so it must
# exist before that point (previously it was only assigned right before the
# plot, which raised NameError unless another cell had defined it already).
n_params = X.shape[1]
n_targets = Y.shape[1]
param_names = [f"X{i+1}" for i in range(n_params)]    # heatmap row labels
target_names = [f"Y{i+1}" for i in range(n_targets)]  # heatmap column labels

# ============ 2. Train a multi-output random forest ============
# Hold out 20% of the samples for testing; adjust test_size as needed.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                    test_size=0.2,
                                                    random_state=42)
# Surrogate model settings:
# - n_estimators=100: number of trees; more is more stable but slower
# - max_depth=None: trees are allowed to grow without a depth limit
# - random_state=42: fixed seed so the fit is reproducible
rf = RandomForestRegressor(n_estimators=100,
                           max_depth=None,
                           random_state=42)
print("[Info] 开始训练多输出随机森林...")
rf.fit(X_train, Y_train)  # Y_train has one column per objective
# Predict on the held-out set.
Y_pred = rf.predict(X_test)
# Report the test MSE separately for every objective.
mse_list = []
for j in range(n_targets):
    mse_j = mean_squared_error(Y_test[:, j], Y_pred[:, j])
    mse_list.append(mse_j)
    print(f"RandomForest for Y{j+1}, Test MSE = {mse_j:.4f}")

# ============ 3. Global sensitivity analysis (Sobol) ============
# 3.1 Input bounds: simply the per-parameter min/max seen in the history.
# Engineering prior knowledge could be used here instead.
# NOTE(review): SALib presumably requires lower < upper for every bound, so a
# parameter that is constant across the whole history would need special
# handling -- confirm against the data.
X_min = X.min(axis=0)
X_max = X.max(axis=0)
problem = {
    'num_vars': n_params,
    'names': param_names,
    'bounds': [[lo, hi] for lo, hi in zip(X_min, X_max)]
}

# Base sample count: larger is more stable but costs more model evaluations.
# Sobol sequences behave best with a power of two, e.g. 512, 1024, 2048.
N_base = 256
print("[Info] 使用 Sobol 采样, N_base =", N_base)
param_values = sobol.sample(problem, N_base, calc_second_order=True)
print("param_values shape:", param_values.shape)

# 3.2 Evaluate the surrogate (rf) at every sampled point.
# Multi-output model, so the prediction has one column per objective.
Y_sobol_pred = rf.predict(param_values)

S1_list = []  # first-order indices, one array per objective
ST_list = []  # total-effect indices, one array per objective

# 3.3 Run the Sobol analysis separately for every objective.
for j in range(n_targets):
    print(f"\n=== Sobol Analysis for Y{j+1} ===")
    # Surrogate output of the current objective at all sampled points.
    current_output = Y_sobol_pred[:, j]

    # calc_second_order must match the value used when sampling.
    Si = sobol_analyze.analyze(problem, current_output, calc_second_order=True)

    # S1 / ST each hold one value per parameter, in `param_names` order.
    s1 = Si['S1']
    st = Si['ST']
    print("Sobol First Order Indices (S1):", s1)
    print("Sobol Total Effect Indices (ST):", st)

    S1_list.append(s1)
    ST_list.append(st)

# Lists -> matrices with rows = objectives, columns = parameters.
S1_array = np.array(S1_list)
ST_array = np.array(ST_list)

# Transpose so rows = parameters and columns = objectives.
S1_array_t = S1_array.T
ST_array_t = ST_array.T

# Heatmap (rows = X, columns = Y) of the total-effect indices (ST).
plt.figure(figsize=(8, 6))

sns.heatmap(ST_array_t,       # rows = parameters, columns = objectives
            annot=True,       # print the value inside every cell
            cmap="coolwarm",  # alternatives: "bwr", "RdBu_r", ...
            vmin=0, vmax=0.8,  # fixed colour scale; values above 0.8 saturate
            xticklabels=target_names,
            yticklabels=param_names
           )

plt.title("Sobol ST Indices (X in rows, Y in columns)")
plt.xlabel("Objectives")
plt.ylabel("Parameters")
plt.show()
