In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
from torchinfo import summary
from sklearn.preprocessing import RobustScaler
from scipy.stats import wasserstein_distance
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import platform
import datetime
import logging
# Select the default matplotlib font family from the host operating system:
# 'Songti SC' on macOS, 'SimSun' on Windows, 'Noto Sans CJK SC' otherwise.
plt.rcParams['font.family'] = [
    {'Darwin': 'Songti SC', 'Windows': 'SimSun'}.get(platform.system(), 'Noto Sans CJK SC')
]
# Work around the minus sign not displaying correctly on axes.
matplotlib.rcParams['axes.unicode_minus'] = False

In [None]:
# Start from an empty mapping, then replace it with the parsed JSON document.
config = {}
with open('./config.json', mode='r', encoding='utf-8') as f:
    config = json.loads(f.read())

# Echo the loaded configuration, keeping non-ASCII characters readable.
print(json.dumps(config, ensure_ascii=False, indent=2))

In [None]:

class MultiScaleStockDataset(Dataset):
    """Sliding-window dataset built from one price table per time scale.

    Every scale in ``scaler_factors`` (default
    ``['30min', '1hour', '4hour', '1day']``) is read from
    ``{file_path}/{prefix}{scale}.csv`` (or taken from ``data`` when
    ``file_path`` is None), aligned onto the base scale's timestamps and
    scaled with ``RobustScaler``.  One sample is a dict of per-scale feature
    windows (all columns except ``close``) together with the next
    ``pred_steps`` scaled ``close`` values of ``target_scale``.
    """

    def __init__(self, scaler_factors=None, data=None,
                 seq_length=120,
                 pred_steps=1,
                 target_scale='30min',
                 file_path=None, prefix=None):
        """Load, align and normalise every scale.

        Args:
            scaler_factors: Scale names; also used as CSV file-name suffixes.
            data: DataFrame used for every scale when ``file_path`` is None.
            seq_length: Number of rows in each input window.
            pred_steps: Number of ``close`` rows returned as the target.
            target_scale: Scale whose ``close`` column is the target.
            file_path: Directory holding the CSV files.
            prefix: File-name prefix placed before the scale name.
        """
        super().__init__()
        if scaler_factors is None:
            scaler_factors = ['30min', '1hour', '4hour', '1day']
        self.data = data
        self.seq_length = seq_length
        self.pred_steps = pred_steps
        self.target_scale = target_scale
        self.scaler_factors = scaler_factors
        self.normalized_data = {}  # reserved for normalised data (not filled here)
        self.feature_scaler = RobustScaler()
        self.raw_data = {
            scaler: self._import_synthetic_data(file_path=file_path, file_name=f"{prefix}{scaler}")
            for scaler in scaler_factors
        }
        # Timestamps of the first scale as loaded (before alignment).
        self.time_serial = self.raw_data[scaler_factors[0]].index
        self.aligned_data = self._align_time_index(data=self.raw_data)
        self._normalize_data()
        # Column counts of the raw tables; these still include 'close'.
        self.feature_dims = {
            scale: data.shape[1] for scale, data in self.raw_data.items()
        }
        self.feature_names = self.raw_data[scaler_factors[0]].columns.tolist()

    def split_dataset(self, test_ratio=0.2):
        """Split into (train, test) ``Subset``s without shuffling.

        The first ``1 - test_ratio`` share of the sample indices becomes the
        training subset and the remainder the test subset.
        """
        total_len = len(self)
        split_idx = int(total_len * (1 - test_ratio))
        return Subset(self, range(split_idx)), Subset(self, range(split_idx, total_len))

    def _align_time_index(self, base_scale='30min', data=None):
        """Bring every scale onto the base scale's time axis.

        * ``'1day'`` rows are stamped at 23:00 of their date, forward-filled
          onto the base index and only exposed from a 23:00 row onwards.
        * ``'4hour'`` / ``'1hour'`` are resampled to 30 minutes and linearly
          interpolated.
        * Any other scale is forward-filled onto the base index.

        All frames are finally restricted to the timestamps they share with
        the base index, so row ``k`` refers to the same timestamp in every
        scale.  (Cropping by position after the per-scale ``dropna`` could
        pair rows with different timestamps.)

        Args:
            base_scale: Key of the reference scale; falls back to the first
                key of ``data`` when it is not present.
            data: Mapping ``scale -> DataFrame`` with a DatetimeIndex.

        Returns:
            dict mapping each scale to its aligned DataFrame.
        """
        if base_scale not in data:
            base_scale = next(iter(data))
        base_df = data[base_scale]
        end_of_day = pd.to_datetime('23:00').time()
        aligned_data = {}
        for scale, df in data.items():
            if scale == '1day':
                daily_df = df.copy()
                # Stamp each daily row at 23:00 of its calendar day.
                daily_df.index = daily_df.index.normalize() + pd.Timedelta(hours=23)
                aligned_df = daily_df.reindex(base_df.index, method='ffill')
                # Keep values only on 23:00 rows, then forward-fill again so a
                # daily value first appears at 23:00.
                mask = (aligned_df.index.time == end_of_day).reshape(-1, 1)
                mask = np.tile(mask, (1, aligned_df.shape[1]))
                aligned_df = pd.DataFrame(
                    np.where(mask, aligned_df.values, np.nan),
                    index=aligned_df.index,
                    columns=aligned_df.columns
                ).ffill()
            elif scale in ['4hour', '1hour']:
                aligned_df = df.resample('30min').interpolate('linear')
            else:
                # Forward-fill onto the base time axis.
                aligned_df = df.reindex(base_df.index, method='ffill')
            aligned_data[scale] = aligned_df.dropna()

        # Keep only the timestamps present in the base index and every scale.
        common_index = base_df.index
        for frame in aligned_data.values():
            common_index = common_index.intersection(frame.index)
        return {
            scale: frame.loc[frame.index.isin(common_index)].copy()
            for scale, frame in aligned_data.items()
        }

    def __getitem__(self, idx):
        """Return ``(features, target)`` for window ``idx``.

        ``features`` maps each scale to a float32 tensor holding rows
        ``idx .. idx + seq_length - 1`` of every column except ``close``;
        ``target`` holds the following ``pred_steps`` ``close`` values of
        ``target_scale``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``;
                this also lets plain ``for sample in dataset`` terminate.
        """
        total = len(self)
        if idx < 0:
            idx += total
        if idx < 0 or idx >= total:
            raise IndexError(f"index {idx} out of range for dataset of length {total}")

        window_end = idx + self.seq_length
        features = {}
        for scale in self.scaler_factors:
            window = self.aligned_data[scale].iloc[idx:window_end]
            values = np.array(window.drop(columns=['close']).values, dtype=np.float32)
            features[scale] = torch.from_numpy(values)

        target = self.aligned_data[self.target_scale]['close'].iloc[
            window_end:window_end + self.pred_steps
        ]
        return features, torch.from_numpy(np.array(target.values, dtype=np.float32))

    def __len__(self):
        """Number of complete (window, target) pairs; never negative."""
        rows = len(self.aligned_data[self.target_scale])
        # The last valid start index is rows - seq_length - pred_steps, hence +1.
        return max(0, rows - self.seq_length - self.pred_steps + 1)

    def _import_synthetic_data(self, file_path=None, file_name=None):
        """Read ``{file_path}/{file_name}.csv`` indexed by its ``date`` column.

        Returns ``self.data`` unchanged when ``file_path`` is None; otherwise
        the parsed table with rows containing NaN removed.
        """
        if file_path is None:
            return self.data
        data = pd.read_csv(f"{file_path}/{file_name}.csv",
                           parse_dates=['date'],
                           index_col='date')

        return data.dropna()

    def _normalize_data(self):
        """Scale features and ``close`` separately, one scaler per scale.

        After this call ``self.scalers`` holds:
            * ``'features'``: scale -> scaler fitted on that scale's
              non-``close`` columns,
            * ``'target'``: scaler fitted on ``close`` of ``target_scale``
              (the series returned by ``__getitem__`` as the target),
            * ``'target_by_scale'``: scale -> ``close`` scaler.

        A single scaler object must not be refit for every scale: it would
        end up describing only the last scale and ``inverse_transform`` would
        use the wrong statistics.
        """
        feature_scalers = {}
        target_scalers = {}
        for scale in self.scaler_factors:
            frame = self.aligned_data[scale]
            feature_cols = [c for c in frame.columns if c != 'close']

            feature_scaler = RobustScaler()
            if feature_cols:
                frame[feature_cols] = feature_scaler.fit_transform(frame[feature_cols])
            feature_scalers[scale] = feature_scaler

            target_scaler = RobustScaler()
            frame['close'] = target_scaler.fit_transform(frame[['close']]).ravel()
            target_scalers[scale] = target_scaler

        fallback = self.scaler_factors[-1]
        self.scalers = {
            'features': feature_scalers,
            'target': target_scalers.get(self.target_scale, target_scalers[fallback]),
            'target_by_scale': target_scalers,
        }
        # Kept for backward compatibility with code reading this attribute.
        self.feature_scaler = feature_scalers.get(self.target_scale, feature_scalers[fallback])

    def inverse_transform(self, data, scale_type='target'):
        """Map scaled values back to original units.

        Args:
            data: Array-like of scaled values.  For ``'target'`` a 1-D input
                is treated as a single column.
            scale_type: ``'target'`` for the ``close`` scaler of
                ``target_scale``; otherwise a scale name selecting that
                scale's feature scaler.
        """
        if scale_type == 'target':
            values = np.asarray(data)
            if values.ndim == 1:
                values = values.reshape(-1, 1)
            return self.scalers['target'].inverse_transform(values)
        return self.scalers['features'][scale_type].inverse_transform(data)

    def _validate_features(self):
        """Sanity-check the aligned data.

        Checks, in order: daily values are constant inside each
        23:00-to-23:00 period, daily values only change on 23:00 rows, every
        scale has ``close`` and the same feature columns, and the return
        distribution of ``close`` stays close to the raw one.

        Raises:
            ValueError: If daily values vary inside one period.
            AssertionError: If any of the other checks fails.
        """
        if '1day' in self.aligned_data:
            daily_data = self.aligned_data['1day']
            end_of_day = pd.to_datetime('23:00').time()

            # A daily value becomes visible at 23:00, so group rows by the
            # period that starts at 23:00 rather than by calendar day.
            period = (daily_data.index - pd.Timedelta(hours=23)).normalize()
            for date, group in daily_data.groupby(period):
                if len(group) > 0:
                    if not group.eq(group.iloc[0]).all().all():
                        raise ValueError(f"Daily data inconsistency on {date.strftime('%Y-%m-%d')}")

            # Rows where the daily values change must be stamped 23:00.
            # (resample('D') labels are bin starts, so they cannot be used
            # to inspect the original timestamps.)
            if len(daily_data) > 1:
                changed = np.array(daily_data.ne(daily_data.shift()).any(axis=1), dtype=bool)
                changed[0] = False  # the first row has no predecessor
                assert all(ts.time() == end_of_day
                           for ts in daily_data.index[changed]), \
                    "Daily data timestamps misaligned"

        # Every scale must expose the same feature set and carry the target.
        base_columns = set(self.aligned_data[self.scaler_factors[0]].columns) - {'close'}
        for scale in self.scaler_factors:
            assert 'close' in self.aligned_data[scale].columns, \
                f"Close price missing in {scale} scale!"
            assert set(self.aligned_data[scale].columns) - {'close'} == base_columns, \
                f"Feature mismatch in {scale} scale!"
        # Compare percentage-change distributions of raw and aligned close.
        for scale in self.scaler_factors:
            orig = self.raw_data[scale].close.pct_change().dropna()
            aligned = self.aligned_data[scale].close.pct_change().dropna()
            assert wasserstein_distance(orig, aligned) < 0.1, f"{scale}分布偏移超标"

    def print(self):
        """Print each aligned frame's shape, plus 50 rows of the daily frame."""
        for scale, data in self.aligned_data.items():
            print(f"{scale}: {data.shape}")
            if scale == '1day':
                print(data.head(50))

In [None]:
# Build the training dataset from the CSV files named 'train-<scale>.csv'
# under the configured data directory.
dataset = MultiScaleStockDataset(
    file_path=config['data_path'],
    scaler_factors=config['scaler_factors'],
    prefix='train-',
)
# dataset.print()

In [None]:
def plot_temporal_alignment(dataset, n_samples=3):
    """Plot the aligned close series of every scale for consecutive windows.

    Window ``i`` covers rows ``i * seq_length`` to ``(i + 1) * seq_length - 1``
    of each aligned frame.  Each curve is drawn against the timestamps of the
    frame it comes from, so the x-axis always matches the plotted rows.

    Args:
        dataset: Object exposing ``seq_length``, ``scaler_factors`` and
            ``aligned_data`` (scale -> DataFrame with a datetime index).
        n_samples: Number of windows (one subplot per window).
    """
    # squeeze=False keeps `axs` two-dimensional, so n_samples == 1 works too.
    fig, axs = plt.subplots(n_samples, 1, figsize=(15, 3*n_samples), squeeze=False)

    for i in range(n_samples):
        ax = axs[i, 0]
        start = i * dataset.seq_length
        stop = start + dataset.seq_length
        for scale in dataset.scaler_factors:
            frame = dataset.aligned_data[scale]
            # The legend says "close": plot that column when it exists and
            # fall back to the first column otherwise.
            series = frame['close'] if 'close' in frame.columns else frame.iloc[:, 0]
            window = series.iloc[start:stop]
            ax.plot(window.index, window.to_numpy(), label=f'{scale} close')
        ax.set_title(f'Sample {i} - Feature Alignment')
        ax.legend()
        ax.xaxis.set_major_locator(mdates.AutoDateLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))
        # Rotate after the locator/formatter are installed so it applies to
        # the final tick labels.
        ax.tick_params(axis='x', labelrotation=45)
    plt.tight_layout()
    plt.show()
# Draw the default number of alignment windows for the loaded dataset.
plot_temporal_alignment(dataset=dataset)

In [None]:
# ====================
# 2. 训练日志系统
# ====================
class TrainingLogger:
    """Collects training metrics in memory, TensorBoard and a text log.

    Attributes:
        metrics: dict mapping a tag to a list of ``(step, value)`` tuples.
        writer: ``SummaryWriter`` writing to
            ``{log_dir}/tensorboard/{timestamp}``.
        logger: ``logging.Logger`` named ``'PyTorch Training'``.
    """

    def __init__(self, log_dir="logs"):
        """Create the log directory, the TensorBoard writer and the logger.

        Args:
            log_dir: Directory that receives ``training.log`` and the
                ``tensorboard`` sub-directory; created when missing.
        """
        import os  # local import: not part of the file's import block

        self.metrics = {
            'train_loss': [],
            'train/batch_loss': [],
            'val_loss': [],
            'lr': [],
            'learning_rate/group_0': [],
            'learning_rate/group_1': []
        }
        # FileHandler below does not create missing directories.
        os.makedirs(log_dir, exist_ok=True)

        # TensorBoard log, one run directory per start time.
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.writer = SummaryWriter(f'{log_dir}/tensorboard/{timestamp}')

        # Text log: file plus console.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'{log_dir}/training.log', encoding='utf-8'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('PyTorch Training')

    def log_scalar(self, tag, value, step):
        """Record a scalar in TensorBoard and in ``self.metrics``.

        Tags that were not pre-registered (for example a third optimizer
        parameter group) get their history list created on first use.
        """
        self.writer.add_scalar(tag, value, step)
        self.metrics.setdefault(tag, []).append((step, value))

    def log_histogram(self, tag, values, step):
        """Record a histogram of ``values`` in TensorBoard."""
        self.writer.add_histogram(tag, values, step)

    def log_learning_rate(self, optimizer, step):
        """Record the learning rate of every optimizer parameter group."""
        for i, group in enumerate(optimizer.param_groups):
            self.log_scalar(f'learning_rate/group_{i}', group['lr'], step)

    def log_gradients(self, model, step):
        """Record a gradient histogram for each parameter that has a gradient."""
        for name, param in model.named_parameters():
            if param.grad is not None:
                self.log_histogram(f"gradients/{name}", param.grad, step)

    def close(self):
        """Close the TensorBoard writer."""
        self.writer.close()

class AdvancedLogger(TrainingLogger):
    """TrainingLogger with extra TensorBoard helpers (graph, embeddings)."""

    def log_model_graph(self, model, dummy_input):
        """Write the model's computation graph, traced with ``dummy_input``."""
        writer = self.writer
        writer.add_graph(model, dummy_input)

    def log_confusion_matrix(self, y_true, y_pred, classes, step):
        """Placeholder for confusion-matrix logging; currently does nothing."""
        # Disabled implementation kept for reference:
        # cm = confusion_matrix(y_true, y_pred)
        # fig = plt.figure()
        # sns.heatmap(cm, annot=True, fmt='d', xticklabels=classes, yticklabels=classes)
        # self.writer.add_figure('confusion_matrix', fig, step)
        return None

    def log_embeddings(self, embeddings, metadata, step):
        """Write embedding vectors with their metadata at ``step``."""
        embedding_kwargs = {'metadata': metadata, 'global_step': step}
        self.writer.add_embedding(embeddings, **embedding_kwargs)

In [None]:
class ScaleAwareExpert(nn.Module):
    """Single-scale expert that maps a feature window to one prediction.

    Scales whose name contains ``'30min'`` or ``'1h'`` use
    Linear -> Conv1d -> bidirectional LSTM; every other scale uses
    Linear -> TransformerEncoder.  Both end in a 32-unit projection followed
    by a 1-unit predictor.
    """

    def __init__(self, input_dim, scale_type, arch_config):
        """Build the expert.

        Args:
            input_dim: Number of features per time step.
            scale_type: Scale name; selects the architecture.
            arch_config: Optional dict that overrides the top-level default
                entries (``'high_freq'`` / ``'low_freq'``); may be None.
        """
        super().__init__()

        # Defaults.
        self.arch_config = {
            'high_freq': {
                'conv_channels': [input_dim, 32, 64],
                'lstm_units': 64,
                'bidirectional': True
            },
            'low_freq': {
                'transformer_layers': 2,
                'nhead': 4,
                'ff_dim': 128
            }
        }
        # Shallow override: a supplied 'high_freq'/'low_freq' dict replaces
        # the default one as a whole.  None/empty keeps the defaults.
        if arch_config:
            self.arch_config.update(arch_config)
        self.scale_type = scale_type

        self.build_model(input_dim)

    def _is_high_freq(self):
        """Single source of truth for the branch used by build and forward."""
        return '30min' in self.scale_type or '1h' in self.scale_type

    def build_model(self, input_dim):
        """Create the layers of the branch selected by ``scale_type``."""
        if self._is_high_freq():
            # High-frequency expert: projection + Conv1d + BiLSTM.
            high_freq = self.arch_config.get('high_freq') or {}
            proj_dim = 30
            conv_channels = 16
            hidden_size = high_freq.get('lstm_units', 32)
            num_layers = high_freq.get('num_layers', 1)

            self.proj_in = nn.Linear(input_dim, proj_dim)
            # The convolution runs on the projected features, so its input
            # channel count is the projection width, not input_dim.
            self.conv = nn.Conv1d(proj_dim,
                                  out_channels=conv_channels,
                                  kernel_size=3,
                                  padding='same')
            self.lstm = nn.LSTM(conv_channels,
                                hidden_size,
                                num_layers=num_layers,
                                batch_first=True,
                                # Inter-layer dropout has no effect (and only
                                # warns) with a single layer.
                                dropout=0.2 if num_layers > 1 else 0.0,
                                bidirectional=True)
            # A bidirectional LSTM emits 2 * hidden_size features per step.
            self.proj = nn.Linear(2 * hidden_size, 32)
        else:
            # Low-frequency expert: projection + Transformer encoder.
            self.proj_in = nn.Linear(input_dim, 4)
            self.encoder_layer = nn.TransformerEncoderLayer(d_model=4,
                                                            nhead=2,
                                                            dim_feedforward=64,
                                                            batch_first=True)
            self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=3)
            self.proj = nn.Linear(4, 32)
        self.predictor = nn.Linear(32, 1)

    def forward(self, x):
        """Map ``[batch, seq_len, input_dim]`` to ``[batch, 1]``."""
        x = self.proj_in(x)
        if self._is_high_freq():
            # Conv1d expects channels first: [batch, channels, seq_len].
            x = self.conv(x.permute(0, 2, 1)).permute(0, 2, 1)
            x, _ = self.lstm(x)
        else:
            x = self.transformer(x)
        # Only the last time step feeds the head.
        x = self.proj(x[:, -1, :])
        return self.predictor(x)

In [None]:
class DymaMoE(nn.Module):
    """Mixture of per-scale experts blended by a softmax gate.

    The gate receives the time-averaged features of all scales concatenated
    in expert order and outputs one weight per expert.
    """

    def __init__(self, input_dim=30, experts=None, hidden_dim=64, model_config=None, output_dim=1):
        """Create the experts and the gate network.

        Args:
            input_dim: Features per time step of every scale (the gate's
                first layer expects ``input_dim * len(experts)`` inputs).
            experts: Scale names; defaults to
                ``['30min', '1hour', '4hour', '1day']``.
            hidden_dim: Gate hidden width used when the configuration gives
                no ``hidden_dims``.
            model_config: Optional dict with ``'expert_config'`` and
                ``'gate_network'`` (``'hidden_dims'``, ``'dropout_rate'``).
            output_dim: Unused; kept for interface compatibility.
        """
        super().__init__()
        if experts is None:
            experts = ['30min', '1hour', '4hour', '1day']
        model_config = model_config or {}
        expert_config = model_config.get('expert_config') or {}
        self.experts = nn.ModuleDict({
            scale: ScaleAwareExpert(input_dim, scale, arch_config=expert_config) for scale in experts
        })

        # Build the gate from the configured hidden widths.
        gate_network = model_config.get('gate_network') or {}
        hidden_dims = gate_network.get('hidden_dims') or [hidden_dim]
        dropout_rate = gate_network.get('dropout_rate')
        if dropout_rate is None:
            dropout_rate = 0.0
        layers = []
        prev_dim = input_dim * len(experts)
        for dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, len(self.experts)))
        layers.append(nn.Softmax(dim=-1))
        self.gate = nn.Sequential(*layers)

    def forward(self, inputs):
        """Blend the expert predictions.

        Args:
            inputs: dict ``scale -> tensor [batch, seq_len, input_dim]``
                containing at least every expert's scale.

        Returns:
            ``(combined, expert_outputs)`` where ``combined`` is
            ``[batch, 1]`` and ``expert_outputs`` maps each scale to a
            ``[batch]`` tensor.
        """
        scales = list(self.experts.keys())
        expert_outputs = {
            scale: self.experts[scale](inputs[scale]).squeeze(-1) for scale in scales
        }
        # Concatenate in expert order so the gate's feature layout does not
        # depend on the key order (or extra keys) of `inputs`.
        gate_input = torch.cat([inputs[scale] for scale in scales], dim=-1)
        weights = self.gate(gate_input.mean(dim=1))  # (batch, num_experts)

        # Weighted sum of the expert predictions.
        combined = sum(weights[:, i] * expert_outputs[scale]
                       for i, scale in enumerate(scales))
        return combined.unsqueeze(-1), expert_outputs

    def print_model_structure(self):
        """Print the configured high-frequency settings and parameter count of each expert."""
        for name, expert in self.experts.items():
            # .get keeps this usable when the configuration replaced
            # 'high_freq' with a dict that lacks some of these keys.
            high_freq = expert.arch_config.get('high_freq') or {}
            print(f"\n=== {name}专家架构 ===")
            print("卷积通道:", high_freq.get('conv_channels'))
            print("LSTM单元:", high_freq.get('lstm_units'))
            print("双向结构:", high_freq.get('bidirectional'))
            print("实际参数量:", sum(p.numel() for p in expert.parameters()))

In [None]:
class ModelAnalyzer:
    """Prints a structural overview of a model."""

    def __init__(self, model, input_size):
        self.model, self.input_size = model, input_size

    def print_summary(self):
        """Print a layer table via ``summary``; on ImportError use the fallback."""
        columns = ["input_size", "output_size", "num_params"]
        try:
            print("\nModel Architecture Summary:")
            summary(self.model, input_size=self.input_size, depth=3, col_names=columns)
        except ImportError:
            self._fallback_summary()

    def _fallback_summary(self):
        """List every trainable parameter with its shape, then the total count."""
        print("\nModel Architecture:")
        trainable = [
            (pname, tensor)
            for pname, tensor in self.model.named_parameters()
            if tensor.requires_grad
        ]
        for pname, tensor in trainable:
            print(f"{pname:30} | Shape: {str(tensor.shape):20} | Params: {tensor.numel()}")
        total_params = sum(tensor.numel() for _, tensor in trainable)
        print(f"\nTotal Trainable Parameters: {total_params:,}")


In [None]:
def train(dataset, model_config =None, trade_config = None):
    """Train a ``DymaMoE`` model on ``dataset``.

    Args:
        dataset: Dataset yielding ``(inputs_dict, target)`` and offering
            ``inverse_transform`` (directly, or through ``.dataset`` when it
            is a ``Subset``).
        model_config: dict with at least ``'learning_rate'``; also passed to
            ``DymaMoE``.
        trade_config: dict with ``'batch_size'`` and ``'epochs'``.

    Returns:
        ``(model, (loss_history, expert_weights), (targets, predictions))``
        where ``loss_history`` holds the mean loss per epoch,
        ``expert_weights`` the mean gate output per expert and epoch, and
        ``targets`` / ``predictions`` the de-normalised values of every
        training batch of every epoch, in the order they were processed.

    Raises:
        ValueError: If the data loader yields no batches.
    """
    logger = TrainingLogger("../logs")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = DymaMoE(input_dim=30, model_config=model_config).to(device)

    model.print_model_structure()

    optimizer = torch.optim.AdamW([
        {'params': model.experts.parameters(), 'lr': model_config['learning_rate']},
        {'params': model.gate.parameters(), 'lr': 1e-3}
    ])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    mse = nn.MSELoss()

    def hybrid_loss(pred, target, expert_outs, alpha=0.7):
        """MSE blended with a sign-agreement term across the experts.

        The agreement term is built from detached outputs, so it shifts the
        loss value but contributes no gradient.
        """
        pred = pred.squeeze(-1)      # (batch, 1) -> (batch)
        # squeeze(-1) rather than squeeze(): a batch of size 1 must stay 1-D.
        target = target.squeeze(-1)  # (batch, 1) -> (batch)
        main_loss = mse(pred, target)

        trends = torch.stack([torch.sign(out.detach()) for out in expert_outs.values()])
        consistency = torch.mean(torch.prod(trends, dim=0))

        return alpha*main_loss + (1-alpha)*(1 - consistency)

    # inverse_transform lives on the underlying dataset when given a Subset.
    base_dataset = dataset.dataset if isinstance(dataset, Subset) else dataset

    # Capture the gate's softmax output during the real forward passes.
    gate_capture = {}

    def _capture_gate(_module, _inputs, output):
        gate_capture['weights'] = output.detach()

    gate_hook = model.gate.register_forward_hook(_capture_gate)

    loss_history = []
    expert_weights = []
    predictions = []
    targets = []
    loader = DataLoader(dataset, batch_size=trade_config['batch_size'], shuffle=True)
    epochs = trade_config['epochs']
    num_batches = len(loader)
    try:
        if num_batches == 0:
            raise ValueError("train() received a dataset that yields no batches")
        for epoch in range(epochs):
            epoch_loss = 0.0
            running_loss = 0.0
            gate_sum = None
            model.train()
            # Learning rate of every parameter group at the start of the epoch.
            logger.log_learning_rate(optimizer, epoch)
            for batch_idx, (inputs, target) in enumerate(loader):
                if isinstance(inputs, dict):
                    inputs = {k: v.to(device) for k, v in inputs.items()}
                else:
                    inputs = inputs.to(device)
                target = target.to(device)

                pred, expert_outs = model(inputs)
                loss = hybrid_loss(pred, target, expert_outs)

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 0.5)

                global_step = epoch * num_batches + batch_idx
                # Gradient histograms every 10th batch (after clipping).
                if batch_idx % 10 == 0:
                    logger.log_gradients(model, global_step)

                optimizer.step()

                batch_loss = loss.item()
                running_loss += batch_loss
                epoch_loss += batch_loss
                if batch_idx % 50 == 49:
                    logger.log_scalar('train/batch_loss', running_loss / 50, global_step)
                    running_loss = 0.0

                    # Parameter histograms.
                    for name, param in model.named_parameters():
                        logger.log_histogram(f"parameters/{name}", param, global_step)

                # `pred` still requires grad, so it must be detached before
                # it can be converted to NumPy.
                predictions.extend(base_dataset.inverse_transform(pred.detach().cpu().numpy()))
                targets.extend(base_dataset.inverse_transform(target.detach().cpu().numpy()))

                batch_gate = gate_capture['weights'].mean(dim=0)
                gate_sum = batch_gate if gate_sum is None else gate_sum + batch_gate

            # Mean gate weight per expert over the epoch's batches.
            expert_weights.append((gate_sum / num_batches).cpu().numpy().tolist())
            avg_loss = epoch_loss / num_batches
            loss_history.append(avg_loss)
            scheduler.step()
            # Console / file log line for the epoch.
            logger.logger.info(
                f"Epoch {epoch+1}/{epochs} | "
                f"Train Loss: {avg_loss:.4f} | "
                f"LR: {optimizer.param_groups[0]['lr']:.2e}"
            )
    finally:
        gate_hook.remove()
        logger.close()
    return model, (loss_history, expert_weights), (targets, predictions)

# Train with the model/training settings of the first entry in config['props'].
model, history, results = train(
    dataset=dataset,
    model_config=config['props'][0]['model_config'],
    trade_config=config['props'][0]['trade_config'],
)

In [None]:
def visualize_training(loss_history, expert_weights, targets, predictions):
    """Show three panels: smoothed loss, expert-weight heatmap, predictions."""
    plt.figure(figsize=(15,5))
    # 5-epoch rolling mean of the loss.
    smooth_loss = pd.Series(loss_history).rolling(5, min_periods=1).mean()

    # Panel 1: loss curve.
    loss_ax = plt.subplot(1,3,1)
    loss_ax.plot(smooth_loss, label='Training Loss')
    loss_ax.set_title("Loss Curve")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")

    # Panel 2: one heatmap row per epoch, one column per expert.
    weight_ax = plt.subplot(1,3,2)
    expert_labels = ['30min','1hour','4hour','1day']
    sns.heatmap(np.array(expert_weights), cmap='viridis',
                xticklabels=expert_labels, ax=weight_ax)
    weight_ax.set_title("Expert Weights Distribution")
    weight_ax.set_xlabel("Experts")
    weight_ax.set_ylabel("Epoch")

    # Panel 3: scatter of the first 100 pairs plus the first 200 values of
    # both series drawn as lines.
    pred_ax = plt.subplot(1,3,3)
    pred_ax.scatter(targets[:100], predictions[:100], alpha=0.5)
    pred_ax.plot(targets[:200], label='True Price')
    pred_ax.plot(predictions[:200], alpha=0.7, label='Predicted')
    pred_ax.set_title("Predictions vs Ground Truth")
    pred_ax.set_xlabel("True Values")
    pred_ax.set_ylabel("Predictions")

    plt.tight_layout()
    plt.show()

# Visual analysis of the finished training run.
visualize_training(
    history[0],
    history[1],
    targets=results[0],
    predictions=results[1],
)

In [None]:
def calculate_metrics(targets, predictions, config):
    """Compute the evaluation metrics listed in ``config['metrics']``.

    Supported names: ``MAE``, ``MSE``, ``R2``, ``RMSE``, ``MAPE`` and
    ``Sharpe``; unknown names are ignored.  Only the requested metrics are
    evaluated.

    Args:
        targets: Array-like of true values (original price units expected).
        predictions: Array-like of predicted values, same shape as targets.
        config: dict whose ``'metrics'`` entry lists the metric names.

    Returns:
        dict mapping each requested, supported name to its value.  ``MAPE``
        skips zero targets and is NaN when every target is zero; ``Sharpe``
        is NaN when the residuals have zero standard deviation.
    """
    y_true = np.asarray(targets, dtype=float)
    y_pred = np.asarray(predictions, dtype=float)

    def _mape():
        # Percentage error is undefined where the true value is zero.
        nonzero = y_true != 0
        if not nonzero.any():
            return float('nan')
        ratio = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
        return float(np.mean(np.abs(ratio)) * 100)

    def _sharpe():
        spread = np.std(y_true - y_pred)
        if spread == 0:
            return float('nan')
        return float((np.mean(y_pred) - np.mean(y_true)) / spread)

    # Lazy table: a metric is evaluated only when it was asked for.
    metric_funcs = {
        'MAE': lambda: mean_absolute_error(y_true, y_pred),
        'MSE': lambda: mean_squared_error(y_true, y_pred),
        'R2': lambda: r2_score(y_true, y_pred),
        'RMSE': lambda: np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAPE': _mape,
        'Sharpe': _sharpe,
    }
    return {
        metric: metric_funcs[metric]()
        for metric in config['metrics']
        if metric in metric_funcs
    }
def visualize_training2(dataset, loss_history, expert_weights, targets, predictions, config):
    """Show four panels: loss with metrics, expert weights, predictions, correlations.

    Args:
        dataset: Object exposing ``aligned_data`` (scale -> DataFrame) and,
            optionally, ``target_scale`` / ``scaler_factors``.
        loss_history: Per-epoch loss values.
        expert_weights: One row of per-expert weights per epoch.
        targets: True values in the order they were collected.
        predictions: Predicted values in the same order.
        config: Evaluation configuration passed to ``calculate_metrics``.
    """
    flat_targets = np.asarray(targets, dtype=float).reshape(-1)
    flat_predictions = np.asarray(predictions, dtype=float).reshape(-1)

    plt.figure(figsize=(20,6))

    # 1. Smoothed loss curve with the metric values as a text box.
    plt.subplot(1,4,1)
    smooth_loss = pd.Series(loss_history).rolling(5, min_periods=1).mean()
    plt.plot(smooth_loss, label='Smoothed Loss')

    metrics = calculate_metrics(targets, predictions, config)
    metric_text = "\n".join([f"{k}: {v:.4f}" for k,v in metrics.items()])
    plt.text(0.5, 0.3, metric_text, transform=plt.gca().transAxes,
             bbox=dict(facecolor='white', alpha=0.5))

    plt.title("Training Loss & Metrics")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")

    # 2. Expert weights per epoch.
    plt.subplot(1,4,2)
    weights_df = pd.DataFrame(expert_weights)
    default_labels = ['30min','1h','4h','1d']
    scale_names = list(getattr(dataset, 'scaler_factors', []) or [])
    if weights_df.shape[1] == len(default_labels):
        weights_df.columns = default_labels
    elif weights_df.shape[1] == len(scale_names):
        # A different number of experts: label the columns by scale name.
        weights_df.columns = scale_names
    sns.lineplot(data=weights_df, dashes=False)
    plt.title("Expert Weights Evolution")
    plt.xlabel("Epoch")
    plt.ylabel("Weight Value")

    # 3. First 200 true vs. predicted values, with a residual histogram inset.
    plt.subplot(1,4,3)
    plt.plot(flat_targets[:200], label='True Price')
    plt.plot(flat_predictions[:200], alpha=0.7, label='Predicted')

    ax = plt.gca()
    inset = ax.inset_axes([0.6,0.15,0.35,0.25])
    inset.hist(flat_targets - flat_predictions, bins=30, density=True)
    # Title the inset itself so the panel title below does not replace it.
    inset.set_title("Residual Distribution", fontsize=8)

    plt.title("Price Prediction Comparison")
    plt.xlabel("Time Step")
    plt.ylabel("Price (RMB)")
    plt.legend()

    # 4. Correlation of each column with the predictions.
    # NOTE(review): rows and predictions are paired purely by position; if
    # the predictions were collected from shuffled batches these values are
    # not meaningful - confirm how `predictions` was produced.
    plt.subplot(1,4,4)
    scale_key = getattr(dataset, 'target_scale', '30min')
    if scale_key not in dataset.aligned_data:
        scale_key = '30min'
    frame = dataset.aligned_data[scale_key]
    n_rows = min(len(frame), flat_predictions.size)
    # Give the predictions the frame's own index; corrwith aligns on index
    # labels and would otherwise find no common rows.
    pred_series = pd.Series(flat_predictions[:n_rows], index=frame.index[:n_rows])
    feature_importance = frame.iloc[:n_rows].corrwith(pred_series).sort_values()
    if feature_importance.notna().any():
        sns.heatmap(feature_importance.to_frame().T, cmap='coolwarm',
                    annot=True, fmt=".2f")
    else:
        plt.text(0.5, 0.5, "No correlation data", ha='center', va='center',
                 transform=plt.gca().transAxes)
    plt.title("Feature Correlation Heatmap")
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()
# Extended visualisation using the first props entry's evaluation settings.
visualize_training2(
    dataset,
    history[0],
    history[1],
    targets=results[0],
    predictions=results[1],
    config=config.get('props')[0].get('eval_config'),
)