In [None]:
import pandas as pd
import numpy as np
import json
from pandas.plotting import table
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
# from torchsummary import summary
from torchinfo import summary

from typing import Dict, Any
import platform
# Pick a CJK-capable font family for the current operating system so that the
# Chinese titles and labels used in the plots below can be rendered.
_CJK_FONT_BY_SYSTEM = {
    'Darwin': 'Songti SC',   # macOS
    'Windows': 'SimSun',
}
plt.rcParams['font.family'] = [
    # Any other platform (Linux in practice) falls back to Noto Sans CJK SC.
    _CJK_FONT_BY_SYSTEM.get(platform.system(), 'Noto Sans CJK SC')
]
# Work around the minus-sign display problem that appears with these fonts.
matplotlib.rcParams['axes.unicode_minus'] = False

In [None]:
# Load the notebook configuration. `config` is pre-initialised so the name
# exists in the kernel even if reading the file fails.
config = {}
with open('./config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Echo the parsed configuration (non-ASCII characters are kept readable).
config_dump = json.dumps(config, indent=2, ensure_ascii=False)
print(config_dump)

In [None]:
class MultiScaleStockDataset(Dataset):
    """Multi-scale OHLC price dataset.

    One standardised array is loaded per entry of ``scale_factors`` from
    ``<data_path>train-<scale>.csv``. Each sample is a pair ``(inputs, target)``:

    * ``inputs`` is a dict mapping every scale to a float tensor holding a
      ``seq_length``-row window of that scale's features;
    * ``target`` is a scalar float tensor read from column 0 of the target
      scale, ``pred_length`` rows after a ``seq_length``-row window that starts
      at the sample index.

    The target scale is ``'30min'`` when that scale is loaded, otherwise the
    first entry of ``scale_factors``.

    Args:
        scale_factors: Sequence of scale names; each must be a key of
            ``SCALE_RATIO``.
        seq_length: Number of rows in every returned window.
        pred_length: Forecast horizon, in rows of the target scale.
        data_path: Prefix prepended verbatim to the CSV file names.

    Raises:
        KeyError: If a scale is not a key of ``SCALE_RATIO``.
        AssertionError: If a scale has fewer than ``seq_length * ratio`` rows.
    """

    # Per-scale multiplier used for the minimum-length check and for mapping a
    # sample index onto each scale's rows.
    SCALE_RATIO = {'30min': 48, '1hour': 24, '4hour': 6, '1day': 1}
    # Columns read from every CSV file ('date' becomes the index).
    CSV_COLUMNS = ['date', 'open', 'high', 'low', 'close']

    def __init__(self, scale_factors, seq_length=24, pred_length=4, data_path='./'):
        self.scale_factors = scale_factors
        self.seq_length = seq_length
        self.pred_length = pred_length
        self.data_path = data_path
        # One fitted scaler per scale, so each scale can be inverse-transformed
        # later. `standardScaler` is kept for backward compatibility and, as
        # before, ends up holding the statistics of the last loaded scale.
        self.scalers = {}
        self._columns = {}
        self.standardScaler = StandardScaler()
        self.data = {
            scale: self._generate_synthetic_data(scale) for scale in self.scale_factors
        }
        # Feature names are the columns that were actually loaded, so they line
        # up one-to-one with the columns of the arrays in `self.data`.
        self.feature_names = (
            list(self._columns[self._target_scale()]) if self.data else []
        )
        self.feature_dims = {
            scale: data.shape[1] for scale, data in self.data.items()
        }
        # Validate that every scale has enough rows for one aligned window.
        for scale, data in self.data.items():
            min_required = seq_length * self.SCALE_RATIO[scale]
            assert len(data) >= min_required, \
                f"{scale}数据至少需要{min_required}条，当前仅{len(data)}条"

    def _target_scale(self):
        """Return the scale the prediction target is read from."""
        return '30min' if '30min' in self.data else self.scale_factors[0]

    def _generate_synthetic_data(self, scale):
        """Load and standardise the OHLC columns of one scale.

        Despite the name, nothing is synthesised: the data is read from
        ``<data_path>train-<scale>.csv``.

        Returns:
            A 2-D numpy array of standardised values (rows x loaded columns).
        """
        frame = pd.read_csv(self.data_path + f'train-{scale}.csv',
                            usecols=list(self.CSV_COLUMNS),
                            parse_dates=['date'], index_col='date')
        scaler = StandardScaler()
        scaled = scaler.fit_transform(frame)
        self.scalers[scale] = scaler
        self._columns[scale] = frame.columns.tolist()
        self.standardScaler = scaler
        return scaled

    def _align_scales(self, idx):
        """Build one ``seq_length``-row window per scale for sample ``idx``.

        For every scale the start row is ``(idx // 8) * ratio``, clamped so that
        a span of ``seq_length * ratio`` rows fits in the data; the window is
        the first ``seq_length`` rows of that span. Series shorter than
        ``seq_length`` are linearly interpolated up to ``seq_length`` rows, and
        an empty series yields an all-zero window.

        Returns:
            Dict mapping scale name to a numpy array with ``seq_length`` rows.
        """
        # NOTE(review): the original comment says this aligns to the daily
        # level, but the divisor 8 does not match any value in SCALE_RATIO —
        # confirm the intended alignment.
        base_idx = idx // 8
        aligned = {}
        for scale in self.scale_factors:
            ratio = self.SCALE_RATIO[scale]
            series = self.data[scale]

            span = int(self.seq_length * ratio)
            start = int(base_idx * ratio)
            # Clamp into [0, len - span]; a too-short series collapses to 0.
            start = max(0, min(start, len(series) - span))

            window = series[start:start + span]
            if len(window) == 0:
                # Zero-fill with the real feature count of this scale.
                window = np.zeros((self.seq_length, series.shape[1]))
            if isinstance(window, (pd.DataFrame, pd.Series)):
                window = window.values
            # Defensive copy so callers never mutate the stored series.
            window = np.array(window)

            if len(window) < self.seq_length:
                # Stretch a short series to seq_length rows, column by column.
                x_orig = np.arange(len(window))
                x_new = np.linspace(0, len(window) - 1, self.seq_length)
                window = np.array([np.interp(x_new, x_orig, col)
                                   for col in window.T]).T

            aligned[scale] = window[:self.seq_length]
        return aligned

    def visualize_aligned_features(self, num_samples=3):
        """Plot every feature of every scale for ``num_samples`` random samples.

        One figure is shown per sample, with one subplot per scale, followed by
        a printout of the sample's target value.
        """
        for _ in range(num_samples):
            idx = np.random.randint(0, len(self))
            inputs, target = self[idx]

            fig, axs = plt.subplots(len(self.scale_factors), 1,
                                    figsize=(12, 2 * len(self.scale_factors)))
            plt.suptitle(f"样本 {idx} 对齐特征可视化", y=1.02)

            for i, (scale, tensor) in enumerate(inputs.items()):
                data = tensor.numpy()
                # plt.subplots returns a bare Axes when there is a single row.
                ax = axs[i] if len(self.scale_factors) > 1 else axs
                for j in range(data.shape[1]):
                    if j < len(self.feature_names):
                        label = f'{self.feature_names[j]}'
                    else:
                        label = f'feature_{j}'
                    ax.plot(data[:, j], label=label)
                ax.set_title(f"{scale}尺度特征序列")
                ax.legend(loc='upper right')
                ax.grid(True)

            plt.tight_layout()
            plt.show()

            # Report the target value that belongs to this sample.
            print(f"\n目标值（{self.pred_length}步后预测）: {target.item():.4f}")
            print("-"*50)

    def show_aligned_samples(self, num_samples=3):
        """Compare the window at the sample index with the aligned window.

        For ``num_samples`` random samples and every scale, the first feature
        column of the two windows is drawn as curves and summarised (mean, std,
        max, min) in a table next to the plot.
        """
        for _ in range(num_samples):
            idx = np.random.randint(0, len(self))
            aligned = self._align_scales(idx)
            plt.figure(figsize=(16, 12))

            for i, scale in enumerate(self.scale_factors, 1):
                series = self.data[scale]
                # The sample index is bounded by the length of the first scale
                # and can exceed the length of coarser scales, so clamp it to
                # keep the comparison window inside this scale's data.
                raw_start = max(0, min(idx, len(series) - self.seq_length))
                raw_data = series[raw_start:raw_start + self.seq_length]
                if isinstance(raw_data, pd.DataFrame):
                    raw_data = raw_data.values
                assert len(raw_data) > 0, f"{scale}数据长度不足，Dataset cannot be empty"

                # Left column: curves.
                ax1 = plt.subplot(len(self.scale_factors), 2, 2*i-1)
                ax1.plot(raw_data[:, 0], 'g-', label='原始数据')
                ax1.plot(aligned[scale][:, 0], 'b--', label='对齐数据')
                ax1.set_title(f'{scale} 对齐对比')

                # Right column: summary-statistics table.
                ax2 = plt.subplot(len(self.scale_factors), 2, 2*i)
                ax2.axis('off')
                comparison_df = pd.DataFrame({
                    '指标': ['均值', '标准差', '最大值', '最小值'],
                    '原始数据': [
                        raw_data[:, 0].mean(),
                        raw_data[:, 0].std(),
                        raw_data[:, 0].max(),
                        raw_data[:, 0].min()
                    ],
                    '对齐数据': [
                        aligned[scale][:, 0].mean(),
                        aligned[scale][:, 0].std(),
                        aligned[scale][:, 0].max(),
                        aligned[scale][:, 0].min()
                    ]
                })
                ax2.table(cellText=comparison_df.values,
                          colLabels=comparison_df.columns,
                          loc='center',
                          cellLoc='center',
                          colColours=['#f0f0f0']*3)

            plt.tight_layout()
            plt.show()

    def __getitem__(self, idx):
        """Return ``(inputs, target)`` for sample ``idx``.

        Returns:
            inputs: dict of scale name -> float32 tensor window.
            target: scalar float32 tensor, column 0 of the target scale at row
                ``idx + seq_length + pred_length - 1``.
        """
        inputs = self._align_scales(idx)
        target_row = idx + self.seq_length + self.pred_length - 1
        # Use the same fallback scale everywhere instead of a hard-coded
        # '30min' key, which raised KeyError when that scale was not loaded.
        target = self.data[self._target_scale()][target_row, 0]
        target_tensor = torch.tensor(target, dtype=torch.float32)
        return {k: torch.FloatTensor(v) for k, v in inputs.items()}, target_tensor

    def __len__(self):
        """Number of samples, derived from the length of the first scale."""
        return max(0, len(self.data[self.scale_factors[0]]) - self.pred_length - self.seq_length)

class ScaleAwareExpert(nn.Module):
    """Single-scale expert that maps a feature sequence to one scalar.

    Two architectures are used, selected by ``scale_type``:

    * scales containing ``'30min'`` or ``'1h'`` use Conv1d -> 2-layer
      bidirectional LSTM -> linear projection to ``input_dim * 2`` features;
    * all other scales use a linear input projection -> 3-layer Transformer
      encoder -> linear projection to 32 features.

    In both cases only the last time step is kept and passed to a final
    ``Linear(..., 1)`` predictor.

    Args:
        input_dim: Number of features per time step.
        scale_type: Name of the time scale handled by this expert.

    Input to ``forward`` is indexed as ``(batch, time, feature)``; the output
    has shape ``(batch, 1)``.
    """

    def __init__(self, input_dim, scale_type):
        super().__init__()

        self.scale_type = scale_type
        self.input_dim = input_dim
        # Single source of truth for the branch, shared by __init__ and forward.
        self.is_high_freq = '30min' in scale_type or '1h' in scale_type

        if self.is_high_freq:
            # High-frequency expert: CNN + BiLSTM.
            conv_channels = max(16, input_dim // 4)
            self.conv = nn.Conv1d(input_dim,
                                  out_channels=conv_channels,
                                  kernel_size=3,
                                  padding='same')
            self.lstm = nn.LSTM(input_size=conv_channels,
                                hidden_size=input_dim * 4,
                                num_layers=2,
                                batch_first=True,
                                bidirectional=True)
            # Bidirectional LSTM emits 2 * hidden_size = input_dim * 8 features.
            self.proj = nn.Linear(input_dim * 8, input_dim * 2)
            head_dim = input_dim * 2
        else:
            # Low-frequency expert: Transformer encoder.
            self.hidden_dim = max(4, input_dim // 4)
            assert self.hidden_dim >= 4, "特征维度过小"
            self.proj_in = nn.Linear(input_dim, self.hidden_dim)
            # nhead must divide d_model; step down until it does.
            self.nhead = max(2, self.hidden_dim // 8)
            while self.hidden_dim % self.nhead != 0:
                self.nhead -= 1
            self.nhead = max(1, self.nhead)
            self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.hidden_dim,
                                                            nhead=self.nhead,
                                                            dim_feedforward=self.hidden_dim * 4,
                                                            batch_first=True)
            self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=3)
            self.proj = nn.Linear(self.hidden_dim, 32)
            head_dim = 32

        # Size the predictor from the branch that was actually built. The old
        # `'day' in scale_type` test gave '4hour' a Linear(input_dim*2, 1) head
        # behind a 32-feature projection, which failed in forward.
        self.predictor = nn.Linear(head_dim, 1)

    def forward(self, x):
        """Run the expert on ``x`` and return a ``(batch, 1)`` prediction."""
        if self.is_high_freq:
            # Conv1d expects (batch, channels, time); swap axes around the call.
            x = self.conv(x.permute(0, 2, 1)).permute(0, 2, 1)
            x, _ = self.lstm(x)
            x = self.proj(x[:, -1, :])  # keep the last time step only
        else:
            x = self.proj_in(x)
            if x.size(-1) != self.hidden_dim:
                raise ValueError(f"Transformer输入维度错误，期望{self.hidden_dim}，实际{x.size(-1)}")
            x = self.transformer(x)
            x = self.proj(x[:, -1, :])  # keep the last time step only
        return self.predictor(x)
class MoE(nn.Module):
    """Mixture-of-experts network with one expert per time scale.

    Every scale gets its own ``ScaleAwareExpert``. A small gating MLP looks at
    the time-averaged, feature-concatenated inputs of all scales and produces
    softmax weights that blend the scalar expert outputs.

    Args:
        experts: Iterable of scale names; ``None`` selects the default four.
        feature_dims: Mapping of scale name to feature count. Required.

    Raises:
        ValueError: If ``feature_dims`` is not provided.
        KeyError: If a scale in ``experts`` is missing from ``feature_dims``.
    """

    def __init__(self, experts=('30min', '1hour', '4hour', '1day'), feature_dims=None):
        super().__init__()
        if feature_dims is None:
            raise ValueError("feature_dims is required: map each expert scale to its feature count")
        if experts is None:
            experts = ('30min', '1hour', '4hour', '1day')
        experts = list(experts)

        # Gate input width is the sum of the feature counts of all experts.
        total_gate_dim = sum(feature_dims[scale] for scale in experts)
        self.experts = nn.ModuleDict({
            scale: ScaleAwareExpert(input_dim=feature_dims[scale], scale_type=scale)
            for scale in experts
        })
        self.gate = nn.Sequential(
            nn.Linear(total_gate_dim, total_gate_dim // 2),
            nn.ReLU(),
            nn.Linear(total_gate_dim // 2, len(experts)),
            nn.Softmax(dim=-1)
        )
        # The two members below are constructed but not used by forward();
        # they are kept so that existing state_dict keys stay unchanged.
        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=4)
        self.auxiliary = nn.ModuleDict({
            'volatility': nn.Linear(32, 1),
            'trend': nn.Linear(32, 1)
        })

    def forward(self, inputs):
        """Blend the expert predictions with input-dependent gate weights.

        Args:
            inputs: Dict of scale name -> tensor indexed (batch, time, feature).
                Keys that do not belong to an expert are ignored.

        Returns:
            Tuple ``(combined, expert_outputs)`` where ``combined`` has shape
            ``(batch, 1)`` and ``expert_outputs`` maps each scale to a
            ``(batch,)`` tensor.
        """
        scales = list(self.experts.keys())
        expert_outputs = {
            scale: self.experts[scale](inputs[scale]).squeeze(-1) for scale in scales
        }
        # Concatenate in expert order so that gate column i always belongs to
        # expert i, regardless of the key order (or extra keys) of `inputs`.
        gate_input = torch.cat([inputs[scale] for scale in scales], dim=-1)
        weights = self.gate(gate_input.mean(dim=1))  # (batch, num_experts)

        combined = sum(weights[:, i] * expert_outputs[scale]
                       for i, scale in enumerate(scales))
        return combined.unsqueeze(-1), expert_outputs

In [None]:
# Model training
def train(dataset, model_config: Dict[str, Any], trade_config: Dict[str, Any], scale_factors=None):
    """Train a ``MoE`` model on ``dataset``.

    Uses AdamW (lr 1e-3), a cosine-annealing schedule with ``T_max=10`` stepped
    once per epoch, and gradient-norm clipping at 1.0.

    Args:
        dataset: Dataset yielding ``(inputs_dict, target)`` and exposing
            ``feature_dims`` (and ``scale_factors`` when the argument below is
            omitted).
        model_config: Must contain ``'batch_size'``.
        trade_config: Must contain ``'num_epochs'``.
        scale_factors: Scales to build experts for; defaults to
            ``dataset.scale_factors``.

    Returns:
        ``(model, (loss_history, expert_weights), (targets, predictions))``:
        ``loss_history`` holds the mean batch loss per epoch, ``expert_weights``
        the mean gate weight of each expert per epoch, and ``targets`` /
        ``predictions`` the values of every training batch of every epoch, in
        iteration order.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if scale_factors is None:
        scale_factors = dataset.scale_factors
    loader = DataLoader(dataset, batch_size=model_config['batch_size'], shuffle=True)

    model = MoE(experts=scale_factors, feature_dims=dataset.feature_dims).to(device)
    for name, expert in model.experts.items():
        print(f"{name}专家输入维度: {expert.input_dim}")
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

    mse = nn.MSELoss()

    def hybrid_loss(pred, target, expert_outs, alpha=0.7):
        """MSE plus a penalty for experts disagreeing on the output sign."""
        # reshape(-1) keeps a 1-D shape even for a batch of size one, where
        # squeeze() would collapse the tensor to 0-d.
        pred = pred.reshape(-1)
        target = target.reshape(-1)
        main_loss = mse(pred, target)

        # Sign-agreement term. The expert outputs are detached, so this term
        # changes the reported loss value but contributes no gradient.
        trends = torch.stack([torch.sign(out.detach()).reshape(-1)
                              for out in expert_outs.values()])
        consistency = torch.mean(torch.prod(trends, dim=0))

        return alpha * main_loss + (1 - alpha) * (1 - consistency)

    expert_names = list(model.experts.keys())
    loss_history = []
    expert_weights = []
    predictions = []
    targets = []
    for epoch in range(trade_config['num_epochs']):
        model.train()
        epoch_loss = 0.0
        gate_sum = torch.zeros(len(expert_names), device=device)
        samples_seen = 0
        for inputs, target in loader:
            inputs = {k: v.to(device) for k, v in inputs.items()}
            target = target.to(device)

            pred, expert_outs = model(inputs)
            loss = hybrid_loss(pred, target, expert_outs)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            with torch.no_grad():
                # Collect predictions and targets as flat 1-D arrays.
                predictions.extend(pred.detach().reshape(-1).cpu().numpy())
                targets.extend(target.reshape(-1).cpu().numpy())
                # Accumulate the real gate weights on real data. The previous
                # code ran a forward pass on random input, discarded it, and
                # stored the mean of each expert's first parameter tensor.
                gate_input = torch.cat([inputs[name] for name in expert_names], dim=-1)
                gate_sum += model.gate(gate_input.mean(dim=1)).sum(dim=0)
                samples_seen += target.numel()

            epoch_loss += loss.item()

        expert_weights.append((gate_sum / max(samples_seen, 1)).cpu().tolist())
        avg_loss = epoch_loss / max(len(loader), 1)
        loss_history.append(avg_loss)
        scheduler.step()
        # Report the epoch mean rather than the loss of the last batch only.
        print(f'Epoch {epoch} Loss: {avg_loss:.4f}')

    return model, (loss_history, expert_weights), (targets, predictions)
# Build the training dataset from the notebook configuration.
dataset = MultiScaleStockDataset(
    scale_factors=config['scale_factors'],
    data_path=config['data_path'],
)
print(f"特征数量: {len(dataset.feature_names)}")  # expected to print 4
print(f"特征维度分布:{dataset.feature_dims}")
print(f"样本维度: {dataset.data['30min'].shape}")  # expected to be (N, 4)

# Optional sanity-check plots, disabled by default:
# dataset.visualize_aligned_features(num_samples=2)
# dataset.show_aligned_samples(num_samples=3)

# Train with the first entry of config['props'].
first_props = config['props'][0]
model, history, results = train(
    dataset,
    first_props['model_config'],
    first_props['trade_config'],
    config['scale_factors'],
)


In [None]:
def visualize_training(loss_history, expert_weights, targets, predictions, expert_names=None):
    """Plot the loss curve, the expert-weight heatmap and a prediction scatter.

    Args:
        loss_history: Sequence with one loss value per epoch.
        expert_weights: 2-D array-like, one row per epoch and one column per
            expert.
        targets: Sequence of ground-truth values.
        predictions: Sequence of predicted values, aligned with ``targets``.
        expert_names: Optional heatmap column labels. When omitted, the legacy
            labels ``['30min', '1h', '4h', '1d']`` are used if there are
            exactly four columns, otherwise generic ``expert i`` labels.
    """
    weights = np.asarray(expert_weights)
    if expert_names is None:
        legacy_names = ['30min', '1h', '4h', '1d']
        n_experts = weights.shape[1] if weights.ndim == 2 else 0
        # A label list whose length differs from the column count makes the
        # heatmap fail, so only use the legacy labels when they fit.
        if n_experts == len(legacy_names):
            expert_names = legacy_names
        else:
            expert_names = [f'expert {i}' for i in range(n_experts)]

    fig, (loss_ax, weight_ax, scatter_ax) = plt.subplots(1, 3, figsize=(15, 5))

    # Loss curve
    loss_ax.plot(loss_history, label='Training Loss')
    loss_ax.set_title("Loss Curve")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")

    # Expert-weight heatmap (skipped when nothing was recorded)
    if weights.ndim == 2 and weights.size > 0:
        sns.heatmap(weights, cmap='viridis', xticklabels=expert_names, ax=weight_ax)
    weight_ax.set_title("Expert Weights Distribution")
    weight_ax.set_xlabel("Experts")
    weight_ax.set_ylabel("Epoch")

    # First 100 predictions against the ground truth, with a y = x reference
    # line spanning the full range of the targets.
    scatter_ax.scatter(targets[:100], predictions[:100], alpha=0.5)
    if len(targets) > 0:
        lo, hi = min(targets), max(targets)
        scatter_ax.plot([lo, hi], [lo, hi], 'r--')
    scatter_ax.set_title("Predictions vs Ground Truth")
    scatter_ax.set_xlabel("True Values")
    scatter_ax.set_ylabel("Predictions")

    fig.tight_layout()
    plt.show()
    
def enhanced_visualization(loss_history, expert_weights, param_distributions, config):
    """Draw four diagnostic panels on a 2x3 grid.

    Panels: log-scale training loss, annotated expert-weight heatmap labelled
    with ``config["scale_factors"]``, one KDE per entry of
    ``param_distributions``, and a line plot titled "Gradient Flow".

    Note that the "Gradient Flow" panel plots the same ``param_distributions``
    values as the KDE panel; no separate gradient data is passed in.
    """
    fig = plt.figure(figsize=(18, 12))

    # Panel 1: loss on a logarithmic y-axis.
    loss_ax = fig.add_subplot(2, 3, 1)
    loss_ax.semilogy(loss_history)
    loss_ax.set_title("Training Loss (Log Scale)")

    # Panel 2: annotated heatmap of the expert weights.
    weight_ax = fig.add_subplot(2, 3, 2)
    weight_matrix = np.array(expert_weights)
    sns.heatmap(weight_matrix, cmap='viridis',
                annot=True, fmt=".2f",
                xticklabels=config["scale_factors"],
                ax=weight_ax)
    weight_ax.set_title("Expert Weight Distribution")

    # Panel 3: kernel density estimate per named series.
    kde_ax = fig.add_subplot(2, 3, 3)
    for series_name, series_values in param_distributions.items():
        sns.kdeplot(series_values, label=series_name, ax=kde_ax)
    kde_ax.set_title("Parameter Value Distributions")
    kde_ax.legend()

    # Panel 4: the same series drawn as plain lines.
    flow_ax = fig.add_subplot(2, 3, 4)
    for series_name, series_values in param_distributions.items():
        flow_ax.plot(series_values, label=series_name)
    flow_ax.set_title("Gradient Flow")
    flow_ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Visual analysis: `history` is (loss_history, expert_weights) and `results`
# is (targets, predictions), as returned by train().
visualize_training(*history, targets=results[0], predictions=results[1])

In [None]:
def analyze_experts(model, device, test_loader):
    """Plot the distribution of every expert's outputs over ``test_loader``.

    The model is switched to evaluation mode while the outputs are collected,
    so dropout does not add noise, and its previous training/eval mode is
    restored afterwards.

    Args:
        model: Model exposing ``experts`` and returning
            ``(combined, expert_outputs_dict)`` from its forward pass.
        device: Device the input tensors are moved to.
        test_loader: Iterable yielding ``(inputs_dict, target)`` batches.
    """
    expert_outputs = {name: [] for name in model.experts}

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, _ in test_loader:
                inputs = {k: v.to(device) for k, v in inputs.items()}
                _, outputs = model(inputs)

                for name, out in outputs.items():
                    # reshape(-1) stays 1-D for a batch of size one, where
                    # squeeze() would give a 0-d array that cannot be extended.
                    expert_outputs[name].extend(out.reshape(-1).cpu().numpy())
    finally:
        model.train(was_training)

    plt.figure(figsize=(10, 6))
    for name, values in expert_outputs.items():
        sns.kdeplot(values, label=name, alpha=0.6)
    plt.title("Expert Output Distributions")
    plt.xlabel("Prediction Value")
    plt.ylabel("Density")
    plt.legend()
    plt.show()


In [None]:
# Section banner. The original "\\n" printed a literal backslash-n instead of
# a blank line.
print("\n=== Expert Analysis ===")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): this dataset is built from the same data_path and scale list
# as the training dataset above, so it reads the same train-<scale>.csv files;
# confirm whether a held-out split was intended.
test_dataset = MultiScaleStockDataset(
    data_path=config['data_path'],
    scale_factors=config['scale_factors']
)
model_config = config['props'][0]['model_config']
# Fixed iteration order: no shuffling for analysis.
test_loader = DataLoader(test_dataset, batch_size=model_config['batch_size'], shuffle=False)
analyze_experts(model, device, test_loader)