In [6]:
from deepdow.data import InRAMDataset, RigidDataLoader
from deepdow.layers import SoftmaxAllocator
from deepdow.losses import MeanReturns, SharpeRatio, MaximumDrawdown
#from deepdow.layers import ConvNetwork
from deepdow.experiments import Run
from deepdow.losses import MeanReturns, SharpeRatio, MaximumDrawdown
from deepdow.benchmarks import OneOverN, InverseVolatility, Random, MinimumVariance
from deepdow.callbacks import EarlyStoppingCallback
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from deepdow.utils import raw_to_Xy
from deepdow.benchmarks import Benchmark, OneOverN, Random, MaximumReturn
from deepdow.experiments import Run
from deepdow.callbacks import EarlyStoppingCallback, ModelCheckpointCallback, Callback
from deepdow.visualize import generate_metrics_table, generate_weights_table, plot_metrics, plot_weight_heatmap

In [7]:
configs_dict = {
    'task_name': 'long_term_forecast',
    'features': 'M',  # or 'S' for single variable
    'seq_len': 96,
    'label_len': 24,
    'pred_len': 24,
    'use_norm': True,
    'patch_len': 12,
    'enc_in': 7,  # number of input features
    'd_model': 64,  # model dimension
    'embed': 'fixed',  # embedding type
    'freq': 'd',  # frequency of the data
    'dropout': 0.1,
    'factor': 5,  # attention factor
    'n_heads': 8,  # number of attention heads
    'd_ff': 256,  # feed-forward dimension
    'e_layers': 3,  # number of encoder layers
    'activation': 'relu',  # activation function
    'augmentation_ratio': 0,  # augmentation ratio
    'seed': 42,  #
}


In [8]:
# 把configs转换为一个类
class Configs:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

In [9]:
configs = Configs(**configs_dict)

In [10]:
from data_provider.data_loader import Dataset_Custom, custom_collate_fn
from torch.utils.data import Dataset, DataLoader
train_dataset = Dataset_Custom(
    args=configs,
    root_path='.',
    flag='train',
    size=[configs.seq_len, configs.label_len, configs.pred_len],
    features=configs.features,
    data_path='mag7_ticker_first.parquet',
    target='Close',
    scale=True,
    timeenc=1,
    freq='D'
)
test_dataset = Dataset_Custom(
    args=configs,
    root_path='.',
    flag='test',
    size=[configs.seq_len, configs.label_len, configs.pred_len],
    features=configs.features,
    data_path='mag7_ticker_first.parquet',
    target='Close',
    scale=True,
    timeenc=1,
    freq='D'
)

train_dataloader = DataLoader(train_dataset, collate_fn=custom_collate_fn, batch_size=32, shuffle=True)
val_dataloader = DataLoader(test_dataset, collate_fn=custom_collate_fn, batch_size=32, shuffle=True)  

In [11]:
for batch in val_dataloader:
    X_batch, y_batch, timestamps_batch, *kw = batch
    print("X shape:", X_batch.shape)
    print("y shape:", y_batch.shape)
    print("Timestamps:", timestamps_batch)
    print("kw shape:", [k for k in kw])
    break  # 只打印第一个批次

X shape: torch.Size([32, 5, 96, 7])
y shape: torch.Size([32, 5, 48, 7])
Timestamps: [numpy.datetime64('2024-12-20T00:00:00.000000000'), numpy.datetime64('2021-12-31T00:00:00.000000000'), numpy.datetime64('2021-07-21T00:00:00.000000000'), numpy.datetime64('2022-03-09T00:00:00.000000000'), numpy.datetime64('2023-12-05T00:00:00.000000000'), numpy.datetime64('2021-12-14T00:00:00.000000000'), numpy.datetime64('2024-02-01T00:00:00.000000000'), numpy.datetime64('2021-10-06T00:00:00.000000000'), numpy.datetime64('2024-05-02T00:00:00.000000000'), numpy.datetime64('2024-05-31T00:00:00.000000000'), numpy.datetime64('2024-12-26T00:00:00.000000000'), numpy.datetime64('2021-12-27T00:00:00.000000000'), numpy.datetime64('2021-06-28T00:00:00.000000000'), numpy.datetime64('2024-09-19T00:00:00.000000000'), numpy.datetime64('2024-08-21T00:00:00.000000000'), numpy.datetime64('2023-06-22T00:00:00.000000000'), numpy.datetime64('2022-03-08T00:00:00.000000000'), numpy.datetime64('2022-03-07T00:00:00.000000000'

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding_inverted, PositionalEmbedding
import numpy as np


class FlattenHead(nn.Module):
    def __init__(self, n_vars, nf, target_window, head_dropout=0):
        super().__init__()
        self.n_vars = n_vars
        self.flatten = nn.Flatten(start_dim=-2)
        self.linear = nn.Linear(nf, target_window)
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):  # x: [bs x nvars x d_model x patch_num]
        x = self.flatten(x)
        x = self.linear(x)
        x = self.dropout(x)
        return x


class EnEmbedding(nn.Module):
    def __init__(self, n_vars, d_model, patch_len, dropout):
        super(EnEmbedding, self).__init__()
        # Patching
        self.patch_len = patch_len

        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
        self.glb_token = nn.Parameter(torch.randn(1, n_vars, 1, d_model))
        self.position_embedding = PositionalEmbedding(d_model)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # do patching
        n_vars = x.shape[1]
        glb = self.glb_token.repeat((x.shape[0], 1, 1, 1))

        x = x.unfold(dimension=-1, size=self.patch_len, step=self.patch_len)
        x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
        # Input encoding
        x = self.value_embedding(x) + self.position_embedding(x)
        x = torch.reshape(x, (-1, n_vars, x.shape[-2], x.shape[-1]))
        x = torch.cat([x, glb], dim=2)
        x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
        return self.dropout(x), n_vars


class Encoder(nn.Module):
    def __init__(self, layers, norm_layer=None, projection=None):
        super(Encoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        for layer in self.layers:
            x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x


class EncoderLayer(nn.Module):
    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        B, L, D = cross.shape
        x = x + self.dropout(self.self_attention(
            x, x, x,
            attn_mask=x_mask,
            tau=tau, delta=None
        )[0])
        x = self.norm1(x)

        x_glb_ori = x[:, -1, :].unsqueeze(1)
        x_glb = torch.reshape(x_glb_ori, (B, -1, D))
        x_glb_attn = self.dropout(self.cross_attention(
            x_glb, cross, cross,
            attn_mask=cross_mask,
            tau=tau, delta=delta
        )[0])
        x_glb_attn = torch.reshape(x_glb_attn,
                                   (x_glb_attn.shape[0] * x_glb_attn.shape[1], x_glb_attn.shape[2])).unsqueeze(1)
        x_glb = x_glb_ori + x_glb_attn
        x_glb = self.norm2(x_glb)

        y = x = torch.cat([x[:, :-1, :], x_glb], dim=1)

        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
        y = self.dropout(self.conv2(y).transpose(-1, 1))

        return self.norm3(x + y)


class Model(nn.Module, Benchmark):

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.features = configs.features
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.use_norm = configs.use_norm
        self.patch_len = configs.patch_len
        self.patch_num = int(configs.seq_len // configs.patch_len)
        self.n_vars = 1 if configs.features == 'MS' else configs.enc_in
        # Embedding
        self.en_embedding = EnEmbedding(self.n_vars, configs.d_model, self.patch_len, configs.dropout)

        self.ex_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq,
                                                   configs.dropout)

        # Encoder-only architecture
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=False),
                        configs.d_model, configs.n_heads),
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for l in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        self.head_nf = configs.d_model * (self.patch_num + 1)
        self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len,
                                head_dropout=configs.dropout)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc /= stdev

        _, _, N = x_enc.shape

        en_embed, n_vars = self.en_embedding(x_enc[:, :, -1].unsqueeze(-1).permute(0, 2, 1))
        ex_embed = self.ex_embedding(x_enc[:, :, :-1], x_mark_enc)

        enc_out = self.encoder(en_embed, ex_embed)
        enc_out = torch.reshape(
            enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1]))
        # z: [bs x nvars x d_model x patch_num]
        enc_out = enc_out.permute(0, 1, 3, 2)

        dec_out = self.head(enc_out)  # z: [bs x nvars x target_window]
        dec_out = dec_out.permute(0, 2, 1)

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer
            dec_out = dec_out * (stdev[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1))
            dec_out = dec_out + (means[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1))

        return dec_out


    def forecast_multi(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc /= stdev

        _, _, N = x_enc.shape

        en_embed, n_vars = self.en_embedding(x_enc.permute(0, 2, 1))
        ex_embed = self.ex_embedding(x_enc, x_mark_enc)

        enc_out = self.encoder(en_embed, ex_embed)
        enc_out = torch.reshape(
            enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1]))
        # z: [bs x nvars x d_model x patch_num]
        enc_out = enc_out.permute(0, 1, 3, 2)

        dec_out = self.head(enc_out)  # z: [bs x nvars x target_window]
        dec_out = dec_out.permute(0, 2, 1)

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer
            dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
            dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))

        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            if self.features == 'M':
                dec_out = self.forecast_multi(x_enc, x_mark_enc, x_dec, x_mark_dec)
                return dec_out[:, -self.pred_len:, :]  # [B, L, D]
            else:
                dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
                return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        else:
            return None

In [8]:
network = Model(configs)

In [9]:
loss = MaximumDrawdown(input_type='simple') + MeanReturns(input_type='simple') + 10 * SharpeRatio(input_type='simple', rf=0.000)

In [10]:
import time
from deepdow.experiments import Run, SolverError, EarlyStoppingException
class ExtendedRun(Run):
    def launch(self, n_epochs=1):
        try:
            self.network.to(device=self.device, dtype=self.dtype)
            if self.current_epoch == -1:
                self.on_train_begin(metadata={"n_epochs": n_epochs})

            for _ in range(n_epochs):
                self.current_epoch += 1
                self.on_epoch_begin(metadata={"epoch": self.current_epoch})

                for batch_ix, batch in enumerate(self.train_dataloader):
                    # 解包以支持额外变量
                    X_batch, y_batch, timestamps, asset_names, *extra = batch
                    extra_data = extra[0] if extra else None  # 处理无额外数据的情况

                    self.on_batch_begin(
                        metadata={
                            "asset_names": asset_names,
                            "batch": batch_ix,
                            "epoch": self.current_epoch,
                            "timestamps": timestamps,
                            "X_batch": X_batch,
                            "y_batch": y_batch,
                            "extra_data": extra_data,
                        }
                    )

                    X_batch, y_batch = X_batch.to(self.device).to(self.dtype), y_batch.to(self.device).to(self.dtype)
                    if extra_data is not None:
                        extra_data = extra_data.to(self.device).to(self.dtype)

                    self.network.train()
                    
                    X_mark = extra[0]
                    y_mark = extra[1] if len(extra) > 1 else None
                    weights = self.network(X_batch, y_batch, X_mark, y_mark)
                    # 如果支持，将 extra_data 传递给损失函数
                    loss_per_sample = self.loss(weights, y_batch, extra_data=extra_data)
                    loss = loss_per_sample.mean()
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    self.network.eval()

                    self.on_batch_end(
                        metadata={
                            "asset_names": asset_names,
                            "batch": batch_ix,
                            "batch_loss": loss.item(),
                            "epoch": self.current_epoch,
                            "timestamps": timestamps,
                            "weights": weights,
                            "X_batch": X_batch,
                            "y_batch": y_batch,
                            "extra_data": extra_data,
                        }
                    )

                self.on_epoch_end(
                    metadata={
                        "epoch": self.current_epoch,
                        "n_epochs": n_epochs,
                    }
                )

            self.on_train_end()

        except (EarlyStoppingException, KeyboardInterrupt, SolverError) as ex:
            print("训练中断")
            time.sleep(1)
            self.on_train_interrupt(metadata={"exception": ex, "locals": locals()})

        return self.history

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
d_model = configs.d_model
n_heads = configs.n_heads
n_layers = configs.e_layers

In [22]:
#获得时间 作为文件名
from datetime import datetime
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H-%M-%S")
run = ExtendedRun(network,
          loss,
          train_dataloader,
          benchmarks={'OneOverN': OneOverN(),
                      'MaximumReturn': MaximumReturn(),
                     },
          val_dataloaders={'test': val_dataloader},
          optimizer=torch.optim.Adam(network.parameters(), amsgrad=True, lr=0.001),
          callbacks=[EarlyStoppingCallback(metric_name='loss',
                                           dataloader_name='test',
                                           patience=15),
                     ModelCheckpointCallback(folder_path=f'./models/{date_time}_d_model_{d_model}_nhead{n_heads}_num_layers{n_layers}/',
                                             dataloader_name='test',
                                             metric_name='loss')],
          device=device,
          #dtype=torch.float64,
          )

#

In [None]:
history = run.launch(5)