In [8]:
import torch
import accelerate
from accelerate import Accelerator, DeepSpeedPlugin
from accelerate import DistributedDataParallelKwargs
from torch import nn, optim
from torch.optim import lr_scheduler
import evaluate
from utils.tools import get_parameter_number

In [9]:

from data_provider.data_factory import data_provider
import time
import json
import random
import numpy as np
import os
import json
import datetime
import joblib
from typing import Optional
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    root_mean_squared_error,
)


from logger import Logger

In [10]:
def set_seed(seed):
    accelerate.utils.set_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = False
    if torch.cuda.is_available() > 0:
        torch.cuda.manual_seed_all(seed)

In [11]:
args = {
    "task_name": "long_term_forecast",
    "is_training": 1,
    "model_id": "test",
    "model_comment": "none",
    "model": "CPMLP",
    "seed": 2021,
    "charge_discharge_length": 300,
    "dataset": "CALB",
    "data": "BatteryLife",
    "root_path": "./dataset/processed",
    "data_path": "ETTh1.csv",
    "features": "M",
    "target": "OT",
    "loader": "modal",
    "freq": "h",
    "checkpoints": "./checkpoints/",
    "early_cycle_threshold": 100,
    "seq_len": 1,
    "pred_len": 5,
    "label_len": 48,
    "seasonal_patterns": "Monthly",
    "enc_in": 1,
    "dec_in": 1,
    "c_out": 1,
    "d_model": 128,
    "n_heads": 8,
    "lstm_layers": 2,
    "e_layers": 4,
    "d_layers": 2,
    "d_ff": 256,
    "moving_avg": 25,
    "factor": 1,
    "dropout": 0.0,
    "embed": "timeF",
    "activation": "relu",
    "output_attention": False,
    "patch_len": 10,
    "stride": 10,
    "patch_len2": 10,
    "stride2": 10,
    "prompt_domain": 0,
    "output_num": 1,
    "class_num": 8,
    "weighted_loss": False,
    "weighted_sampling": False,
    "num_workers": 1,
    "itr": 1,
    "train_epochs": 100,
    "least_epochs": 5,
    "batch_size": 16,
    "patience": 5,
    "learning_rate": 5e-05,
    "wd": 0.0,
    "des": "test",
    "loss": "MSE",
    "lradj": "constant",
    "pct_start": 0.2,
    "use_amp": False,
    "percent": 100,
    "accumulation_steps": 1,
    "mlp": 0,
    "alpha1": 0.15,
    "alpha2": 0.1,
}

class AttrDict(dict):
    """A dictionary that allows for attribute-style access."""
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        self[name] = value

args = AttrDict(args)

In [12]:
nowtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
set_seed(args.seed)
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
deepspeed_plugin = DeepSpeedPlugin(hf_ds_config="./ds_config_zero2_baseline.json")
accelerator = Accelerator(
    kwargs_handlers=[ddp_kwargs],
    deepspeed_plugin=deepspeed_plugin,
    gradient_accumulation_steps=args.accumulation_steps,
)
logger: Optional[Logger] = None
if accelerator.is_local_main_process:
    logger = Logger("logs", args.model + args.model_comment)
    logger.log_hparams(vars(args))
accelerator.print(args.__dict__)
for ii in range(args.itr):
    # setting record of experiments
    setting = "{}_sl{}_lr{}_dm{}_nh{}_el{}_dl{}_df{}_lradj{}_dataset{}_loss{}_wd{}_wl{}_bs{}_s{}".format(
        args.model,
        args.seq_len,
        args.learning_rate,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.lradj,
        args.dataset,
        args.loss,
        args.wd,
        args.weighted_loss,
        args.batch_size,
        args.seed,
    )


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


{}


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class MLPBlock(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, drop_rate):
        super(MLPBlock, self).__init__()
        self.in_linear = nn.Linear(in_dim, hidden_dim)
        self.dropout = nn.Dropout(drop_rate)
        self.out_linear = nn.Linear(hidden_dim, out_dim)
        self.ln = nn.LayerNorm(out_dim)
    
    def forward(self, x):
        '''
        x: [B, *, in_dim]
        '''
        out = self.in_linear(x)
        out = F.relu(out)
        out = self.dropout(out)
        out = self.out_linear(out)
        out = self.ln(self.dropout(out) + x)
        return out



class CPMLP(nn.Module):
    def __init__(self, configs):
        super(CPMLP, self).__init__()
        self.d_ff = configs.d_ff
        self.d_model = configs.d_model
        self.charge_discharge_length = configs.charge_discharge_length
        self.early_cycle_threshold = configs.early_cycle_threshold
        self.drop_rate = configs.dropout
        self.e_layers = configs.e_layers
        self.d_layers = configs.d_layers
        self.intra_flatten = nn.Flatten(start_dim=2)
        self.intra_embed = nn.Linear(self.charge_discharge_length*3, self.d_model)
        self.intra_MLP = nn.ModuleList([MLPBlock(self.d_model, self.d_ff, self.d_model, self.drop_rate) for _ in range(configs.e_layers)])

        self.inter_flatten = nn.Sequential(
            nn.Flatten(start_dim=1), 
            nn.Linear(self.early_cycle_threshold*self.d_model, self.d_model)
        )
        self.inter_MLP = nn.ModuleList([MLPBlock(self.d_model, self.d_ff, self.d_model, self.drop_rate) for _ in range(configs.d_layers)])
        self.head_output = nn.Linear(self.d_model, 1)

    def forward(self, cycle_curve_data, curve_attn_mask, return_embedding=False):
        '''
        cycle_curve_data: [B, early_cycle, fixed_len, num_var]
        curve_attn_mask: [B, early_cycle]
        '''
        print(f"input shape: {cycle_curve_data.shape}")
        cycle_curve_data = self.intra_flatten(cycle_curve_data) # [B, early_cycle, fixed_len * num_var]
        print(f"after flatten shape: {cycle_curve_data.shape}")
        cycle_curve_data = self.intra_embed(cycle_curve_data)
        print(f"after embed shape | encode input: {cycle_curve_data.shape}")
        for i in range(self.e_layers):
            cycle_curve_data = self.intra_MLP[i](cycle_curve_data) # [B, early_cycle, d_model]
        print(f"after encode shape: {cycle_curve_data.shape}")
        cycle_curve_data = self.inter_flatten(cycle_curve_data) # [B, d_model]
        for i in range(self.d_layers):
            cycle_curve_data = self.inter_MLP[i](cycle_curve_data) # [B, d_model]
        print(f"after decode shape: {cycle_curve_data.shape}")
        preds = self.head_output(F.relu(cycle_curve_data))
        print(f"pred shape: {preds.shape}")
        if return_embedding:
            return preds, cycle_curve_data
        else:
            return preds


In [20]:
model = CPMLP(args).float()

In [21]:
path = os.path.join(
    args.checkpoints, setting + "-" + args.model_comment
)  # unique checkpoint saving path

accelerator.print("Loading training samples......")
train_data, train_loader = data_provider(
    args, flag="train"
)
label_scaler = train_data.return_label_scaler()
life_class_scaler = train_data.return_life_class_scaler()
accelerator.print("Loading vali samples......")
vali_data, vali_loader = data_provider(
    args,
    flag="val",
    label_scaler=label_scaler,
    life_class_scaler=life_class_scaler,
)
accelerator.print("Loading test samples......")
test_data, test_loader = data_provider(
    args,
    flag="test",
    label_scaler=label_scaler,
    life_class_scaler=life_class_scaler,
)

Loading training samples......


100%|██████████| 17/17 [00:04<00:00,  4.04it/s]


Loading vali samples......


100%|██████████| 5/5 [00:01<00:00,  3.15it/s]


Loading test samples......


100%|██████████| 5/5 [00:01<00:00,  3.90it/s]


In [22]:
for i, (
        cycle_curve_data,
        curve_attn_mask,
        labels,
        life_class,
        scaled_life_class,
        weights,
        seen_unseen_ids,
    ) in enumerate(train_loader):
    print(cycle_curve_data.shape)
    print(curve_attn_mask.shape)
    print(labels.shape)
    print(life_class.shape)
    print(scaled_life_class.shape)
    print(weights.shape)
    print(seen_unseen_ids.shape)

    outputs = model(cycle_curve_data, curve_attn_mask)

    break

torch.Size([16, 100, 3, 300])
torch.Size([16, 100])
torch.Size([16, 1])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16])
input shape: torch.Size([16, 100, 3, 300])
after flatten shape: torch.Size([16, 100, 900])
after embed shape | encode input: torch.Size([16, 100, 128])
after encode shape: torch.Size([16, 100, 128])
after decode shape: torch.Size([16, 128])
pred shape: torch.Size([16, 1])


In [23]:
outputs.shape

torch.Size([16, 1])

In [24]:
labels

tensor([[ 0.2515],
        [ 0.7662],
        [-1.2982],
        [-1.2982],
        [ 0.7793],
        [-1.3696],
        [-1.4034],
        [-1.4053],
        [-1.4053],
        [ 0.7305],
        [ 0.8751],
        [ 0.7662],
        [ 0.7155],
        [ 0.9390],
        [-1.4034],
        [ 0.7343]])