In [1]:
import torch    
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# SECURITY: torch.load unpickles the file. weights_only=True restricts unpickling to
# tensors and primitive containers, so a tampered .pt file cannot run arbitrary code.
# The file is used as a plain tensor below (it is sliced and has a .shape).
data = torch.load('Mortality/Mtest.pt', weights_only=True)

In [3]:
# Keep only the first 10 samples as a small smoke-test batch.
data0 = data[:10]

In [4]:
data0.shape

torch.Size([10, 288, 231])

In [5]:
# device = torch.device('cuda:0')

In [6]:
# data0 = data0.to(device)

In [6]:
# from exp.exp_classification import Exp_Classification

## Available Models in the library

| Model Name                    | Type |Reference                    | Works |
|-------------------------------|-------------------------------|-------------------------------|-------------------------------|
| Autoformer.py                 |Transformer|https://arxiv.org/abs/2106.13008, https://openreview.net/pdf?id=I55UqU-M11y| YES
| Crossformer.py                |Transformer|https://openreview.net/forum?id=vSVLM2j9eie| YES
| DLinear.py                    |MLP|https://arxiv.org/pdf/2205.13504.pdf| YES
| ETSformer.py                  |Transformer|https://arxiv.org/abs/2202.01381| YES
| FEDformer.py                  |Transformer|https://proceedings.mlr.press/v162/zhou22g/zhou22g.pdf|YES
| FiLM.py                       |Legendre Memory Model - State Space |https://openreview.net/pdf?id=zTQdHSQUQWc| NO - OOM Error
| FreTS.py                      |Frequency domain MLP|https://arxiv.org/pdf/2311.06184.pdf| NO - only designed for forecasting
| Informer.py                   |Transformer|https://doi.org/10.1609/aaai.v35i12.17325|YES
| Koopa.py                      |Dynamical System|https://arxiv.org/pdf/2305.18803|NO - some error
| LightTS.py                    |MLP|https://arxiv.org/abs/2207.01186|YES
| MICN.py                       |Convolution|https://openreview.net/pdf?id=zt53IDUR1U|YES
| Mamba.py                      |State Space|https://arxiv.org/abs/2312.00752|NO - only designed for forecasting
| MambaSimple.py                |State Space|https://arxiv.org/abs/2312.00752, https://github.com/johnma2006/mamba-minimal/|NO - only designed for forecasting
| Nonstationary_Transformer.py  |Transformer|https://arxiv.org/abs/2205.14415|YES
| PatchTST.py                   |Transformer|https://arxiv.org/abs/2211.14730|YES
| Pyraformer.py                 |Transformer|https://openreview.net/pdf?id=0EXmFzUn5I|YES
| Reformer.py                   |Transformer|https://arxiv.org/abs/2001.04451|YES
| SCINet.py                     |Convolution|http://arxiv.org/abs/2106.09305|NO - only designed for forecasting
| SegRNN.py                     |RNN|https://arxiv.org/abs/2308.11200|YES
| TSMixer.py                    |MLP|https://arxiv.org/pdf/2303.06053|NO - only designed for forecasting
| TemporalFusionTransformer.py  |--|https://arxiv.org/abs/1912.09363|NO - only designed for forecasting
| TiDE.py                       |MLP|https://arxiv.org/pdf/2304.08424.pdf |NO - only designed for forecasting
| TimeMixer.py                  |MLP|https://arxiv.org/abs/2405.14616|YES
| TimesNet.py                   |Convolution|https://openreview.net/pdf?id=ju_Uqw384Oq, https://github.com/thuml/TimesNet|YES
| Transformer.py                |Transformer|https://arxiv.org/abs/1706.03762|YES
| iTransformer.py               |Transformer|https://arxiv.org/abs/2310.06625|YES

## Available Models as per Medformer benchmark

| Model Name       | Link       | Works
|------------------|------------------|------------------|
| Autoformer       |https://arxiv.org/abs/2106.13008| YES
| Crossformer      |https://openreview.net/forum?id=vSVLM2j9eie| YES
| FEDformer        |https://proceedings.mlr.press/v162/zhou22g/zhou22g.pdf|YES
| Informer         |https://doi.org/10.1609/aaai.v35i12.17325|YES
| iTransformer     |https://arxiv.org/abs/2310.06625|YES
| MTST             |https://proceedings.mlr.press/v238/zhang24l/zhang24l.pdf|YES
| Nonformer        |https://arxiv.org/abs/2205.14415|YES
| PatchTST         |https://arxiv.org/abs/2211.14730|YES
| Pathformer       |https://arxiv.org/abs/2402.05956|Code NA in Benchmark
| Reformer         |https://arxiv.org/abs/2001.04451|YES
| Transformer      |https://arxiv.org/abs/1706.03762|YES
| Medformer        |https://arxiv.org/pdf/2405.19363|YES

## Setup Arguments for models

In [5]:
class Args:
    """Hyper-parameter namespace passed to each ``Model(args)`` constructor in this notebook.

    ``Args()`` yields exactly the defaults listed below. Any default can be
    replaced per model without editing the class, e.g. ``Args(c_out=1)``.

    Args:
        **overrides: ``name=value`` pairs replacing the matching default.

    Raises:
        TypeError: if an override names a setting that is not defined here
            (guards against silently ignored typos).
    """

    def __init__(self, **overrides):
        # --- Task / data layout ---
        self.task_name = 'classification'  # Set the task to classification
        self.seq_len = 288  # Input sequence length
        self.label_len = 0  # Not needed for classification, so set to 0
        self.pred_len = 0  # Not needed for classification, so set to 0
        self.enc_in = 231  # Input feature dimension (number of features)
        self.dec_in = 231  # Decoder input feature dimension; required for FEDformer
        self.num_class = 1  # Number of classes for classification

        # --- Shared architecture settings ---
        self.d_model = 16  # Model's hidden dimension
        self.e_layers = 6  # Number of encoder layers
        self.d_layers = 6  # Presumably the number of decoder layers (old comment repeated "encoder")
        self.d_ff = 16  # Feed-forward dimension
        self.dropout = 0.1  # Dropout rate
        self.factor = 5  # AutoCorrelation factor
        self.n_heads = 8  # Number of attention heads
        self.moving_avg = 25  # Moving average kernel size
        self.activation = 'gelu'  # Activation function for the model
        self.embed = 'timeF'  # Embedding type (can vary based on implementation)
        self.freq = 'h'  # Frequency of the time feature
        # Output feature dimension. 231 is the value MICN needs; c_out = 1 is
        # valid for some of the other models (override with Args(c_out=1)).
        self.c_out = 231
        self.output_attention = False

        # --- Model-specific settings ---
        self.top_k = 1  # Required in ETSformer
        self.distil = False  # Required in Informer
        self.patch_len_list = '4,8,16'  # Required in MTST
        self.p_hidden_dims = [16]  # Required in Nonstationary Transformer
        self.p_hidden_layers = 1  # Required in Nonstationary Transformer
        self.single_channel = True  # Medformer
        self.augmentations = 'jitter0.2,scale0.2,drop0.5'  # Medformer
        self.no_inter_attn = False  # Medformer
        self.seg_len = 32  # Segment length (the original notes name no model for it)

        # Required for TimeMixer
        self.down_sampling_window = 1
        self.channel_independence = False
        self.decomp_method = 'moving_avg'  # alternative tried in the notebook: 'dft_decomp'
        self.down_sampling_layers = 1
        self.use_norm = 0
        self.down_sampling_method = 'avg'

        # Required for TimesNet
        self.num_kernels = 2

        # --- GPU settings ---
        self.use_multi_gpu = False
        self.use_gpu = True
        self.gpu = 0  # Use the first GPU (can be adjusted if using multiple)
        self.devices = '0'  # Device ids (if using multiple GPUs)

        # Apply overrides last so they win over the defaults above.
        for name, value in overrides.items():
            if not hasattr(self, name):
                raise TypeError(f"Args got an unexpected setting {name!r}")
            setattr(self, name, value)



In [6]:
# Instantiate the args object
args = Args()

In [20]:
# All-ones mask of shape (batch=10, seq_len=288), passed as the second argument of every
# model(...) call below. It must be defined here: the first use is in the Autoformer section,
# and on a fresh kernel the name would otherwise not exist until much later in the notebook.
temp_mask = torch.ones(10, 288)

## Autoformer

In [34]:
# This import must be live: the next cell calls Model(args), which would raise
# NameError on a fresh kernel if the line stayed commented out.
from models.Autoformer import Model

In [31]:
# Instantiate the Autoformer model
model = Model(args).float()

In [32]:
# model

In [33]:
# Add up the element count of every parameter tensor in the model.
total_params = 0
for weight in model.parameters():
    total_params += weight.numel()
print(f"Number of parameters: {total_params}")

Number of parameters: 1082634


In [13]:
out = model(data0, temp_mask, None, None)

In [14]:
out

tensor([[-0.3617],
        [-0.1915],
        [-0.2387],
        [ 0.1611],
        [-0.1069],
        [ 0.2503],
        [-0.5381],
        [ 0.1001],
        [ 0.0196],
        [-0.5541]], grad_fn=<AddmmBackward0>)

## Crossformer

In [9]:
from models.Crossformer import Model

In [12]:
# Instantiate the Crossformer model (Model was imported from models.Crossformer above)
model = Model(args).float()

In [16]:
out = model(data0, temp_mask, None, None)

In [17]:
out

tensor([[0.1584],
        [0.0566],
        [0.3123],
        [0.2433],
        [0.7558],
        [0.4653],
        [0.0824],
        [0.3751],
        [0.4441],
        [0.3370]], grad_fn=<AddmmBackward0>)

## DLinear -- not in the medformer benchmark

In [19]:
from models.DLinear import Model

In [20]:
# Instantiate the DLinear model (Model was imported from models.DLinear above)
model = Model(args).float()

In [21]:
out = model(data0, temp_mask, None, None)

In [22]:
out

tensor([[-0.0138],
        [ 0.0093],
        [-0.0014],
        [ 0.0107],
        [-0.0177],
        [-0.0002],
        [ 0.0110],
        [-0.0093],
        [ 0.0025],
        [-0.0041]], grad_fn=<AddmmBackward0>)

## ETSformer - not in the medformer benchmark

In [34]:
from models.ETSformer import Model

In [35]:
model = Model(args).float()

In [36]:
out = model(data0, temp_mask, None, None)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [37]:
out

tensor([[-0.3028],
        [-0.0773],
        [ 0.0931],
        [-0.4282],
        [-0.7638],
        [-0.3587],
        [-0.4388],
        [ 0.3406],
        [-0.2195],
        [ 0.0528]], grad_fn=<AddmmBackward0>)

## FEDformer

In [42]:
from models.FEDformer import Model

In [45]:
model = Model(args).float()

fourier enhanced block used!
modes=32, index=[8, 13, 18, 21, 24, 26, 35, 36, 38, 42, 44, 55, 70, 73, 84, 86, 87, 88, 93, 95, 103, 108, 110, 113, 115, 118, 119, 121, 128, 139, 141, 143]
fourier enhanced block used!
modes=32, index=[0, 4, 6, 10, 12, 14, 15, 16, 19, 20, 21, 23, 24, 29, 33, 34, 37, 39, 41, 45, 47, 48, 50, 54, 56, 57, 58, 59, 62, 67, 69, 70]
 fourier enhanced cross attention used!
modes_q=32, index_q=[6, 7, 13, 14, 15, 17, 18, 19, 20, 21, 23, 24, 25, 27, 30, 31, 32, 33, 34, 36, 38, 41, 42, 44, 46, 50, 51, 53, 54, 63, 66, 70]
modes_kv=32, index_kv=[2, 4, 13, 14, 15, 21, 27, 30, 32, 36, 39, 45, 46, 48, 51, 67, 68, 86, 87, 92, 97, 98, 99, 103, 106, 109, 125, 134, 136, 139, 140, 141]


In [46]:
out = model(data0, temp_mask, None, None)

In [47]:
out

tensor([[-0.1858],
        [ 0.2457],
        [-0.3740],
        [ 0.1871],
        [-0.2296],
        [-0.2738],
        [-0.1334],
        [ 0.5448],
        [ 0.3772],
        [-0.2477]], grad_fn=<AddmmBackward0>)

## FiLM - Frequency improved Legendre Memory Model for Long-term Time Series Forecasting - not in medformer benchmark

In [8]:
from models.FiLM import Model

In [15]:
model = Model(args).float()
# Use the first GPU when one is present; otherwise stay on CPU so this cell
# does not crash on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Last expression of the cell: moves the model and displays its module summary.
model.to(device)

Model(
  (legts): ModuleList(
    (0-2): 3 x HiPPO_LegT()
  )
  (spec_conv_1): ModuleList(
    (0-2): 3 x SpectralConv1d()
  )
  (mlp): Linear(in_features=3, out_features=1, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (projection): Linear(in_features=66528, out_features=1, bias=True)
)

In [16]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 12649907


In [18]:
# out = model(data0, temp_mask, None, None)

## Informer

In [8]:
from models.Informer import Model

In [9]:
model = Model(args).float()

In [10]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 2203522


In [13]:
out = model(data0, temp_mask, None, None)

In [14]:
out

tensor([[-0.2961],
        [-0.1234],
        [-0.3028],
        [-0.4261],
        [-0.3749],
        [ 0.0280],
        [ 0.1748],
        [-0.1680],
        [ 0.0282],
        [-0.2995]], grad_fn=<AddmmBackward0>)

## iTransformer: Inverted Transformers 

In [16]:
from models.iTransformer import Model

In [17]:
model = Model(args).float()

In [18]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 861697


In [19]:
out = model(data0, temp_mask, None, None)

In [20]:
out

tensor([[0.0837],
        [0.3556],
        [1.3128],
        [0.1028],
        [0.1732],
        [0.7815],
        [0.0868],
        [0.1491],
        [0.3529],
        [0.5075]], grad_fn=<AddmmBackward0>)

## MTST -  not in time series library - part of medformer benchmark

In [8]:
from models.MTST import Model

In [9]:
model = Model(args).float()

In [10]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 507857


In [11]:
out = model(data0, temp_mask, None, None)

In [12]:
out

tensor([[0.8156],
        [0.8147],
        [1.3681],
        [0.9639],
        [1.6558],
        [1.4902],
        [1.9889],
        [1.9877],
        [0.9710],
        [1.0086]], grad_fn=<AddmmBackward0>)

## Non-former - non-stationary transformer

In [9]:
from models.Nonstationary_Transformer import Model

In [19]:
model = Model(args).float()

In [20]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 47137


In [21]:
out = model(data0, temp_mask, None, None)

In [22]:
out

tensor([[ 0.1287],
        [-0.1771],
        [-0.2860],
        [-0.2683],
        [ 0.3625],
        [ 0.2088],
        [ 0.0486],
        [ 0.1083],
        [-0.3531],
        [ 0.0852]], grad_fn=<AddmmBackward0>)

## PATCH-TST

In [23]:
from models.PatchTST import Model

In [24]:
model = Model(args).float()

In [25]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 143521


In [26]:
out = model(data0, temp_mask, None, None)

In [27]:
out

tensor([[ 0.2955],
        [ 0.7070],
        [-0.9226],
        [ 0.2666],
        [-0.4260],
        [-0.1465],
        [ 0.1830],
        [-0.3104],
        [-0.0689],
        [-0.2544]], grad_fn=<AddmmBackward0>)

## Reformer

In [28]:
from models.Reformer import Model

In [29]:
model = Model(args).float()

In [30]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 24145


In [31]:
out = model(data0, temp_mask, None, None)

In [32]:
out

tensor([[-0.1137],
        [ 0.0874],
        [-0.0072],
        [-0.4799],
        [ 0.1631],
        [-0.5913],
        [-0.3488],
        [-0.2850],
        [-0.1931],
        [ 0.2805]], grad_fn=<AddmmBackward0>)

## Transformer  - Standard - attention is all you need

In [33]:
from models.Transformer import Model

In [34]:
model = Model(args).float()

In [35]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 25969


In [36]:
out = model(data0, temp_mask, None, None)

In [37]:
out

tensor([[-0.2213],
        [ 0.0991],
        [-0.4337],
        [-0.2250],
        [-0.3431],
        [-0.4161],
        [-0.0944],
        [-0.3625],
        [ 0.0429],
        [-0.0595]], grad_fn=<AddmmBackward0>)

## Medformer

In [8]:
from models.Medformer import Model

In [9]:
model = Model(args).float()

In [10]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 507073


In [11]:
out = model(data0, temp_mask, None, None)

In [12]:
out

tensor([[ 0.1737],
        [ 0.3458],
        [-0.0243],
        [ 0.1276],
        [ 0.4908],
        [ 0.1446],
        [-0.1049],
        [ 0.3501],
        [-0.1712],
        [-0.0282]], grad_fn=<AddmmBackward0>)

## other models in library not in medformer

## FreTS -- only designed for forecasting

In [13]:
# from models.FreTS import Model
# model = Model(args).float()
# out = model(data0, temp_mask, None, None)

## Koopa - some error

In [23]:
# from models.Koopa import Model
# model = Model(args).float()

## LightTS

In [24]:
from models.LightTS import Model

In [25]:
model = Model(args).float()

In [26]:
out = model(data0, temp_mask, None, None)

In [28]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 204871


In [29]:
out

tensor([[ 0.0654],
        [ 0.0211],
        [ 0.2360],
        [-0.0221],
        [ 0.0389],
        [-0.0444],
        [-0.1987],
        [-0.5875],
        [-0.3315],
        [ 0.0381]], grad_fn=<AddmmBackward0>)

## MICN

In [11]:
from models.MICN import Model

In [27]:
# del model

In [28]:
model = Model(args).float()

In [35]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 252296


In [5]:
# model.to(device)

In [30]:
data0.shape

torch.Size([10, 288, 231])

In [7]:
# All-ones mask with one entry per (sample, time step): 10 samples x 288 steps.
temp_mask = torch.ones((10, 288))

In [8]:
# temp_mask = temp_mask.to(device)

In [33]:
out = model(data0, temp_mask, None, None)

output shape after dropout torch.Size([10, 288, 231])


In [34]:
out

tensor([[-0.2415],
        [-0.2294],
        [-0.4476],
        [-0.0495],
        [-0.3401],
        [-0.3558],
        [-0.1783],
        [-0.1052],
        [-0.4631],
        [ 0.0183]], device='cuda:0', grad_fn=<AddmmBackward0>)

## Pyraformer

In [16]:
from models.Pyraformer import Model

In [17]:
model = Model(args).float()

In [18]:
out = model(data0, temp_mask, None, None)

In [19]:
out

tensor([[-0.1359],
        [-0.0692],
        [ 0.0986],
        [-0.2031],
        [-0.2461],
        [-0.0591],
        [-0.1812],
        [-0.0093],
        [-0.2031],
        [-0.1888]], grad_fn=<AddmmBackward0>)

## SCINet  - only forecasting

In [25]:
# from models.SCINet import Model

In [26]:
# model = Model(args).float()

In [27]:
# out = model(data0, temp_mask, None, None)

## SegRNN

In [28]:
from models.SegRNN import Model

In [32]:
model = Model(args).float()

In [33]:
out = model(data0, temp_mask, None, None)

In [34]:
out

tensor([[-0.1167],
        [-0.4970],
        [ 0.1222],
        [-0.0476],
        [-0.3599],
        [-0.5090],
        [-0.1588],
        [-0.5523],
        [ 0.1799],
        [ 0.3940]], grad_fn=<AddmmBackward0>)

## TSMixer -- only forecasting

In [38]:
# from models.TSMixer import Model
# model = Model(args).float()
# out = model(data0, temp_mask, None, None)

## TemporalFusionTransformer -- only forecasting

In [None]:
# out = model(data0, temp_mask, None, None)

## TiDE -- only forecasting

In [44]:
# from models.TiDE import Model
# model = Model(args).float()
# out = model(data0, temp_mask, None, None)

## TimeMixer -- has multiple variants

In [89]:
from models.TimeMixer import Model

In [120]:
model = Model(args).float()

In [121]:
out = model(data0, temp_mask, None, None)

In [122]:
out

tensor([[-0.0963],
        [-0.0755],
        [-0.0567],
        [-0.0482],
        [-0.0266],
        [-0.0754],
        [-0.0576],
        [-0.1013],
        [-0.0866],
        [-0.0795]], grad_fn=<AddmmBackward0>)

## TimesNet

In [9]:
from models.TimesNet import Model

In [10]:
model = Model(args).float()

In [11]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 46897


In [12]:
out = model(data0, temp_mask, None, None)

In [13]:
out

tensor([[ 0.1337],
        [-0.3937],
        [ 0.5154],
        [-0.1321],
        [-0.5857],
        [ 0.1846],
        [-0.0186],
        [-0.3671],
        [ 0.0922],
        [-0.2147]], grad_fn=<AddmmBackward0>)

# Summary: models that ran successfully on the classification task

| Model Name                    | Type |Reference                    | Works |
|-------------------------------|-------------------------------|-------------------------------|-------------------------------|
| Autoformer.py                 |Transformer|https://arxiv.org/abs/2106.13008, https://openreview.net/pdf?id=I55UqU-M11y| YES
| Crossformer.py                |Transformer|https://openreview.net/forum?id=vSVLM2j9eie| YES
| DLinear.py                    |MLP|https://arxiv.org/pdf/2205.13504.pdf| YES
| ETSformer.py                  |Transformer|https://arxiv.org/abs/2202.01381| YES
| FEDformer.py                  |Transformer|https://proceedings.mlr.press/v162/zhou22g/zhou22g.pdf|YES
| Informer.py                   |Transformer|https://doi.org/10.1609/aaai.v35i12.17325|YES
| LightTS.py                    |MLP|https://arxiv.org/abs/2207.01186|YES
| MTST.py                       |Transformer|https://proceedings.mlr.press/v238/zhang24l/zhang24l.pdf|YES
| MICN.py                       |Convolution|https://openreview.net/pdf?id=zt53IDUR1U|YES
| Nonstationary_Transformer.py  |Transformer|https://arxiv.org/abs/2205.14415|YES
| PatchTST.py                   |Transformer|https://arxiv.org/abs/2211.14730|YES
| Pyraformer.py                 |Transformer|https://openreview.net/pdf?id=0EXmFzUn5I|YES
| Reformer.py                   |Transformer|https://arxiv.org/abs/2001.04451|YES
| SegRNN.py                     |RNN|https://arxiv.org/abs/2308.11200|YES
| TimeMixer.py                  |MLP|https://arxiv.org/abs/2405.14616|YES
| TimesNet.py                   |Convolution|https://openreview.net/pdf?id=ju_Uqw384Oq, https://github.com/thuml/TimesNet|YES
| Transformer.py                |Transformer|https://arxiv.org/abs/1706.03762|YES
| iTransformer.py               |Transformer|https://arxiv.org/abs/2310.06625|YES
| Medformer.py                  |Transformer|https://arxiv.org/pdf/2405.19363|YES