In [5]:
import os
import torch
import argparse
import numpy as np
from Utils.io_utils import load_yaml_config, seed_everything, merge_opts_to_config, instantiate_from_config
from Utils.metric_utils import visualization
import sys
import time
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
plt.rc('font',family='Times New Roman') 

import umap                       
import seaborn as sns 
from tqdm import tqdm

In [6]:
def parse_args():
    """Build the training-script argument parser and return its defaults.

    The parser is run against an empty argument list (``args=[]``), so the
    real command line is ignored and every option takes its default value.
    A ``save_dir`` attribute, the join of ``output`` and ``name``, is attached
    to the namespace before it is returned.

    Returns:
        argparse.Namespace: parsed options plus ``save_dir``.
    """
    cli = argparse.ArgumentParser(description='PyTorch Training Script')
    add = cli.add_argument

    # --- run identification / output locations
    add('--name', type=str, default=None)
    add('--config_file', type=str, default=None, help='path of config file')
    add('--output', type=str, default='OUTPUT', help='directory to save the results')
    add('--tensorboard', action='store_true', help='use tensorboard for logging')

    # --- randomness and device
    add('--cudnn_deterministic', action='store_true', default=False,
        help='set cudnn.deterministic True')
    add('--seed', type=int, default=10, help='seed for initializing training.')
    add('--gpu', type=int, default=None,
        help='GPU id to use. If given, only the specific gpu will be'
             ' used, and ddp will be disabled')

    # --- training / sampling
    add('--train', action='store_true', default=False, help='Train or Test.')
    add('--sample', type=int, default=0, choices=[0, 1],
        help='Condition or Uncondition.')
    add('--mode', type=str, default='infill', help='Infilling or Forecasting.')
    add('--milestone', type=int, default=10)
    add('--missing_ratio', type=float, default=0., help='Ratio of Missing Values.')
    add('--pred_len', type=int, default=0, help='Length of Predictions.')

    # --- free-form config overrides (everything left on the command line)
    add('opts', help='Modify config options using the command-line',
        default=None, nargs=argparse.REMAINDER)

    # An explicit empty list keeps argparse from reading the notebook
    # kernel's own sys.argv.
    namespace = cli.parse_args(args=[])
    namespace.save_dir = os.path.join(namespace.output, str(namespace.name))
    return namespace


In [7]:
args = parse_args()

# Dataset used for this run. Other names known to the table below:
# 'ETTh1', 'ER', 'Energy', 'weather', 'MuJoCo'.
data_name = 'SP500'

# Dataset name -> YAML config path.
_CONFIG_FILES = {
    'SP500': './Config/SP500.yaml',
    'ER': './Config/exchange_rate.yaml',
    'ETTh1': './Config/ETTh1.yaml',
    'Energy': './Config/Energy.yaml',
    'weather': './Config/weather.yaml',
    'MuJoCo': './Config/MuJoCo.yaml',
}

# A name that is not in the table leaves args.config_file at its parsed value.
if data_name in _CONFIG_FILES:
    args.config_file = _CONFIG_FILES[data_name]

# Notebook-level overrides of the parsed defaults.
args.gpu = 0
args.train = True
print(args)

Namespace(name=None, config_file='./Config/SP500.yaml', output='OUTPUT', tensorboard=False, cudnn_deterministic=False, seed=10, gpu=0, train=True, sample=0, mode='infill', milestone=10, missing_ratio=0.0, pred_len=0, opts=[], save_dir='OUTPUT\\None')


In [8]:
# Seed first, so that everything below runs under the configured seed.
if args.seed is not None:
    seed_everything(args.seed)

# Pin the CUDA device only when one was requested.
if args.gpu is not None:
    torch.cuda.set_device(args.gpu)

# Load the YAML config and pass it through merge_opts_to_config with args.opts.
config = merge_opts_to_config(load_yaml_config(args.config_file), args.opts)

# Notebook-level override of the configured batch size.
config['dataloader']['batch_size'] = 128
print(config)

_diffusion_params = config['model']['params']
beta_schedule = _diffusion_params['beta_schedule']
timesteps = _diffusion_params['timesteps']
print(beta_schedule, timesteps)

Global seed set to 10
{'model': {'target': 'Models.interpretable_diffusion.gaussian_diffusion.Diffusion_TS', 'params': {'seq_length': 30, 'feature_size': 6, 'n_layer_enc': 2, 'n_layer_dec': 2, 'd_model': 64, 'timesteps': 500, 'sampling_timesteps': 500, 'loss_type': 'l1', 'beta_schedule': 'cosine', 'n_heads': 4, 'mlp_hidden_times': 4, 'attn_pd': 0.0, 'resid_pd': 0.0, 'kernel_size': 1, 'padding_size': 0}}, 'solver': {'base_lr': 1e-05, 'max_epochs': 10000, 'results_folder': './Checkpoints_stock', 'gradient_accumulate_every': 2, 'save_cycle': 500, 'ema': {'decay': 0.995, 'update_interval': 10}, 'scheduler': {'target': 'engine.lr_sch.ReduceLROnPlateauWithWarmup', 'params': {'factor': 0.5, 'patience': 2000, 'min_lr': 1e-05, 'threshold': 0.1, 'threshold_mode': 'rel', 'warmup_lr': 0.0008, 'warmup': 500, 'verbose': False}}}, 'dataloader': {'train_dataset': {'target': 'Utils.Data_utils.real_datasets.CustomDataset', 'params': {'name': 'stock', 'proportion': 1.0, 'data_root': './Data/datasets/stoc

In [9]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Dataset name -> path of the stored .npy array.
_DATA_FILES = {
    'SP500': './Data/datasets/sp500.npy',
    'ETTh1': './Data/datasets/ETTh1.npy',
    'ER': './Data/datasets/exchange_rate.npy',
    'Energy': './Data/datasets/Energy.npy',
    'weather': './Data/datasets/weather.npy',
    'MuJoCo': './Data/datasets/MuJoCo.npy',
}

# A name that is not in the table loads nothing.
if data_name in _DATA_FILES:
    load_data = np.load(_DATA_FILES[data_name])

# In-place shuffle along the first axis; train_data is an alias of the same
# array, not a copy.
np.random.shuffle(load_data)
train_data = load_data

# Sizes of the first three axes of the array.
Numble, Length, Feature = (train_data.shape[axis] for axis in range(3))
Batchsize = 128


class MyDataset(Dataset):
    """Map-style dataset over an indexable collection of samples.

    Indexing returns ``X_data[idx]`` converted to a ``torch.float32`` tensor.
    """

    def __init__(self, X_data):
        # Stored by reference; no copy is made here.
        self.X_data = X_data

    def __len__(self):
        """Number of samples, i.e. ``len(X_data)``."""
        return len(self.X_data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a new float32 tensor."""
        return torch.tensor(self.X_data[idx], dtype=torch.float32)
                    
# Wrap the (already shuffled) array and batch it; Batchsize is the 128 set above.
dataset = MyDataset(train_data)
dataloader = DataLoader(dataset=dataset, batch_size=Batchsize, shuffle=True)

def cycle(dl):
    """Yield the items of ``dl`` forever, re-iterating it after each full pass.

    ``dl`` is iterated afresh on every pass (nothing is cached), so an
    iterable that reshuffles on each ``iter()`` call reshuffles every pass.
    """
    while True:
        yield from dl
            
# Endless batch iterator, followed by a few sanity prints:
# shape of one batch, number of samples, first time step of the first sample.
dl = cycle(dataloader)
print(next(dl).shape)
print(Numble)

print(load_data[0, 0])

torch.Size([128, 30, 6])
5775
[0.19777922 0.19484571 0.0380807  0.19849203 0.19913802 0.18855503]


In [10]:
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.distributions.bernoulli import Bernoulli



def introduce_missing_superior_to_mean(X):
    """Mask every value above its column mean in the first half of the columns.

    Only the first ``int(D / 2)`` columns of the 2-D array ``X`` are affected;
    the remaining columns are passed through. ``X`` itself is not modified.

    Args:
        X: 2-D array of shape ``(N, D)``.

    Returns:
        tuple: ``(Xnan, Xz)``, both ``float32``. ``Xnan`` has NaN at the
        masked positions; ``Xz`` is the same array with those NaNs set to 0.
    """
    print("Introducing missing data with > mean")
    _, n_features = X.shape
    half = int(n_features / 2)

    corrupted = X.copy()
    head = corrupted[:, :half]  # view: writing to it edits `corrupted`
    # The comparison is evaluated in full before any value is overwritten.
    head[head > np.mean(head, axis=0)] = np.nan

    Xnan = corrupted.astype(np.float32)
    Xz = Xnan.copy()
    Xz[np.isnan(Xnan)] = 0  # zero-filled companion of Xnan
    return Xnan, Xz


def introduce_missing_mean_values(X, percentage_to_remove = 30):
    """Mask the values closest to the column mean in the first half of the columns.

    For each of the first ``int(D / 2)`` columns, the
    ``int(N * percentage_to_remove / 100)`` rows whose value is nearest to
    that column's mean are set to NaN. ``X`` itself is not modified.

    Args:
        X: 2-D array of shape ``(N, D)``.
        percentage_to_remove: percentage of rows masked per affected column.

    Returns:
        tuple: ``(Xnan, Xz)``, both ``float32``. ``Xnan`` has NaN at the
        masked positions; ``Xz`` is the same array with those NaNs set to 0.
    """
    print("Introduce missing data by removing the values around the mean")
    n_rows, n_features = X.shape
    half = int(n_features / 2)
    n_drop = int(n_rows * percentage_to_remove / 100)  # rows masked per column

    corrupted = X.copy()
    head = corrupted[:, :half]
    distance = np.abs(head - np.mean(head, axis=0))

    # argsort is ascending, so the first n_drop rows of each column are the
    # ones nearest to that column's mean. Result shape: (n_drop, half).
    nearest_rows = np.argsort(distance, axis=0)[:n_drop]

    # Pair every row index with the column it was ranked in.
    column_ids = np.arange(nearest_rows.shape[1])
    corrupted[nearest_rows, column_ids] = np.nan

    Xnan = corrupted.astype(np.float32)
    Xz = Xnan.copy()
    Xz[np.isnan(Xnan)] = 0
    return Xnan, Xz


def introduce_missing_extreme_values(X, percentile_extreme = 25):
    """Mask the extreme values in the first half of the columns.

    In each of the first ``int(D / 2)`` columns, values strictly below the
    ``percentile_extreme``-th percentile or strictly above the
    ``(100 - percentile_extreme)``-th percentile of that column are set to
    NaN. ``X`` itself is not modified.

    Args:
        X: 2-D array of shape ``(N, D)``.
        percentile_extreme: percentile that defines both tails.

    Returns:
        tuple: ``(Xnan, Xz)``, both ``float32``. ``Xnan`` has NaN at the
        masked positions; ``Xz`` is the same array with those NaNs set to 0.
    """
    print("Introducing missing data via removing extreme values")
    _, n_features = X.shape
    half = int(n_features / 2)

    corrupted = X.copy()
    head = corrupted[:, :half]  # view: writing to it edits `corrupted`

    # Per-column cut-offs for the lower and upper tail.
    low_cut = np.percentile(head, percentile_extreme, axis=0)
    high_cut = np.percentile(head, 100 - percentile_extreme, axis=0)

    out_of_range = (head < low_cut) | (head > high_cut)
    head[out_of_range] = np.nan

    Xnan = corrupted.astype(np.float32)
    Xz = Xnan.copy()
    Xz[np.isnan(Xnan)] = 0
    return Xnan, Xz

In [11]:
# Flatten (sample, step, feature) into a 2-D (sample * step, feature) table,
# which is the layout the introduce_missing_* helpers unpack.
X_data = load_data.reshape(Numble * Length, Feature)

# Active missingness mechanism. The two other helpers,
# introduce_missing_superior_to_mean and introduce_missing_extreme_values,
# take the same X_data argument and can be swapped in here.
Xnan, Xz = introduce_missing_mean_values(X_data)   # Xnan contains NaN

# Back to 3-D; S is the float32 observation mask (1 = observed, 0 = missing).
Xnan = Xnan.reshape(-1, Length, Feature)
S = (~np.isnan(Xnan)).astype(np.float32)
print(S.shape)

# Overall missing rate.
n_1 = (S == 1).sum()
n_0 = (S == 0).sum()
print(n_0 / (n_1 + n_0))

Introduce missing data by removing the values around the mean
(5775, 30, 6)
0.15


In [12]:
number, L, D = (load_data.shape[axis] for axis in range(3))


'去掉全部缺失的列'
# Undo the masking for any feature that is missing at every time step of a
# sample: copy the original values back and mark the column as observed.
_fully_missing = np.isnan(Xnan).all(axis=1)   # one flag per (sample, feature)
for _sample, _feature in np.argwhere(_fully_missing):
    Xnan[_sample, :, _feature] = load_data[_sample, :, _feature]
    S[_sample, :, _feature] = 1


In [13]:
'去掉完全缺失属性后的缺失率'
# Missing rate once the fully-missing columns have been restored.
n_1 = (S == 1).sum()
n_0 = (S == 0).sum()
print(n_0 / (n_1 + n_0))

# Training input: a dict with the NaN-marked data under the "X" key.
train_set = dict(X=Xnan)


0.0667965367965368


In [14]:
from pypots.imputation import Pyraformer
from pypots.imputation import SAITS  
from pypots.imputation import ImputeFormer
from pypots.imputation import CSDI
# from pypots.optim import Adam
# from pypots.imputation import TimesNet
# from pypots.imputation import TimeMixer
# from pypots.imputation import TimeMixerPP
# from pypots.imputation import FEDformer
from pypots.imputation import Informer
# from pypots.imputation import ETSformer
# from pypots.imputation import Reformer
# from pypots.imputation import MICN
# from pypots.imputation import PatchTST
# from pypots.imputation import TiDE
# from pypots.imputation import Koopa
# from pypots.imputation import DLinear


# Name of the baseline imputation model to train and evaluate. The training
# and prediction cells branch on this string; exactly one assignment should
# be active. The commented lines list other names those cells recognise.
baseline = 'CSDI'
# baseline = 'SAITS'
# baseline = 'Pyraformer'
# baseline = 'ImputeFormer'
# baseline = 'Informer'
# baseline = 'TimesNet'
# baseline = 'PatchTST'
# baseline = 'ETSformer'
# baseline = 'TimeMixer'
# baseline = 'DLinear'

[34m
████████╗██╗███╗   ███╗███████╗    ███████╗███████╗██████╗ ██╗███████╗███████╗    █████╗ ██╗
╚══██╔══╝██║████╗ ████║██╔════╝    ██╔════╝██╔════╝██╔══██╗██║██╔════╝██╔════╝   ██╔══██╗██║
   ██║   ██║██╔████╔██║█████╗█████╗███████╗█████╗  ██████╔╝██║█████╗  ███████╗   ███████║██║
   ██║   ██║██║╚██╔╝██║██╔══╝╚════╝╚════██║██╔══╝  ██╔══██╗██║██╔══╝  ╚════██║   ██╔══██║██║
   ██║   ██║██║ ╚═╝ ██║███████╗    ███████║███████╗██║  ██║██║███████╗███████║██╗██║  ██║██║
   ╚═╝   ╚═╝╚═╝     ╚═╝╚══════╝    ╚══════╝╚══════╝╚═╝  ╚═╝╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝
ai4ts v0.0.3 - building AI for unified time-series analysis, https://time-series.ai [0m



In [15]:
# Train the baseline selected by `baseline` on `train_set`.
#
# Each branch builds one model, stores it in its own variable (the prediction
# cell looks these names up) and fits it. Models whose class is not imported
# at the top of the notebook are imported inside their branch, so an
# unselected baseline never needs to be importable.
torch.manual_seed(0)

# Every model is sized from the NaN-marked training array.
n_steps, n_features = Xnan.shape[1], Xnan.shape[2]

if baseline == 'SAITS':
    saits = SAITS(
        n_steps=n_steps, n_features=n_features,
        n_layers=2, d_model=256, n_heads=4, d_k=64, d_v=64, d_ffn=128, dropout=0.1,
        # alternative: n_layers=2, d_model=256, n_heads=4, d_k=128, d_v=128, d_ffn=128, dropout=0.1,
        ORT_weight=1, MIT_weight=1,
        num_workers=0,
        batch_size=200, epochs=300,
    )
    saits.fit(train_set=train_set)


# Author's note: with d_proj=36 and d_ffn=256 the model does not converge.
# Other settings depend on the dataset; consider adjusting parameters until
# the model converges.
elif baseline == 'ImputeFormer':
    imputeFormer = ImputeFormer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4,
        d_input_embed=24, d_learnable_embed=128,
        d_proj=36, d_ffn=128,
        n_temporal_heads=4,
        ORT_weight=1, MIT_weight=1,
        batch_size=200, epochs=300,
    )
    imputeFormer.fit(train_set=train_set)


elif baseline == 'CSDI':
    csdi = CSDI(
        n_steps=n_steps, n_features=n_features,
        n_layers=6, n_heads=2, n_channels=128,
        d_time_embedding=64, d_feature_embedding=32, d_diffusion_embedding=128,
        target_strategy="random",
        n_diffusion_steps=50,
        batch_size=200, epochs=200,   # author's note: 200 epochs is generally enough
        # optimizer=Adam(lr=1e-3),    # author's note: about the same as the default optimizer
        num_workers=0,
    )
    csdi.fit(train_set=train_set)


elif baseline == 'Pyraformer':
    pyraformer = Pyraformer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, n_heads=4,
        d_model=256, d_ffn=512,
        window_size=[1, 1, 1],
        inner_size=3,
        dropout=0.1,
        batch_size=200, epochs=500,
    )
    pyraformer.fit(train_set=train_set)


elif baseline == 'Informer':
    informer = Informer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, n_heads=8,
        d_model=256, d_ffn=512,
        factor=5,
        batch_size=200, epochs=400,
    )
    informer.fit(train_set=train_set)


# Parameters: https://github.com/salesforce/ETSformer/blob/main/run.py
elif baseline == 'ETSformer':
    from pypots.imputation import ETSformer

    eTSformer = ETSformer(
        n_steps=n_steps, n_features=n_features,
        n_heads=8,
        d_model=256, d_ffn=512,
        top_k=1,
        n_encoder_layers=2, n_decoder_layers=2,
        batch_size=200, epochs=500,
    )
    eTSformer.fit(train_set=train_set)


elif baseline == 'PatchTST':
    from pypots.imputation import PatchTST

    patchTST = PatchTST(
        n_steps=n_steps, n_features=n_features,
        patch_size=16, patch_stride=8,
        n_layers=4, d_model=256, n_heads=4,
        d_k=64, d_v=64, d_ffn=128,
        dropout=0.2, attn_dropout=0,
        batch_size=200, epochs=500,
    )
    # Fit the PatchTST model built above (this branch used to call
    # eTSformer.fit, which trained the wrong model or raised NameError).
    patchTST.fit(train_set=train_set)


elif baseline == 'TimesNet':
    from pypots.imputation import TimesNet

    timesNet = TimesNet(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, top_k=1,
        d_model=256, d_ffn=256,
        n_kernels=5, dropout=0,
        apply_nonstationary_norm=False,
        batch_size=200, epochs=400,
    )
    timesNet.fit(train_set=train_set)


elif baseline == 'TimeMixer':
    from pypots.imputation import TimeMixer

    timeMixer = TimeMixer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, top_k=1,
        d_model=256, d_ffn=256,
        batch_size=200, epochs=400,
    )
    timeMixer.fit(train_set=train_set)


# Parameters: https://blog.csdn.net/java1314777/article/details/134670578
# https://github.com/Thinklab-SJTU/Crossformer
elif baseline == 'Crossformer':
    from pypots.imputation import Crossformer

    crossformer = Crossformer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, n_heads=4,
        d_model=256, d_ffn=512,
        factor=5,
        seg_len=6, win_size=2,
        batch_size=200, epochs=200,
    )
    crossformer.fit(train_set=train_set)


elif baseline == 'FEDformer':
    from pypots.imputation import FEDformer

    fEDformer = FEDformer(
        n_steps=n_steps, n_features=n_features,
        n_layers=4, n_heads=4,
        d_model=256, d_ffn=256,
        moving_avg_window_size=24,
        dropout=0.1,
        batch_size=200, epochs=200,
    )
    fEDformer.fit(train_set=train_set)


elif baseline == 'DLinear':
    from pypots.imputation import DLinear

    dLinear = DLinear(
        n_steps=n_steps, n_features=n_features,
        moving_avg_window_size=5,
        d_model=512,
        ORT_weight=1, MIT_weight=1,
        batch_size=200, epochs=300,
    )
    dLinear.fit(train_set=train_set)


else:
    # Fail here instead of letting a later cell trip over a missing model.
    raise ValueError(f"Unknown baseline: {baseline!r}")

2026-01-31 23:21:01 [INFO]: No given device, using default device: cuda
2026-01-31 23:21:01 [INFO]: CSDI initialized with the given hyperparameters, the number of trainable parameters: 1,693,761
2026-01-31 23:21:13 [INFO]: Epoch 001 - training loss (default): 0.3138
2026-01-31 23:21:19 [INFO]: Epoch 002 - training loss (default): 0.0700
2026-01-31 23:21:25 [INFO]: Epoch 003 - training loss (default): 0.0430
2026-01-31 23:21:32 [INFO]: Epoch 004 - training loss (default): 0.0395
2026-01-31 23:21:38 [INFO]: Epoch 005 - training loss (default): 0.0293
2026-01-31 23:21:45 [INFO]: Epoch 006 - training loss (default): 0.0288
2026-01-31 23:21:51 [INFO]: Epoch 007 - training loss (default): 0.0234
2026-01-31 23:21:57 [INFO]: Epoch 008 - training loss (default): 0.0230
2026-01-31 23:22:04 [INFO]: Epoch 009 - training loss (default): 0.0222
2026-01-31 23:22:10 [INFO]: Epoch 010 - training loss (default): 0.0223
2026-01-31 23:22:17 [INFO]: Epoch 011 - training loss (default): 0.0228
2026-01-31 23

2026-01-31 23:33:13 [INFO]: Epoch 111 - training loss (default): 0.0093
2026-01-31 23:33:20 [INFO]: Epoch 112 - training loss (default): 0.0110
2026-01-31 23:33:27 [INFO]: Epoch 113 - training loss (default): 0.0099
2026-01-31 23:33:33 [INFO]: Epoch 114 - training loss (default): 0.0084
2026-01-31 23:33:40 [INFO]: Epoch 115 - training loss (default): 0.0093
2026-01-31 23:33:46 [INFO]: Epoch 116 - training loss (default): 0.0107
2026-01-31 23:33:53 [INFO]: Epoch 117 - training loss (default): 0.0098
2026-01-31 23:34:00 [INFO]: Epoch 118 - training loss (default): 0.0102
2026-01-31 23:34:06 [INFO]: Epoch 119 - training loss (default): 0.0093
2026-01-31 23:34:13 [INFO]: Epoch 120 - training loss (default): 0.0089
2026-01-31 23:34:19 [INFO]: Epoch 121 - training loss (default): 0.0099
2026-01-31 23:34:26 [INFO]: Epoch 122 - training loss (default): 0.0091
2026-01-31 23:34:33 [INFO]: Epoch 123 - training loss (default): 0.0100
2026-01-31 23:34:39 [INFO]: Epoch 124 - training loss (default):

In [41]:
# Ground-truth values where S marks the entry as observed, NaN elsewhere.
# The mask itself decides what is hidden, so observed values that are exactly
# 0 stay observed; multiplying by S and then replacing every 0 with NaN would
# also hide them and disagree with S.
train_data = np.where(S == 1, load_data, np.nan)


'选择不同的测试集'
# Choose which test set to build.
test_way = 1
# test_way = 2
# test_way = 3


'选择测试集方法1 常规方法 500个样本'
'train_data和Xnan一样 '
# Option 1: standard approach, the first 500 samples.
# Author's note: train_data is the same as Xnan.
if test_way == 1:
    test_set = {"X": train_data[0:500]}
#     test_set =  {"X":  Xnan[0:500]}


# Option 2: standard approach, all samples.
elif test_way == 2:
    test_set = {"X": train_data}
    # test_set =  {"X":  Xnan}


# Option 3 (author's note): take the first 500 samples; to get the same sample
# order as the "dps-not" method they are shuffled once, taken out, then tested.
elif test_way == 3:

    # NOTE: this rebinds the name MyDataset, replacing the single-array
    # version defined in the data-loading cell for the rest of the session.
    class MyDataset(Dataset):
        """Paired dataset: item ``idx`` is ``(X_data[idx], Y_data[idx])`` as tensors."""

        def __init__(self, X_data, Y_data):
            self.X_data = X_data
            self.Y_data = Y_data

        def __len__(self):
            return len(self.X_data)

        def __getitem__(self, idx):
            x = torch.tensor(self.X_data[idx], dtype=torch.float32)    # data forced to float32
            y = torch.tensor(self.Y_data[idx])                         # mask keeps its own dtype
            return x, y


    dataset_with_mask = MyDataset(Xnan[0:500], S[0:500])
    dataloader_with_mask = DataLoader(dataset_with_mask, batch_size=500, shuffle=True)

    # One seeded draw returns the data and its mask from the same batch, so
    # they share one permutation. (The same seed was previously set twice to
    # draw the identical batch two times.)
    torch.manual_seed(0)
    Xnan_shuffle, S_shuffle = next(iter(dataloader_with_mask))
    S_shuffle = S_shuffle.numpy()
    print(Xnan_shuffle[0][0])
    print(S_shuffle[0][0])

    test_set = {"X": Xnan_shuffle}

    # The ground truth has to be shuffled too; re-seeding with the same value
    # immediately before the draw reproduces the permutation used above.
    torch.manual_seed(0)
    dataset_with_mask = MyDataset(load_data[0:500], S[0:500])
    dataloader_with_mask = DataLoader(dataset_with_mask, batch_size=500, shuffle=True)
    test_load_data = next(iter(dataloader_with_mask))[0].numpy()
    print(test_load_data[0][0])

In [42]:
# Run the trained baseline on `test_set`.
#
# Every baseline that the training cell can build has a branch here. An
# unknown name raises instead of leaving `result_base` undefined, or silently
# reusing a value from an earlier run.
if baseline == 'Pyraformer':
    result_base = pyraformer.predict(test_set)

elif baseline == 'SAITS':
    result_base = saits.predict(test_set)

elif baseline == 'ImputeFormer':
    result_base = imputeFormer.predict(test_set)

elif baseline == 'Informer':
    result_base = informer.predict(test_set)

elif baseline == 'ETSformer':
    result_base = eTSformer.predict(test_set)

elif baseline == 'PatchTST':
    result_base = patchTST.predict(test_set)

elif baseline == 'TimesNet':
    result_base = timesNet.predict(test_set)

elif baseline == 'TimeMixer':
    result_base = timeMixer.predict(test_set)

elif baseline == 'Crossformer':
    result_base = crossformer.predict(test_set)

elif baseline == 'FEDformer':
    result_base = fEDformer.predict(test_set)

elif baseline == 'DLinear':
    result_base = dLinear.predict(test_set)

elif baseline == 'CSDI':
    # CSDI is the only baseline asked for several samples per series.
    result_base = csdi.predict(test_set, n_sampling_times=2)

else:
    raise ValueError(f"Unknown baseline: {baseline!r}")


# The imputed values are stored under the "imputation" key of the result.
result_base = result_base["imputation"]
if baseline == 'CSDI':
    # Average over axis 1, which for CSDI presumably holds the
    # n_sampling_times draws (the recorded output shape after this step is
    # (500, 30, 6)). TODO confirm against the PyPOTS docs.
    result_base = result_base.mean(axis=1)

print(result_base.shape)

(500, 30, 6)


In [44]:
from pypots.utils.metrics import calc_mse,calc_rmse,calc_mae

'方法1测试集的rmse 500个样本'
# Pick the ground truth and observation mask that match the chosen test set:
#   1 -> first 500 samples, 2 -> all samples, 3 -> the shuffled 500 samples.
if test_way == 1:
    _truth, _observed = load_data[0:500], S[0:500]
elif test_way == 2:
    _truth, _observed = load_data, S
elif test_way == 3:
    _truth, _observed = test_load_data, S_shuffle
else:
    # Any other value evaluates nothing.
    _truth = _observed = None

if _truth is not None:
    # 1 - mask is 1 exactly at the artificially removed entries, so the
    # errors are presumably measured on those positions only.
    # TODO confirm the mask convention of calc_rmse / calc_mae.
    _removed = 1 - _observed
    rmse = calc_rmse(result_base, _truth, _removed)
    mae = calc_mae(result_base, _truth, _removed)
    print(round(rmse, 4), round(mae, 4))
    


0.0243 0.0174
