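# PaD-TS

This notebook trains the PaD-TS diffusion model on a selected time-series dataset, generates synthetic samples, and evaluates them with the discriminative, predictive, VDS, and FDDS scores.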

### Load Packages

In [1]:
import torch
import argparse
import numpy as np
from resample import UniformSampler,Batch_Same_Sampler
from Model import PaD_TS
from diffmodel_init import create_gaussian_diffusion
from training import Trainer
from data_preprocessing.real_dataloader import CustomDataset
from data_preprocessing.sine_dataloader import SineDataset
from data_preprocessing.real_dataloader import fMRIDataset
from data_preprocessing.mujoco_dataloader import MuJoCoDataset
from torchsummary import summary
from data_preprocessing.sampling import sampling
from eval_run import discriminative_score,predictive_score,BMMD_score,BMMD_score_naive,VDS_score

  from .autonotebook import tqdm as notebook_tqdm


### Dataset Selection and Argument Loading

In [2]:
# Name of the dataset to train on. It selects which config module supplies the
# five argument classes used by the rest of the notebook.
# Supported values: 'energy', 'stock', 'sine'.
data = 'sine'
if data == 'energy':
    from configs.energy_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
elif data == 'stock':
    from configs.stock_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
elif data == 'sine':
    from configs.sine_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
else:
    # `data` is the only selector in scope (this notebook defines no `args`
    # object), so report it directly; this keeps the failure a
    # NotImplementedError rather than a NameError.
    raise NotImplementedError(f"Unknown Dataset: {data}")

# Instantiate one object per argument group from the selected config module.
train_arg = Training_args()
model_arg = Model_args()
diff_arg = Diffusion_args()
dl_arg = DataLoader_args()
d_arg = Data_args()

In [3]:
# Build the training dataset. 'sine' uses the SineDataset class; every other
# supported dataset name goes through CustomDataset.
if data == 'sine':
    dataset = SineDataset(
        # Take the window from the config instead of a hard-coded 24 so the
        # dataset stays in sync with the d_arg.window value that later cells
        # print and pass to the evaluation functions.
        window=d_arg.window,
        num=d_arg.num,
        dim=d_arg.dim,
        save2npy=d_arg.save2npy,
        neg_one_to_one=d_arg.neg_one_to_one,
        seed=d_arg.seed,
        period=d_arg.period,
    )
else:
    dataset = CustomDataset(
        name=d_arg.name,
        proportion=d_arg.proportion,
        data_root=d_arg.data_root,
        window=d_arg.window,
        save2npy=d_arg.save2npy,
        neg_one_to_one=d_arg.neg_one_to_one,
        seed=d_arg.seed,
        period=d_arg.period,
    )

# Wrap the dataset in a DataLoader; all loader options come from the
# DataLoader argument group.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=dl_arg.batch_size,
    shuffle=dl_arg.shuffle,
    num_workers=dl_arg.num_workers,
    drop_last=dl_arg.drop_last,
    pin_memory=dl_arg.pin_memory,
)


Sampling sine-dataset: 100%|██████████| 10000/10000 [00:00<00:00, 10014.84it/s]


### Model and Diffusion Process Initialization

In [4]:
# Network to be trained, configured entirely from the model argument group.
model = PaD_TS(
    hidden_size=model_arg.hidden_size,
    num_heads=model_arg.num_heads,
    n_encoder=model_arg.n_encoder,
    n_decoder=model_arg.n_decoder,
    feature_last=model_arg.feature_last,
    mlp_ratio=model_arg.mlp_ratio,
    input_shape=model_arg.input_shape,
)

# Gaussian diffusion process, configured from the diffusion argument group.
diffusion = create_gaussian_diffusion(
    predict_xstart=diff_arg.predict_xstart,
    diffusion_steps=diff_arg.diffusion_steps,
    noise_schedule=diff_arg.noise_schedule,
    loss=diff_arg.loss,
    rescale_timesteps=diff_arg.rescale_timesteps,
)

# Pick the timestep schedule sampler named in the training config.
if train_arg.schedule_sampler == 'batch':
    schedule_sampler = Batch_Same_Sampler(diffusion)
elif train_arg.schedule_sampler == 'uniform':
    schedule_sampler = UniformSampler(diffusion)
else:
    raise NotImplementedError(f"Unknown sampler: {train_arg.schedule_sampler}")

# Bundle model, diffusion process, data, and optimisation settings.
trainer = Trainer(
    model=model,
    diffusion=diffusion,
    data=dataloader,
    batch_size=dl_arg.batch_size,
    lr=train_arg.lr,
    weight_decay=train_arg.weight_decay,
    lr_anneal_steps=train_arg.lr_anneal_steps,
    log_interval=train_arg.log_interval,
    save_interval=train_arg.save_interval,
    save_dir=train_arg.save_dir,
    schedule_sampler=schedule_sampler,
    mmd_alpha=train_arg.mmd_alpha,
)

# Echo the key settings so the run is self-describing in the cell output.
print('Loss Function: ',diff_arg.loss)
print('Save Directory: ',train_arg.save_dir)
print('Schedule Sampler: ',train_arg.schedule_sampler)
print('Batch Size: ',dl_arg.batch_size)
print('Diffusion Steps: ',diff_arg.diffusion_steps)
# NOTE(review): the 'Epochs' label shows train_arg.lr_anneal_steps; confirm
# whether that value counts epochs or individual training steps.
print('Epochs: ',train_arg.lr_anneal_steps)
print('Alpha: ',train_arg.mmd_alpha)
print('Window Size: ',d_arg.window)
print('Data shape: ',model_arg.input_shape)
print('Hidden: ', model_arg.hidden_size)

Loss Function:  MSE_MMD
Save Directory:  ./OUTPUT/sine_24/
Schedule Sampler:  batch
Batch Size:  64
Diffusion Steps:  250
Epochs:  70000
Alpha:  0.0005
Window Size:  24
Data shape:  (24, 5)
Hidden:  128


In [5]:
# Run the trainer built in the previous cell. The two banners mark the start
# and end of training in the cell output.
print('======Training======')
trainer.train()
print('======Done======')



mse: 0.011447, mmd: 0.000006, total: 0.011453: 100%|██████████| 70000/70000 [1:16:48<00:00, 15.19it/s]




### Generate samples and Evaluate

In [6]:
print('======Generate Samples======')
# Draw synthetic samples from the trained model, using the sample count,
# window, and variable count reported by the dataset object.
concatenated_tensor = sampling(model,diffusion,dataset.sample_num,dataset.window, dataset.var_num, dl_arg.batch_size)

# Detach and copy to a NumPy array once, then reuse that array both for the
# saved file and for the NumPy-based scores. Saving the detached array avoids
# handing np.save a tensor that may still carry autograd state.
np_fake = np.array(concatenated_tensor.detach().cpu())

# Build the output path a single time so the saved file and the printed
# location cannot drift apart.
fake_path = f'{train_arg.save_dir}ddpm_fake_{d_arg.name}_{dataset.window}.npy'
np.save(fake_path, np_fake)
print(fake_path)

print('======Diff Eval======')
# NOTE(review): the literal 5 passed to the next two scores is presumably the
# number of repeated evaluation runs (the recorded output shows five
# iterations each) -- confirm against eval_run.
print('======Discriminative Score======')
discriminative_score(d_arg.name,5, np_fake,length=d_arg.window)
print('======Predictive Score======')
predictive_score(d_arg.name,5, np_fake,length=d_arg.window)
# The remaining two scores are given the torch tensor rather than the NumPy copy.
print('======VDS Score======')
VDS_score(d_arg.name, concatenated_tensor,length=d_arg.window)
print('======FDDS Score======')
BMMD_score_naive(d_arg.name, concatenated_tensor,length=d_arg.window)
print('======Finished======')



100%|██████████| 157/157 [02:48<00:00,  1.07s/it]


./OUTPUT/sine_24/ddpm_fake_sine_24.npy
Fake data: min  0.49786687 , max  1.0
Real data: min  0.5000000455140784 , max  0.9999999999995366
0
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [00:36<00:00, 55.23it/s]


Iter 0:  0.007750000000000035 , 0.4655 , 0.55 

1


training: 100%|██████████| 2000/2000 [00:35<00:00, 56.72it/s]


Iter 1:  0.010000000000000009 , 0.4815 , 0.4985 

2


training: 100%|██████████| 2000/2000 [00:34<00:00, 57.49it/s]


Iter 2:  0.008500000000000008 , 0.531 , 0.452 

3


training: 100%|██████████| 2000/2000 [00:34<00:00, 57.48it/s]


Iter 3:  0.0020000000000000018 , 0.587 , 0.417 

4


training: 100%|██████████| 2000/2000 [00:35<00:00, 57.12it/s]


Iter 4:  0.004750000000000032 , 0.5325 , 0.477 

sine:
Final Score:  0.006600000000000017 ± 0.003978898672157365
Fake data: min  0.49786687 , max  1.0
Real data: min  0.5000000455140784 , max  0.9999999999995366


training: 100%|██████████| 5000/5000 [00:51<00:00, 96.85it/s] 


0  epoch:  0.09316353265241949 



training: 100%|██████████| 5000/5000 [00:42<00:00, 118.42it/s]


1  epoch:  0.09282965929533019 



training: 100%|██████████| 5000/5000 [00:44<00:00, 112.15it/s]


2  epoch:  0.09308739527400377 



training: 100%|██████████| 5000/5000 [00:49<00:00, 101.44it/s]


3  epoch:  0.09295979806334236 



training: 100%|██████████| 5000/5000 [00:48<00:00, 103.61it/s]


4  epoch:  0.0927986646338338 

sine:
Final Score:  0.09296780998378593 ± 0.00019668517967023403


100%|██████████| 5/5 [00:03<00:00,  1.65it/s]


sine VDS Score: tensor(0.0003)


100%|██████████| 10/10 [00:05<00:00,  2.00it/s]

sine FDDS Score: tensor(0.0003)



