In [1]:
import torch
import argparse
import numpy as np
from resample import UniformSampler,Batch_Same_Sampler
from Model import PaD_TS
from diffmodel_init import create_gaussian_diffusion
from training import Trainer
from data_preprocessing.real_dataloader import CustomDataset
from data_preprocessing.sine_dataloader import SineDataset
from data_preprocessing.real_dataloader import fMRIDataset
from data_preprocessing.mujoco_dataloader import MuJoCoDataset
from torchsummary import summary
from data_preprocessing.sampling import sampling
from eval_run import discriminative_score,predictive_score,BMMD_score,BMMD_score_naive,VDS_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Name of the dataset whose configuration classes should be loaded.
# Supported values: 'energy', 'stock', 'sine'.
data = 'sine'
if data == 'energy':
    from configs.energy_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
elif data == 'stock':
    from configs.stock_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
elif data == 'sine':
    from configs.sine_config import Training_args, Model_args, Diffusion_args, DataLoader_args, Data_args
else:
    # Report the local `data` value. No `args` object is created in this notebook,
    # so formatting `args.data` here raised NameError instead of the intended error.
    raise NotImplementedError(f"Unkown Dataset: {data}")

# Instantiate one settings object per concern; later cells read their attributes.
train_arg = Training_args()
model_arg = Model_args()
diff_arg = Diffusion_args()
dl_arg = DataLoader_args()
d_arg = Data_args()

In [63]:
# Disabled alternative: build `dataset` with CustomDataset (imported from
# data_preprocessing.real_dataloader), taking every argument from d_arg.
# NOTE(review): presumably meant for the non-sine configs (energy / stock) --
# confirm before re-enabling; the active cell builds a SineDataset instead.
# dataset = CustomDataset(
#             name=d_arg.name,
#             proportion=d_arg.proportion,
#             data_root=d_arg.data_root,
#             window=d_arg.window,
#             save2npy=d_arg.save2npy,
#             neg_one_to_one=d_arg.neg_one_to_one,
#             seed=d_arg.seed,
#             period=d_arg.period)

In [22]:
# Build the sine dataset from the data config.
# The window length is read from d_arg.window instead of a literal 24 so the
# dataset stays in sync with the evaluation calls, which pass length=d_arg.window.
dataset = SineDataset(
    window=d_arg.window,
    num=d_arg.num,
    dim=d_arg.dim,
    save2npy=d_arg.save2npy,
    neg_one_to_one=d_arg.neg_one_to_one,
    seed=d_arg.seed,
    period=d_arg.period,
)

Sampling sine-dataset: 100%|██████████| 10000/10000 [00:01<00:00, 9943.14it/s]


In [23]:
# Sanity check: display the smallest value found in the dataset's raw data.
dataset.rawdata.min()

0.5000000455140784

In [4]:


# Wrap the dataset in a DataLoader; every loader option is taken from dl_arg.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=dl_arg.batch_size,
    shuffle=dl_arg.shuffle,
    num_workers=dl_arg.num_workers,
    drop_last=dl_arg.drop_last,
    pin_memory=dl_arg.pin_memory,
)
# Instantiate PaD_TS, forwarding each listed model_arg field as the keyword
# argument of the same name.
model = PaD_TS(
    **{
        field: getattr(model_arg, field)
        for field in (
            "hidden_size",
            "num_heads",
            "n_encoder",
            "n_decoder",
            "feature_last",
            "mlp_ratio",
            "input_shape",
        )
    }
)
# Create the diffusion object, forwarding each listed diff_arg field as the
# keyword argument of the same name.
diffusion = create_gaussian_diffusion(
    **{
        field: getattr(diff_arg, field)
        for field in (
            "predict_xstart",
            "diffusion_steps",
            "noise_schedule",
            "loss",
            "rescale_timesteps",
        )
    }
)
# Choose the timestep sampler named in the training config.
# Only 'batch' and 'uniform' are recognised; anything else is rejected up front.
if train_arg.schedule_sampler not in ('batch', 'uniform'):
    raise NotImplementedError(f"Unkown sampler: {train_arg.schedule_sampler}")
schedule_sampler = (
    Batch_Same_Sampler(diffusion)
    if train_arg.schedule_sampler == 'batch'
    else UniformSampler(diffusion)
)
    
# Assemble the Trainer from the objects built in this cell plus the
# optimisation, logging and checkpoint settings held in train_arg.
trainer = Trainer(
    # objects constructed earlier in this cell
    model=model,
    diffusion=diffusion,
    data=dataloader,
    batch_size=dl_arg.batch_size,
    # optimisation settings
    lr=train_arg.lr,
    weight_decay=train_arg.weight_decay,
    lr_anneal_steps=train_arg.lr_anneal_steps,
    # logging / checkpointing
    log_interval=train_arg.log_interval,
    save_interval=train_arg.save_interval,
    save_dir=train_arg.save_dir,
    # timestep sampling and MMD weighting
    schedule_sampler=schedule_sampler,
    mmd_alpha=train_arg.mmd_alpha,
)
# Echo the key run settings, one "label value" line per setting.
# Each line is the label, a single separating space, then str(value) --
# the same text print(label, value) produces.
print(
    *(
        f"{label} {value!s}"
        for label, value in (
            ('Loss Function: ', diff_arg.loss),
            ('Save Directory: ', train_arg.save_dir),
            ('Schedule Sampler: ', train_arg.schedule_sampler),
            ('Batch Size: ', dl_arg.batch_size),
            ('Diffusion Steps: ', diff_arg.diffusion_steps),
            ('Epochs: ', train_arg.lr_anneal_steps),
            ('Alpha: ', train_arg.mmd_alpha),
            ('Window Size: ', d_arg.window),
            ('Data shape: ', model_arg.input_shape),
            ('Hidden: ', model_arg.hidden_size),
        )
    ),
    sep="\n",
)

Loss Function:  MSE_MMD
Save Directory:  ./OUTPUT/sine_24_MMD_save/
Schedule Sampler:  batch
Batch Size:  64
Diffusion Steps:  250
Epochs:  70000
Alpha:  0.0005
Window Size:  24
Data shape:  (24, 5)
Hidden:  128


In [6]:
# Run the training loop of the Trainer configured earlier; the two banners
# bracket whatever trainer.train() writes to the output.
print('======Training======')
trainer.train()
print('======Done======')



mse: 0.039049, mmd: 0.000065, total: 0.039114:   0%|          | 49/10000 [00:03<12:27, 13.31it/s]


KeyboardInterrupt: 

In [7]:
# --- Generate synthetic samples with the model and diffusion objects ---
print('======Generate Samples======')
concatenated_tensor = sampling(
    model,
    diffusion,
    dataset.sample_num,
    dataset.window,
    dataset.var_num,
    dl_arg.batch_size,
)

# Convert to a detached host-side NumPy array once, then save and score that array.
# Previously np.save was given `concatenated_tensor.cpu()` (no detach), which cannot
# be converted to NumPy if the tensor still requires grad.
np_fake = np.array(concatenated_tensor.detach().cpu())

# Build the output path once so the saved file and the printed path cannot diverge.
fake_path = f'{train_arg.save_dir}ddpm_fake_{d_arg.name}_{dataset.window}.npy'
np.save(fake_path, np_fake)
print(fake_path)

# --- Evaluate the generated samples ---
print('======Diff Eval======')
print('======Discriminative Score======')
# NOTE(review): the literal 5 is presumably the number of evaluation repetitions -- confirm in eval_run.
discriminative_score(d_arg.name, 5, np_fake, length=d_arg.window)
print('======Predictive Score======')
predictive_score(d_arg.name, 5, np_fake, length=d_arg.window)
print('======VDS Score======')
# The two scores below receive the original tensor rather than the NumPy copy.
VDS_score(d_arg.name, concatenated_tensor, length=d_arg.window)
print('======FDDS Score======')
BMMD_score_naive(d_arg.name, concatenated_tensor, length=d_arg.window)
print('======Finished======')



100%|██████████| 157/157 [02:56<00:00,  1.13s/it]


./OUTPUT/sine_24_MMD_save/ddpm_fake_sine_24.npy
Fake data: min  0.26655108 , max  0.96131265
Real data: min  0.5000000455140784 , max  0.9999999999995366
0
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [00:36<00:00, 54.91it/s]


Iter 0:  0.5 , 1.0 , 1.0 

1


training: 100%|██████████| 2000/2000 [00:35<00:00, 55.82it/s]


Iter 1:  0.5 , 1.0 , 1.0 

2


training:  11%|█         | 214/2000 [00:04<00:35, 49.82it/s]


KeyboardInterrupt: 