In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from pypots.imputation import SAITS  
from pypots.utils.metrics import calc_mse,calc_rmse,calc_mae
from pypots.optim import Adam
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import time
from utils.util import get_mask_mnr, get_mask_bm, get_mask_rm, get_mask_mcar


In [10]:
# Choose which dataset to load; switch by (un)commenting one of the lines below.
data_name = 'sp500'
# data_name = 'ETTh1'
# data_name = 'energy'
# data_name = 'mujoco'

# Supported dataset names mapped to their .npy files.
dataset_paths = {
    'sp500': "./datasets/sp500.npy",
    'energy': "./datasets/energy.npy",
    'ETTh1': "./datasets/ETTh1.npy",
    'mujoco': "./datasets/MuJoCo.npy",
}

# Fail early with a readable message; previously an unsupported name left
# `train_data_path` unassigned and surfaced as a NameError at np.load.
if data_name not in dataset_paths:
    raise ValueError(
        f"Unknown data_name {data_name!r}; expected one of {sorted(dataset_paths)}"
    )
train_data_path = dataset_paths[data_name]

load_data = np.load(train_data_path)
# Shuffles the samples in place along the first axis.
# NOTE(review): no random seed is set in this cell, so the order differs between runs.
np.random.shuffle(load_data)

# The array is 3-D; presumably (samples, time steps, features) — TODO confirm.
Number, Length, Attribute = load_data.shape
print(Number, Length, Attribute)

5775 30 6


In [11]:
# Missingness settings used when drawing the masks below.
mr = 0.3  # missing rate passed to get_mask_mcar
mechanism = 'mcar'
# mechanism = 'mnr'

# Count of steps handed to get_mask_mnr, truncated to an integer.
missing_length = int(Length * mr)

In [12]:
# Zero-row placeholder; the per-sample masks are concatenated onto it below.
Mask = torch.empty([0, Length, Attribute])
print(Mask.shape)

# Draw one mask per sample. Entries equal to 1 are kept and 0 marks removed
# values (the printed ratio of (1 - Mask) below is the missing rate).
# The i-th sample is passed to the helper; the original always passed sample 0,
# which is equivalent only if the helper reads nothing but the sample's shape
# — NOTE(review): confirm against utils.util.
sample_masks = []
for i in range(Number):
    if mechanism == 'mcar':
        one_mask = get_mask_mcar(load_data[i], mr)
    else:
        one_mask = get_mask_mnr(load_data[i], missing_length)
    sample_masks.append(one_mask.unsqueeze(0))

# One concatenation along dim 0 instead of re-stacking the growing tensor on
# every iteration (which copied all previous rows each time).
Mask = torch.cat([Mask, *sample_masks], dim=0)
Mask = Mask.float().numpy()

print(Mask.shape)
# Fraction of removed entries, computed two equivalent ways as a sanity check.
print((1 - Mask).sum() / ((1 - Mask).sum() + Mask.sum()))
print((1 - Mask).sum() / (Number * Length * Attribute))
print(mr, mechanism, missing_length)

torch.Size([0, 30, 6])
(5775, 30, 6)
0.3
0.3
0.3 mcar 9


In [13]:
# Build the training input: keep values where Mask is non-zero and put NaN
# where Mask == 0. The previous `load_data * Mask` followed by
# `train_data[train_data == 0] = np.nan` also turned genuinely observed zeros
# into missing values; selecting by the mask avoids that.
train_data = np.where(Mask == 0, np.nan, load_data)
train_set = {"X": train_data}

In [14]:
# Draw a second, independent set of masks for the test input. This rebinds
# `Mask`, which the metric cells below use (as 1 - Mask) to pick the
# evaluation positions.
# The i-th sample is passed to the helper; the original always passed sample 0,
# which is equivalent only if the helper reads nothing but the sample's shape
# — NOTE(review): confirm against utils.util.
sample_masks = []
for i in range(Number):
    if mechanism == 'mcar':
        one_mask = get_mask_mcar(load_data[i], mr)
    else:
        one_mask = get_mask_mnr(load_data[i], missing_length)
    sample_masks.append(one_mask.unsqueeze(0))

# One concatenation onto a zero-row placeholder instead of re-stacking the
# growing tensor on every iteration.
Mask = torch.cat([torch.empty([0, Length, Attribute]), *sample_masks], dim=0)
Mask = Mask.float().numpy()

# NaN only where the mask removed a value; the previous `test_data == 0` check
# also discarded observed entries whose true value is 0.
test_data = np.where(Mask == 0, np.nan, load_data)
test_set = {"X": test_data}
print(mr, mechanism)

0.3 mcar


In [18]:
# SAITS imputer sized from the training array's second and third dimensions.
saits = SAITS(
    n_steps=train_data.shape[1],
    n_features=train_data.shape[2],
    n_layers=2,
    d_model=256,
    n_heads=4,
    d_k=64,
    d_v=64,
    # d_k=128, d_v=128,  # for mujoco
    d_ffn=128,
    dropout=0.1,
    num_workers=0,
    optimizer=Adam(lr=1e-3),
    batch_size=200,
    epochs=500,
)

In [17]:
# Train SAITS on the masked training set.
saits.fit(train_set=train_set)

In [2]:
# Impute the test set with SAITS and keep only the imputed array.
result = saits.predict(test_set)["imputation"]
print(result.shape)

# Score against the original data, restricted to entries where Mask == 0.
missing_mask = 1 - Mask
rmse = calc_rmse(result, load_data, missing_mask)
mae = calc_mae(result, load_data, missing_mask)
print(rmse, mae)

In [19]:
from pypots.imputation import CSDI

# CSDI imputer sized from the training array's second and third dimensions.
csdi = CSDI(
    n_steps=train_data.shape[1],
    n_features=train_data.shape[2],
    n_layers=6,
    n_heads=2,
    n_channels=128,
    d_time_embedding=64,
    d_feature_embedding=32,
    d_diffusion_embedding=128,
    target_strategy="random",
    n_diffusion_steps=50,
    batch_size=200,
    epochs=1,
    optimizer=Adam(lr=1e-3),
    num_workers=0,
)

In [10]:
# Train CSDI on the masked training set.
csdi.fit(train_set=train_set)

In [3]:
# Time one CSDI imputation pass over the test set. perf_counter() is a
# monotonic high-resolution clock, so the measured interval is not affected by
# system clock adjustments the way time.time() can be.
start_time = time.perf_counter()

result = csdi.predict(test_set, n_sampling_times=1)

end_time = time.perf_counter()
execution_time = end_time - start_time
print(execution_time)


result = result["imputation"]
print(result.shape)

# Average over axis 1 — presumably the sampling axis (n_sampling_times);
# TODO confirm against the pypots CSDI output layout.
result = result.mean(axis=1)
print(result.shape)

In [4]:
# Score the CSDI imputation against the original data on entries where Mask == 0.
missing_mask = 1 - Mask
rmse = calc_rmse(result, load_data, missing_mask)
mae = calc_mae(result, load_data, missing_mask)
print(rmse, mae)

In [20]:
from pypots.imputation import ETSformer

# ETSformer imputer sized from the training array's second and third dimensions.
eTSformer = ETSformer(
    n_steps=train_data.shape[1],
    n_features=train_data.shape[2],
    n_heads=8,
    d_model=256,
    d_ffn=512,
    top_k=1,
    n_encoder_layers=2,
    n_decoder_layers=2,
    batch_size=200,
    epochs=1000,
)

In [21]:
# Train ETSformer on the masked training set.
eTSformer.fit(train_set=train_set)

In [5]:
# Impute the test set with ETSformer and keep only the imputed array.
result = eTSformer.predict(test_set)["imputation"]
print(result.shape)

# Score against the original data, restricted to entries where Mask == 0.
missing_mask = 1 - Mask
rmse = calc_rmse(result, load_data, missing_mask)
mae = calc_mae(result, load_data, missing_mask)
print(rmse, mae)