In [1]:

%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from pygrinder import mcar
from pypots.data import load_specific_dataset

  @autocast(enabled=False)
  @autocast(enabled=False)


In [3]:
data = load_specific_dataset('physionet_2012')  # PyPOTS will automatically download and extract it.


2024-09-16 01:43:22 [INFO]: Loading the dataset physionet_2012 with TSDB (https://github.com/WenjieDu/Time_Series_Data_Beans)...
2024-09-16 01:43:22 [INFO]: Starting preprocessing physionet_2012...
2024-09-16 01:43:22 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-09-16 01:43:22 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-09-16 01:43:22 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-09-16 01:43:22 [INFO]: Loaded successfully!
2024-09-16 01:44:16 [INFO]: 69707 values masked out in the val set as ground truth, take 9.97% of the original observed values
2024-09-16 01:44:16 [INFO]: 86366 values masked out in the test set as ground truth, take 10.02% of the original observed values
2024-09-16 01:44:16 [INFO]: Total sample number: 1

In [4]:
data.keys()

dict_keys(['n_classes', 'n_steps', 'n_features', 'scaler', 'train_X', 'train_y', 'train_ICUType', 'val_X', 'val_y', 'val_ICUType', 'test_X', 'test_y', 'test_ICUType', 'val_X_ori', 'test_X_ori'])

In [5]:
X = data['train_X']
num_samples = len(X)
# X = X.drop(['RecordID', 'Time'], axis = 1)
X = X.reshape(-1, 37)
X = StandardScaler().fit_transform(X)
X = X.reshape(num_samples, 48, -1)
X_ori = X  # keep X_ori for validation
X = mcar(X, 0.1)

In [6]:
print(X.shape)  # (11988, 48, 37), 11988 samples and each sample has 48 time steps, 37 features

(7671, 48, 37)


In [7]:
from pypots.imputation import BRITS
from BRITS.PypotsBrits import BRITS as myBrits

In [8]:
saits = BRITS(n_steps=48, n_features=37, epochs=10, rnn_hidden_size=10)


2024-09-16 01:44:21 [INFO]: No given device, using default device: cuda
2024-09-16 01:44:25 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 19,628


In [9]:
dataset = {"X": X[:100]}  # X for model input
saits.fit(dataset)  # train the model on the dataset


2024-09-16 01:44:36 [INFO]: Epoch 001 - training loss: 1.5125
2024-09-16 01:44:40 [INFO]: Epoch 002 - training loss: 1.3970
2024-09-16 01:44:44 [INFO]: Epoch 003 - training loss: 1.3864
2024-09-16 01:44:47 [INFO]: Epoch 004 - training loss: 1.4552
2024-09-16 01:44:51 [INFO]: Epoch 005 - training loss: 1.4255
2024-09-16 01:44:55 [INFO]: Epoch 006 - training loss: 1.3910
2024-09-16 01:44:58 [INFO]: Epoch 007 - training loss: 1.3982
2024-09-16 01:45:04 [INFO]: Epoch 008 - training loss: 1.4460
2024-09-16 01:45:09 [INFO]: Epoch 009 - training loss: 1.3994
2024-09-16 01:45:12 [INFO]: Epoch 010 - training loss: 1.3508
2024-09-16 01:45:12 [INFO]: Finished training. The best model is from epoch#10.


In [29]:
from Logger.ConsoleLogger import ConsoleLogger
from AbstractModel.score import get_score, ScoreType
from AbstractModel.optimizer.abstract_optimizer import Adam
from AbstractModel.error.AbstractError import ErrorType
from AbstractModel.error.TorchError import get_error
from AbstractModel.Parametrs import TimeSeriesConfig, TorchNNConfig
from PyPOTSAdapter.BRITS.BRITS import BRITSImpute

error = get_error(ErrorType.MAE)()
time_series_config = TimeSeriesConfig(X[:100].shape[2],
                                      X[:100].shape[1])
train_config = TorchNNConfig(
    batch_size=32,
    epochs=10,
    error_factory=error,
    optimizer_type=Adam(amsgrad=False),
    score_factory=get_score(ScoreType.MSE),
    early_stopping_patience=50
)
print_logger = ConsoleLogger().configure()

model = BRITSImpute(time_series=time_series_config,
                    neural_network_config=train_config,
                    logger=print_logger,
                    device='cuda:0')
history = model.train(X[:100], X[:100])

2024-09-16 02:15:14,156 - ConsoleLogger - INFO
----------------------------
[LogKeys.EPOCH]: 0
[EpochType.TRAIN]: {'MAE': 1.8694677750269573, 'brits_score': [MSE]: 1.1527024904886882}
[EpochType.EVAL]: {'MAE': 1.927904725074768, 'brits_score': [MSE]: 1.2804162502288818}
---------------------------

2024-09-16 02:15:14,157 - ConsoleLogger - INFO
----------------------------
new best model
---------------------------

2024-09-16 02:15:20,858 - ConsoleLogger - INFO
----------------------------
[LogKeys.EPOCH]: 1
[EpochType.TRAIN]: {'MAE': 1.8339452346165974, 'brits_score': [MSE]: 1.117957353591919}
[EpochType.EVAL]: {'MAE': 1.7786332766215007, 'brits_score': [MSE]: 1.0722324053446453}
---------------------------

2024-09-16 02:15:20,860 - ConsoleLogger - INFO
----------------------------
new best model
---------------------------

2024-09-16 02:15:25,839 - ConsoleLogger - INFO
----------------------------
[LogKeys.EPOCH]: 2
[EpochType.TRAIN]: {'MAE': 1.8319321473439534, 'brits_score': [MS

In [34]:
san_brits_result = model(X[:100])

In [None]:
calc_mae(brits_impute, np.nan_to_num(X_ori[:100]),
         indicating_mask)  

In [62]:
dataset = {"X": X[:100]}  # X for model input
myBritrs = myBrits(n_steps=48, n_features=37, epochs=10, rnn_hidden_size=10)
myBritrs.fit(dataset)

2024-09-16 01:34:33 [INFO]: No given device, using default device: cuda
2024-09-16 01:34:33 [INFO]: BRITS initialized with the given hyperparameters, the number of trainable parameters: 19,628


KeyboardInterrupt: 

In [37]:
brits_impute = saits.impute(dataset)


In [78]:
my_brits_impute = myBritrs.impute(dataset)
indicating_mask = np.isnan(X) ^ np.isnan(X_ori)

tensor(0.7052, device='cuda:0') tensor(0.6363, device='cuda:0') tensor(1.3415, device='cuda:0')
tensor(0.6531, device='cuda:0') tensor(0.6347, device='cuda:0') tensor(1.2878, device='cuda:0')
tensor(0.5035, device='cuda:0') tensor(0.4907, device='cuda:0') tensor(0.9941, device='cuda:0')
tensor(0.6895, device='cuda:0') tensor(0.6528, device='cuda:0') tensor(1.3423, device='cuda:0')


In [35]:
indicating_mask = np.isnan(X) ^ np.isnan(X_ori)
indicating_mask = indicating_mask[:100]

In [38]:
from pypots.utils.metrics import calc_mae

calc_mae(brits_impute, np.nan_to_num(X_ori[:100]),
         indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)


0.6460574315569586

In [39]:
calc_mae(san_brits_result, np.nan_to_num(X_ori[:100]),
         indicating_mask)  # calculate mean absolute error on the ground truth (artificially-missing values)

0.5100794408556515

(100, 48, 37)

In [30]:
indicating_mask.shape

(7671, 48, 37)