In [None]:
import torch
import numpy as np
import torch.nn as nn
from tqdm.notebook import tqdm_notebook as tqdm
import torch.nn.functional as F
import time
import datetime
import torch.optim as optim 

In [None]:
# CSV file that the data-preparation cell loads with np.loadtxt.
dataset_file = 'RLD.csv'

# Set to True to select CUDA device 0 as the current device.
# NOTE(review): nothing visible in this notebook moves the model or tensors
# to the GPU, so flipping this flag alone does not enable GPU training.
use_gpu = False
if use_gpu:
    torch.cuda.set_device(0)

In [None]:
# ---------------------------------------------------------------------------
# Settings. Only p_miss and train_rate are consumed in this cell; mb_size,
# p_hint and alpha are defined here but not referenced by any code in it.
# ---------------------------------------------------------------------------
mb_size = 128
p_miss = 0.1        # probability that an entry is flagged as missing (mask = 0)
p_hint = 0.8
alpha = 400
train_rate = 0.8    # fraction of rows that go to the training split

# ---------------------------------------------------------------------------
# Load the comma-separated numeric table, skipping its first row.
# ---------------------------------------------------------------------------
Data = np.loadtxt(dataset_file, skiprows=1, delimiter=",")

No = len(Data)          # number of rows
Dim = Data.shape[1]     # number of columns

H_Dim1 = H_Dim2 = Dim

# ---------------------------------------------------------------------------
# Column-wise min/max scaling, done in place on Data.
# Min_Val keeps each column's original minimum; Max_Val keeps each column's
# maximum *after* the minimum was subtracted (i.e. the column's range).
# The 1e-6 term avoids dividing by zero for constant columns.
# ---------------------------------------------------------------------------
Min_Val = Data.min(axis=0)
Data -= Min_Val
Max_Val = Data.max(axis=0)
Data /= (Max_Val + 1e-6)

# ---------------------------------------------------------------------------
# Missingness mask: 1.0 where a uniform draw exceeds p_miss, else 0.0.
# Drawing a (Dim, No) block and transposing consumes the random stream one
# column of the mask at a time, the same order a per-column loop would use.
# ---------------------------------------------------------------------------
p_miss_vec = p_miss * np.ones((Dim, 1))
uniform_draws = np.random.uniform(0., 1., size=(Dim, No))
Missing = np.ascontiguousarray((uniform_draws > p_miss_vec).T, dtype=np.float64)

# ---------------------------------------------------------------------------
# Random train / test split over rows; data and mask share the same rows.
# ---------------------------------------------------------------------------
idx = np.random.permutation(No)

Train_No = int(No * train_rate)
Test_No = No - Train_No

train_rows, test_rows = idx[:Train_No], idx[Train_No:]

trainX, testX = Data[train_rows, :], Data[test_rows, :]
trainM, testM = Missing[train_rows, :], Missing[test_rows, :]


def xavier_init(size):
    """Draw a zero-mean Gaussian array whose spread depends on the first dimension.

    Args:
        size: Shape of the array to create; ``size[0]`` is used as the
            fan-in when computing the standard deviation.

    Returns:
        A NumPy array of shape ``size`` sampled from a normal distribution
        with standard deviation ``1 / sqrt(size[0] / 2)``.
    """
    fan_in = size[0]
    half_fan_in = fan_in / 2.
    stddev = 1. / np.sqrt(half_fan_in)
    return np.random.normal(loc=0.0, scale=stddev, size=size)
    

def sample_M(m, n, p):
    """Sample an ``m`` x ``n`` array of 0.0/1.0 values.

    Each entry is 1.0 when an independent uniform draw on [0, 1) is strictly
    greater than ``p`` and 0.0 otherwise.

    Args:
        m: Number of rows.
        n: Number of columns.
        p: Threshold compared against the uniform draws.

    Returns:
        A float NumPy array of shape ``(m, n)`` containing only 0.0 and 1.0.
    """
    uniform_draws = np.random.uniform(0., 1., size=[m, n])
    return (uniform_draws > p).astype(np.float64)
   

In [None]:
class MRNNImputer(nn.Module):
    """LSTM followed by a linear read-out that maps (data, mask) to imputed data.

    The data tensor and the mask tensor are concatenated along the last axis,
    so the LSTM consumes ``2 * input_dim`` features per step and the linear
    layer projects each hidden state back to ``input_dim`` features.

    Args:
        input_dim: Number of features in the data (and in the mask).
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Unidirectional LSTM over the concatenated [data, mask] features.
        self.lstm = nn.LSTM(
            input_size=2 * input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False,
        )

        # Per-step projection from hidden state back to the data dimension.
        self.fc = nn.Linear(in_features=hidden_dim, out_features=input_dim)

    def forward(self, X, M):
        """Return the network's reconstruction for ``X`` given mask ``M``.

        Args:
            X: Data tensor whose last axis has ``input_dim`` features.
            M: Mask tensor with the same shape as ``X``.

        Returns:
            Tensor with the same leading axes as ``X`` and ``input_dim``
            features on the last axis.
        """
        features = torch.cat((X, M), dim=-1)
        hidden_states = self.lstm(features)[0]  # discard the (h_n, c_n) state tuple
        return self.fc(hidden_states)


# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------
learning_rate = 0.1
epochs = 1000
batch_size = 64

model = MRNNImputer(input_dim=Dim, hidden_dim=64)
loss_fn = nn.MSELoss()

# Without an optimizer, loss.backward() only accumulates gradients and the
# weights never change, so the model would be saved exactly as initialised.
# NOTE(review): 0.1 is a large step size for Adam; lower it if the loss diverges.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Convert the training arrays once instead of on every batch of every epoch.
trainX_t = torch.tensor(trainX, dtype=torch.float32)
trainM_t = torch.tensor(trainM, dtype=torch.float32)

for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    num_batches = 0

    for i in tqdm(range(0, Train_No, batch_size), leave=False):
        X_mb = trainX_t[i:i+batch_size]
        M_mb = trainM_t[i:i+batch_size]

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Hide the entries the mask marks as missing (M == 0) so the network
        # never receives values it is supposed to impute.
        imputed_data = model(X_mb * M_mb, M_mb)

        # Reconstruction loss restricted to the observed entries (M == 1).
        loss = loss_fn(imputed_data * M_mb, X_mb * M_mb)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Each batch loss is already a mean, so average over batches, not samples.
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/max(num_batches, 1):.4f}')

torch.save(model.state_dict(), 'mrnn_imputer.pth')



In [None]:
torch.save(model.state_dict(), 'mrnn_imputer_trained.pth')

# ---------------------------------------------------------------------------
# Evaluate imputation quality on the test split, scoring only the entries
# that the mask marks as missing (M == 0).
# ---------------------------------------------------------------------------
model.eval()
total_MSE_test = 0.0   # running sum of squared errors over missing entries
total_MAE_test = 0.0   # running sum of absolute errors over missing entries
total_samples = 0      # running count of missing entries

with torch.no_grad():
    for i in range(0, Test_No, batch_size):
        X_mb = torch.tensor(testX[i:i+batch_size], dtype=torch.float32)
        M_mb = torch.tensor(testM[i:i+batch_size], dtype=torch.float32)
        missing_mb = 1 - M_mb

        # Zero out the missing entries in the input; otherwise the model is
        # handed the very values it is being scored on.
        imputed_data = model(X_mb * M_mb, M_mb)

        # Accumulate sums (not per-batch means) so the final averages are
        # taken over the number of missing entries rather than all elements.
        masked_error = (imputed_data - X_mb) * missing_mb
        total_MSE_test += torch.sum(torch.square(masked_error)).item()
        total_MAE_test += torch.sum(torch.abs(masked_error)).item()
        total_samples += torch.sum(missing_mb).item()

if total_samples > 0:
    avg_MSE_test = total_MSE_test / total_samples
    avg_MAE_test = total_MAE_test / total_samples
    # RMSE is the square root of the overall MSE, not a mean of batch RMSEs.
    avg_RMSE_test = avg_MSE_test ** 0.5
else:
    # No missing entries (or an empty test split): the metrics are undefined.
    avg_MSE_test = avg_MAE_test = avg_RMSE_test = float('nan')
