In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import time 
# Seed both random number generators used in this notebook (torch for weight
# init / sampling, numpy for the missingness mask) so runs are repeatable.
SEED = 40
torch.manual_seed(SEED)
np.random.seed(SEED)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)

In [None]:

# Load the raw table; every column is treated as a numeric feature.
dataset_file = 'RLD.csv'
if not os.path.isfile(dataset_file):
    raise FileNotFoundError(f"数据集文件未找到: {dataset_file}")
Data = pd.read_csv(dataset_file).values
No, Dim = Data.shape

# Column-wise min-max scaling to [0, 1].
# The denominator must be the column *range* (max - min), not the raw max:
# dividing by the raw max leaves columns with a non-zero minimum squeezed into
# a sub-interval and flips/blows up columns whose maximum is <= 0, which the
# Sigmoid decoder output can never match. The 1e-6 guards constant columns.
Min_Val = np.min(Data, axis=0)
Max_Val = np.max(Data, axis=0)
Data = (Data - Min_Val) / (Max_Val - Min_Val + 1e-6)
print("数据归一化完成。")


In [None]:
# Build the observation mask: an entry is 1.0 when its uniform draw exceeds the
# column's missing rate and 0.0 otherwise (0.0 entries are zeroed out of the
# model input later on).
No, Dim = Data.shape
p_miss = 0.1
p_miss_vec = np.full((Dim, 1), p_miss)  # one missing rate per column
Missing = np.zeros((No, Dim))

# Columns are sampled one at a time so the numpy RNG is consumed column by column.
for col in range(Dim):
    draws = np.random.uniform(0., 1., size=No)
    Missing[:, col] = (draws > p_miss_vec[col]).astype(Missing.dtype)




In [None]:
class DataImputationDataset(Dataset):
    """Row-wise dataset for masked-imputation training.

    Args:
        data: 2-D array-like of feature values, one sample per row.
        missing: array-like with the same shape as ``data``; 1 marks an
            observed entry, 0 a missing one.
        p_hint: probability with which each observed mask entry is revealed
            in the hint vector.

    Each item is the tuple ``(New_X, M, X, H)``:
        New_X: the row with missing entries zeroed out (``X * M``).
        M: the row's observation mask.
        X: the complete row.
        H: a freshly sampled hint vector for the row (see ``sample_Hint``).
    """

    def __init__(self, data, missing, p_hint=0.9):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.missing = torch.tensor(missing, dtype=torch.float32)
        self.p_hint = p_hint
        self.No, self.Dim = self.data.shape

    def __len__(self):
        """Return the number of rows."""
        return self.No

    def __getitem__(self, idx):
        """Return ``(New_X, M, X, H)`` for row ``idx``."""
        X = self.data[idx]
        M = self.missing[idx]
        New_X = X * M  # hide the missing entries from the model input

        H = self.sample_Hint(M)

        return New_X, M, X, H

    def sample_Hint(self, M):
        """Sample a hint vector for mask ``M``.

        Every entry is kept with probability ``p_hint`` and the result is
        multiplied by ``M``, so the hint reveals a random subset of the
        observed positions and is always 0 at missing positions. (Previously
        the draw used ``A > p_hint`` — i.e. rate ``1 - p_hint`` — and never
        looked at the values of ``M``.)
        """
        A = torch.rand(M.size(), device=M.device)
        B = (A < self.p_hint).to(M.dtype)
        return B * M


# Hint rate handed to the datasets and mini-batch size handed to the loaders.
p_hint, mb_size = 0.9, 16

# The first 80% of the rows form the training set, the remainder the test set.
train_size = int(0.8 * No)
test_size = No - train_size

trainX, testX = Data[:train_size], Data[train_size:]
trainM, testM = Missing[:train_size], Missing[train_size:]

# One dataset per split, each pairing the values with their observation mask.
train_dataset, test_dataset = (
    DataImputationDataset(features, mask, p_hint=p_hint)
    for features, mask in ((trainX, trainM), (testX, testM))
)

# Training batches are shuffled and the ragged tail batch is dropped;
# test batches keep their order and the tail batch is retained.
train_loader = DataLoader(
    train_dataset,
    batch_size=mb_size,
    shuffle=True,
    drop_last=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=mb_size,
    shuffle=False,
    drop_last=False,
)




In [None]:


class VAE(nn.Module):
    """Fully connected variational auto-encoder with one hidden layer per side.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: width of the hidden layer in the encoder and the decoder.
        latent_dim: size of the latent Gaussian.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        # Encoder trunk feeding two linear heads (mean and log-variance).
        encoder_layers = [nn.Linear(input_dim, hidden_dim), nn.ReLU()]
        self.encoder = nn.Sequential(*encoder_layers)
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)

        # Decoder ends in a Sigmoid, so reconstructions lie in (0, 1).
        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the approximate posterior for ``x``."""
        hidden = self.encoder(x)
        return self.fc_mu(hidden), self.fc_logvar(hidden)

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * std`` with ``eps`` standard normal noise."""
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map latent codes ``z`` back to input space."""
        return self.decoder(z)

    def forward(self, x):
        """Encode, sample and decode; return ``(recon_x, mu, logvar)``."""
        mu, logvar = self.encode(x)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent), mu, logvar


In [None]:
# Network sizes: the input width follows the data, hidden and latent widths are fixed.
input_dim, hidden_dim, latent_dim = Dim, 64, 64

# Optimisation schedule.
# NOTE(review): 0.1 is a large step size for Adam (torch's default is 1e-3) —
# confirm this value is intentional.
num_epochs, learning_rate = 1000, 0.1

vae = VAE(input_dim=input_dim, hidden_dim=hidden_dim, latent_dim=latent_dim)
vae = vae.to(device)
optimizer = optim.Adam(params=vae.parameters(), lr=learning_rate)
# Summed (not averaged) squared error.
reconstruction_loss = nn.MSELoss(reduction='sum')


In [None]:
from tqdm import tqdm

# Train the VAE on the masked rows; wall-clock time is measured around the whole loop.
start_time = time.time()

vae.train()
for epoch in range(1, num_epochs + 1):
    train_loss = 0.0
    samples_seen = 0  # rows actually processed this epoch (the loader may drop a tail batch)
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}")
    for New_X, M, X, _ in progress_bar:  # the hint vector is not used by the VAE
        New_X = New_X.to(device)
        M = M.to(device)
        X = X.to(device)

        optimizer.zero_grad()
        recon_X, mu, logvar = vae(New_X)

        # Reconstruction error is measured on observed entries only (mask = 1).
        loss_recon = reconstruction_loss(recon_X * M, X * M)
        # KL divergence between N(mu, exp(logvar)) and the standard normal prior.
        loss_kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

        loss = loss_recon + loss_kl
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_rows = New_X.size(0)
        train_loss += batch_loss
        samples_seen += batch_rows
        # Show the per-sample loss of *this batch*; dividing a single batch's
        # loss by the whole training-set size understated it.
        progress_bar.set_postfix({"Loss": batch_loss / batch_rows})

    # Average over the rows that were really seen, not over train_size.
    avg_loss = train_loss / max(samples_seen, 1)
    if epoch % 10 == 0 or epoch == 1:
        # tqdm.write keeps the message from colliding with the progress bar.
        tqdm.write(f"Epoch {epoch}/{num_epochs} - avg loss per sample: {avg_loss:.6f}")

# Stamp the end time after the loop so elapsed_time is defined and accurate
# for every value of num_epochs (previously it was only refreshed on epochs
# 1, 10, 20, ...).
end_time = time.time()
elapsed_time = end_time - start_time


In [None]:


# Impute the test split: feed the masked rows through the trained VAE and
# take its reconstruction only at the positions that were hidden.
vae.eval()
with torch.no_grad():
    test_New_X = testX
    test_X = testX
    test_M = testM

    observed = test_M.astype(bool)  # True where the entry was kept

    # Zero out hidden entries, matching how training inputs were built.
    test_New_X_filled = np.where(observed, test_New_X, 0)
    test_New_X_tensor = torch.tensor(
        test_New_X_filled, dtype=torch.float32, device=device
    )

    recon_test_X = vae(test_New_X_tensor)[0].cpu().numpy()

    # Keep observed values as they are; fill the rest from the reconstruction.
    testX_imputed = np.where(observed, test_New_X_filled, recon_test_X)



In [None]:
def compute_metrics(true_data, imputed_data, mask):
    """Score an imputation on the entries that were hidden.

    Args:
        true_data: array of ground-truth values.
        imputed_data: array of the same shape holding the imputed values.
        mask: array of the same shape; entries that cast to False mark the
            hidden positions that are scored.

    Returns:
        Tuple ``(mse, rmse, mae)`` computed over the hidden positions only.
    """
    hidden = np.logical_not(mask.astype(bool))
    y_true = true_data[hidden]
    y_pred = imputed_data[hidden]

    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    return mse, np.sqrt(mse), mae


vae_mse, vae_rmse, vae_mae = compute_metrics(testX, testX_imputed, testM)

