In [None]:
import os
import time

import numpy as np
import torch
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error

dataset_file = 'RLD.csv'

# Fixed seed so the missingness mask and the train/test split are identical
# after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

if not os.path.isfile(dataset_file):
    raise FileNotFoundError(f"数据集文件未找到: {dataset_file}")

# The first row is skipped (skiprows=1); ndmin=2 keeps a (rows, columns)
# array even when the file holds a single column.
Data = np.loadtxt(dataset_file, delimiter=",", skiprows=1, ndmin=2)

No, Dim = Data.shape

# Per-column min-max scaling. Max_Val is taken AFTER shifting by Min_Val, so it
# holds each column's range and the scaled values fall in [0, 1). The inverse
# transform is Data * (Max_Val + 1e-6) + Min_Val; the 1e-6 guards against
# division by zero for constant columns.
Min_Val = np.min(Data, axis=0)
Data = Data - Min_Val
Max_Val = np.max(Data, axis=0)
Data = Data / (Max_Val + 1e-6)

# Missingness mask: 1.0 = observed, 0.0 = hidden. Each entry is hidden
# independently with probability p_miss (p_miss_vec holds one rate per column).
p_miss = 0.1
p_miss_vec = p_miss * np.ones((Dim, 1))
Missing = (np.random.uniform(0., 1., size=(No, Dim)) > p_miss_vec.T).astype(float)

# Shuffle the rows once and reuse the same permutation for data and mask so
# they stay aligned.
idx = np.random.permutation(No)

train_rate = 0.8
Train_No = int(No * train_rate)
Test_No = No - Train_No

train_idx = idx[:Train_No]
test_idx = idx[Train_No:]

trainX = Data[train_idx, :]
testX = Data[test_idx, :]
trainM = Missing[train_idx, :]
testM = Missing[test_idx, :]

In [None]:
# KNN baseline: each missing value is copied from the single nearest neighbour
# (n_neighbors=1) found in the training data.
knn_imputer = KNNImputer(n_neighbors=1)

# Hide the entries flagged as missing (mask == 0) by replacing them with NaN,
# which is the missing-value marker KNNImputer looks for by default.
trainX_knn = trainX.copy()
trainX_knn[trainM == 0] = np.nan

testX_knn = testX.copy()
testX_knn[testM == 0] = np.nan

# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; it is not affected by system clock adjustments.
start_time = time.perf_counter()

# Fit on the training split only, then reuse the fitted imputer on the test
# split so no test information leaks into the neighbour search.
trainX_filled_knn = knn_imputer.fit_transform(trainX_knn)
testX_filled_knn = knn_imputer.transform(testX_knn)

end_time = time.perf_counter()

# Elapsed seconds for fitting plus imputing both splits.
execution_time = end_time - start_time


In [None]:
def compute_metrics(true_data, imputed_data, mask):
    """Score imputed values against the ground truth on missing entries only.

    Parameters
    ----------
    true_data : np.ndarray
        Complete reference data.
    imputed_data : np.ndarray
        Data after imputation. It is indexed with the same boolean mask as
        ``true_data``, so the shapes must match.
    mask : np.ndarray
        Observation indicator; entries equal to 0 mark the positions that are
        evaluated, all other entries are ignored.

    Returns
    -------
    tuple
        ``(mse, mae, rmse)`` computed over the entries where ``mask == 0``.

    Raises
    ------
    ValueError
        If ``mask`` marks no entry as missing, so there is nothing to score.
    """
    missing_mask = (mask == 0)
    if not missing_mask.any():
        raise ValueError("mask marks no missing entries; nothing to evaluate")

    # Boolean indexing flattens the selected entries into 1-D vectors.
    true_missing = true_data[missing_mask]
    imputed_missing = imputed_data[missing_mask]

    mse = mean_squared_error(true_missing, imputed_missing)
    mae = mean_absolute_error(true_missing, imputed_missing)
    rmse = np.sqrt(mse)

    return mse, mae, rmse

# Evaluate the KNN baseline on the held-out split; only the entries hidden by
# testM (mask == 0) contribute to the scores.
(test_mse_knn,
 test_mae_knn,
 test_rmse_knn) = compute_metrics(true_data=testX,
                                  imputed_data=testX_filled_knn,
                                  mask=testM)