### MIRI UCI dataset

In [None]:
import sys
import os
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from src.imputer_wrapper import impute_now
import argparse
import torch.nn as nn
import torch.optim as optim
from uci_datasets import Dataset

### Generating Dataset

In [None]:

import numpy as np
from matplotlib import pyplot as plt

# --- Experiment configuration -------------------------------------------
dset = "wine"   # name of the UCI dataset handed to `Dataset`
seed = 1        # seed for torch's global RNG; also part of the result file names
p = 0.6         # Bernoulli probability that a mask entry is 1 (see `sample_ref`)

# Seed before any random draw so the sampled mask and noise are reproducible.
torch.manual_seed(seed)

methods = ['miri', 'hyperimpute', 'knewimp']
print("methods: ", methods)

# --- Data loading --------------------------------------------------------
# In the paper, the data is also standardized before the imputation.
# That step is skipped here to keep the demo simple.
dataset = Dataset(dset)
Xdata = torch.tensor(dataset.x).float().to(device)

# Number of rows and columns of the loaded feature matrix.
n, d = Xdata.shape[:2]
def sample_ref(n):
    """Draw a corrupted copy of the first ``n`` rows of ``Xdata``.

    A binary mask ``M`` is sampled entry-wise from a Bernoulli distribution
    with probability ``p`` (module-level). Entries where ``M == 1`` keep
    their original value; entries where ``M == 0`` are overwritten with
    standard-normal noise.

    Args:
        n: Number of leading rows of ``Xdata`` to use. If ``n`` is larger
            than the number of available rows, all rows are used.

    Returns:
        A tuple ``(X, M, Xstar)`` of CPU tensors with identical shape:
        ``X`` is the corrupted data, ``M`` the 0/1 mask and ``Xstar`` the
        untouched reference copy of the selected rows.
    """
    X = Xdata[:n, :].detach().clone()
    Xstar = X.detach().clone()

    # Size the mask and the noise from the slice that was actually taken,
    # not from the requested `n`: slicing silently clamps to the available
    # rows, and a larger mask would make the boolean indexing below fail.
    rows, cols = X.shape

    M = torch.distributions.bernoulli.Bernoulli(torch.ones(rows, cols) * p).sample().to(device)
    # Replace the masked-out entries (M == 0) with N(0, 1) noise.
    X[M == 0] = torch.randn(rows, cols).to(device)[M == 0]

    return X.cpu(), M.cpu(), Xstar.cpu()

# Corrupt the full dataset once; every method below sees the same X0 and M.
X0, M, Xstar = sample_ref(n)

# Summary of the sampled problem. M holds 1 for kept entries, so the
# fraction of replaced entries is one minus its mean.
num_rows, num_cols = X0.shape[0], X0.shape[1]
missing_rate = 1 - M.mean().item()
print("Data size", "n: ", num_rows, "d: ", num_cols, "p: ", p)
print("Missing rate: ", missing_rate)
print("------------ The tests begins now ------------ \n ")

### Running Imputation Methods

In [None]:
# Run every imputation method on the same corrupted data and store its
# outputs on disk so the results cell can be re-run without re-imputing.
#
# Create the output directory first: torch.save does not create missing
# parent directories, and failing after a completed imputation run would
# throw that work away.
os.makedirs("res", exist_ok=True)

for method in methods:
    # impute_now returns three values; they are stored as-is.
    # NOTE(review): presumably the imputed data plus per-round MMD and MI
    # histories -- confirm against src.imputer_wrapper.
    X_tilde, mmd_list, mi_list = impute_now(X0, M, Xstar, method, max_rounds=10, batchsize=50)
    torch.save([X_tilde, mmd_list, mi_list], f"res/{dset}_{seed}_{method}.pt")


### Printing Results

In [None]:
import pandas as pd


# Collect the last recorded value of each metric for every method and
# show them side by side in one table.
results = []
for method in methods:
    saved_path = f"res/{dset}_{seed}_{method}.pt"
    X_tilde, mmd_list, mi_list = torch.load(saved_path)

    # Only the final entry of each metric list is reported.
    row = {"method": method}
    row["mmd (the smaller the better)"] = float(mmd_list[-1])
    row["mi (the smaller the better)"] = float(mi_list[-1])
    results.append(row)

df_results = pd.DataFrame(results)

print(f"Imputation Results Summary of {dset} dataset with seed {seed} and p={p}")
print(df_results)
    