In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_squared_error
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# ----------------------------- Data Generation ----------------------------- #
def create_sparse_data(N=2000, L=100, K=2, P1=10, P2=10, beta0=0, beta1=3, noise_level=1.0, seed=1):
    """Simulate a noisy rank-K matrix whose factor sparsity depends on covariates.

    Each of the K factor columns is built entry by entry: an entry is a
    standard-normal draw with probability ``sigmoid(beta0 + beta1 * x)`` and
    zero otherwise, where ``x`` is one covariate column (column ``k % P1`` of
    the row covariates, column ``k % P2`` of the column covariates).

    Args:
        N: Number of rows of the simulated matrix.
        L: Number of columns of the simulated matrix.
        K: Number of factors.
        P1: Number of row covariates.
        P2: Number of column covariates.
        beta0: Intercept of the logistic inclusion model.
        beta1: Slope of the logistic inclusion model.
        noise_level: Multiplier applied to the standard-normal noise.
        seed: Seed applied to both the NumPy and the torch global RNGs.

    Returns:
        Tuple ``(Y_obs, Y_true, X_l, X_f)`` of float32 arrays with shapes
        ``(N, L)``, ``(N, L)``, ``(N, P1)`` and ``(L, P2)``; ``Y_obs`` is
        ``Y_true`` plus the scaled noise.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)

    def _draw_column(covariate):
        # Logistic inclusion probability for every entry of the column.
        inclusion_prob = 1 / (1 + np.exp(-(beta0 + beta1 * covariate)))
        entries = []
        for p in inclusion_prob:
            # Draw order is part of the seeded stream: the uniform is always
            # consumed, the normal only when the entry is included.
            if np.random.rand() < p:
                entries.append(np.random.randn())
            else:
                entries.append(0)
        return np.array(entries)

    row_covariates = np.random.randn(N, P1).astype(np.float32)
    col_covariates = np.random.randn(L, P2).astype(np.float32)

    row_factors, col_factors = [], []
    for k in range(K):
        # Row factor first, then column factor, for each k.
        row_factors.append(_draw_column(row_covariates[:, k % P1]))
        col_factors.append(_draw_column(col_covariates[:, k % P2]))

    signal = np.column_stack(row_factors) @ np.column_stack(col_factors).T
    observed = signal + np.random.randn(*signal.shape) * noise_level

    return (
        observed.astype(np.float32),
        signal.astype(np.float32),
        row_covariates.astype(np.float32),
        col_covariates.astype(np.float32),
    )

# ----------------------------- Models ----------------------------- #
class AE(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    Args:
        input_dim: Size of the last dimension of the input (and of the output).
        latent_dim: Size of the bottleneck representation.
        hidden_dim: Width of the hidden layer in both encoder and decoder.
            Defaults to 64, the width that was previously hard-coded.
    """

    def __init__(self, input_dim, latent_dim=10, hidden_dim=64):
        super().__init__()
        # Encoder is built before the decoder so that parameter initialisation
        # consumes the torch RNG in the same order as before.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        """Encode ``x`` to the latent space and return its reconstruction.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        z = self.encoder(x)
        return self.decoder(z)

class CAE(nn.Module):
    """Conditional autoencoder: the condition is fed to encoder and decoder.

    Args:
        input_dim: Number of features of the data to reconstruct.
        cond_dim: Number of features of the conditioning vector.
        latent_dim: Size of the bottleneck representation.
        hidden_dim: Width of the hidden layer in both encoder and decoder.
            Defaults to 64, the width that was previously hard-coded.
    """

    def __init__(self, input_dim, cond_dim, latent_dim=10, hidden_dim=64):
        super().__init__()
        # Encoder is built before the decoder so that parameter initialisation
        # consumes the torch RNG in the same order as before.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim + cond_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim + cond_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x, cond):
        """Reconstruct ``x`` given the conditioning features ``cond``.

        Args:
            x: Tensor of shape ``(batch, input_dim)``.
            cond: Tensor of shape ``(batch, cond_dim)``; concatenated along
                dimension 1 with ``x`` before encoding and with the latent
                code before decoding.

        Returns:
            Tensor of shape ``(batch, input_dim)``.
        """
        z = self.encoder(torch.cat([x, cond], dim=1))
        return self.decoder(torch.cat([z, cond], dim=1))

class DenoisingNCF(nn.Module):
    """Two-tower network predicting one scalar per (row, column) feature pair.

    Row features and column features are each embedded by their own
    Linear + ReLU layer; the two embeddings are concatenated and passed to a
    two-layer predictor with a single output unit.

    Args:
        user_dim: Number of features of the first input (``x_l``).
        item_dim: Number of features of the second input (``x_f``).
        hidden_dim: Width of both embeddings and of the predictor's hidden layer.
    """

    @staticmethod
    def _tower(in_features, out_features):
        # One embedding tower: a single affine map followed by ReLU.
        return nn.Sequential(nn.Linear(in_features, out_features), nn.ReLU())

    def __init__(self, user_dim, item_dim, hidden_dim=64):
        super().__init__()
        # Construction order (user, item, predictor) fixes how the torch RNG
        # is consumed during initialisation.
        self.user_net = self._tower(user_dim, hidden_dim)
        self.item_net = self._tower(item_dim, hidden_dim)
        head_layers = [
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.predictor = nn.Sequential(*head_layers)

    def forward(self, x_l, x_f):
        """Return predictions of shape ``(batch, 1)`` for paired feature rows.

        Args:
            x_l: Tensor of shape ``(batch, user_dim)``.
            x_f: Tensor of shape ``(batch, item_dim)``.
        """
        joint = torch.cat((self.user_net(x_l), self.item_net(x_f)), dim=1)
        return self.predictor(joint)

# ----------------------------- Training ----------------------------- #
def train_model(model, dataloader, mode="ae", epochs=50, lr=1e-3):
    """Train ``model`` with Adam on a mean-squared-error objective.

    Args:
        model: ``nn.Module`` to optimise in place; it is left in training mode.
        dataloader: Iterable of batches. The expected batch layout depends on
            ``mode`` (see below).
        mode: Selects how a batch is unpacked and what the target is:

            * ``"ncf"`` - batch is ``(x_l, x_f, y)``; loss compares
              ``model(x_l, x_f)`` with ``y``.
            * ``"cae"`` - batch is ``(x, cond)``; loss compares
              ``model(x, cond)`` with ``x``.
            * anything else - treated as a plain autoencoder: only
              ``batch[0]`` is used and the loss compares ``model(x)`` with ``x``.
        epochs: Number of full passes over ``dataloader``.
        lr: Adam learning rate. Defaults to 1e-3, the value that was
            previously hard-coded.

    Returns:
        List with one float per epoch: the mean of the per-batch losses of
        that epoch (NaN for an epoch in which ``dataloader`` yielded no batch).
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    # Resolve the batch layout once instead of re-testing `mode` per batch.
    if mode == "ncf":
        def batch_loss(batch):
            x_l, x_f, y = batch
            return loss_fn(model(x_l, x_f), y)
    elif mode == "cae":
        def batch_loss(batch):
            x, cond = batch
            return loss_fn(model(x, cond), x)
    else:  # AE (also the fallback for unrecognised modes)
        def batch_loss(batch):
            x = batch[0]
            return loss_fn(model(x), x)

    model.train()
    history = []
    for _ in range(epochs):
        loss_sum, n_batches = 0.0, 0
        for batch in dataloader:
            optimizer.zero_grad()
            loss = batch_loss(batch)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            n_batches += 1
        history.append(loss_sum / n_batches if n_batches else float("nan"))
    return history

# ----------------------------- Benchmark ----------------------------- #
def run_benchmark(noise_level=1.0, seed=1, batch_size=64, epochs=50):
    """Simulate one data set and score AE, CAE and NCF as denoisers.

    All three models are trained on the noisy matrix only; the noise-free
    matrix is used solely to compute the reported RMSE. Because
    ``create_sparse_data`` seeds both NumPy and torch, model initialisation
    and batch shuffling are reproducible for a given ``seed``.

    Args:
        noise_level: Noise multiplier forwarded to ``create_sparse_data``.
        seed: Seed forwarded to ``create_sparse_data``.
        batch_size: Mini-batch size used for every model. Defaults to 64,
            the value that was previously hard-coded.
        epochs: Training epochs per model. Defaults to 50, which matches the
            default of ``train_model`` that was used implicitly before.

    Returns:
        Dict with keys ``"AE"``, ``"CAE"``, ``"NCF"`` (RMSE of each model's
        output against the noise-free matrix), ``"noise_level"`` and ``"seed"``.
    """
    Y_obs, Y_true, X_l, X_f = create_sparse_data(noise_level=noise_level, seed=seed)
    n_rows, n_cols = Y_obs.shape

    def fit_and_predict(model, inputs, mode, targets=()):
        # Train on (inputs + targets), then return the model output on the
        # full, unshuffled inputs as a NumPy array.
        loader = DataLoader(TensorDataset(*inputs, *targets), batch_size=batch_size, shuffle=True)
        train_model(model, loader, mode=mode, epochs=epochs)
        model.eval()
        with torch.no_grad():
            return model(*inputs).numpy()

    def rmse(estimate, target):
        return np.sqrt(mean_squared_error(estimate, target))

    y_obs_tensor = torch.tensor(Y_obs, dtype=torch.float32)

    # AE: reconstruct each row of the noisy matrix from itself.
    Y_ae = fit_and_predict(AE(input_dim=n_cols), (y_obs_tensor,), mode="ae")

    # CAE: same reconstruction task, additionally conditioned on the row covariates.
    Y_cae = fit_and_predict(
        CAE(input_dim=n_cols, cond_dim=X_l.shape[1]),
        (y_obs_tensor, torch.tensor(X_l)),
        mode="cae",
    )

    # NCF: one training example per matrix cell. Repeating every row of X_l
    # n_cols times and tiling X_f n_rows times lines the covariate pairs up
    # with the row-major flattening of Y_obs.
    x_l_tensor = torch.tensor(np.repeat(X_l, n_cols, axis=0))
    x_f_tensor = torch.tensor(np.tile(X_f, (n_rows, 1)))
    y_tensor = torch.tensor(Y_obs.reshape(-1, 1))  # train on noisy observations, never on Y_true

    Y_ncf = fit_and_predict(
        DenoisingNCF(user_dim=X_l.shape[1], item_dim=X_f.shape[1]),
        (x_l_tensor, x_f_tensor),
        mode="ncf",
        targets=(y_tensor,),
    ).reshape(n_rows, n_cols)

    # Evaluation against the noise-free signal.
    return {
        "AE": rmse(Y_ae, Y_true),
        "CAE": rmse(Y_cae, Y_true),
        "NCF": rmse(Y_ncf, Y_true),
        "noise_level": noise_level,
        "seed": seed
    }

# ----------------------------- Main Loop ----------------------------- #
# Experiment grid: every noise level is replicated over seeds 1..N_SEEDS.
NOISE_LEVELS = [2.0]
N_SEEDS = 20

results = []
for noise in NOISE_LEVELS:
    print(f"Running experiments for noise level {noise}...")
    for s in range(N_SEEDS):
        result = run_benchmark(noise_level=noise, seed=s + 1)
        results.append(result)
        print(f"  Seed {s+1} done.")

df = pd.DataFrame(results)
output_path = r"C:\Document\Serieux\Travail\python_work\cEBMF_additional_simulation_VAE\uniformative_denoising_results2.csv"
# to_csv does not create missing folders; create the target folder first so
# the results of the long-running loop are not lost to a failed save.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
print(f"\n✅ Results saved to {output_path}")


Running experiments for noise level 2.0...
  Seed 1 done.
  Seed 2 done.
  Seed 3 done.
  Seed 4 done.
  Seed 5 done.
  Seed 6 done.
  Seed 7 done.
  Seed 8 done.
  Seed 9 done.
  Seed 10 done.
  Seed 11 done.
  Seed 12 done.
  Seed 13 done.
  Seed 14 done.
  Seed 15 done.
  Seed 16 done.
  Seed 17 done.
  Seed 18 done.
  Seed 19 done.
  Seed 20 done.

✅ Results saved to C:\Document\Serieux\Travail\python_work\cEBMF_additional_simulation_VAE\uniformative_denoising_results2.csv


In [None]:
# Display the per-seed records collected by the main loop: a list of dicts with
# keys "AE", "CAE", "NCF" (RMSE against the noise-free matrix), "noise_level" and "seed".
results