# Problème - Session n°2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

# 1. Generate the matrices M[k, n]: a (5 x 3) grid of (10 x 10) matrices.
k = 5
n = 3
dim = 10
# Draw M from a dedicated, explicitly seeded generator so that the matrices
# are identical on every run. Using a separate generator (rather than the
# global RNG) leaves the global random state untouched.
M = torch.randn(k, n, dim, dim, generator=torch.Generator().manual_seed(0))

In [None]:
# 2. Generate dataset
N = 50000  # Total samples
batch_size = 1000
num_batches = N // batch_size

torch.manual_seed(42)
x_data = torch.randn(N, dim)
# Every run of `batch_size` consecutive samples shares one label L, and the
# labels cycle through 0..k-1 from one run to the next.
L_data = torch.arange(num_batches).repeat_interleave(batch_size) % k

# Targets: y = max(M[L, 2] @ relu(M[L, 1] @ relu(M[L, 0] @ x))).
# All samples sharing a label go through the same three matrices, so each
# label's rows are processed in one batched product instead of one Python
# iteration per sample (`rows @ W.T` applies W to every row).
y_data = torch.empty(N, 1, dtype=x_data.dtype)
for label in range(k):
    rows = L_data == label
    hidden = torch.relu(x_data[rows] @ M[label, 0].T)
    hidden = torch.relu(hidden @ M[label, 1].T)
    y_data[rows, 0] = (hidden @ M[label, 2].T).max(dim=1).values

In [None]:
# 3. Define Model
class DeepMLP(nn.Module):
    """MLP whose input is augmented with a learned 2-D latent vector.

    One latent vector (a row of ``theta``) is stored per group of
    ``group_size`` consecutive sample indices. During the forward pass the
    latent vector of each sample is perturbed with unit Gaussian noise and
    concatenated to the sample's features before the fully connected layers.

    Args:
        input_dim: Number of features per sample (without the latent part).
        hidden_dim: Width of the three hidden layers.
        output_dim: Size of the model output.
        num_batches: Number of latent vectors (rows of ``theta``).
        group_size: Number of consecutive sample indices that share one
            latent vector. Defaults to 1000.

    Raises:
        ValueError: If ``group_size`` is not strictly positive.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_batches, group_size=1000):
        super().__init__()
        if group_size <= 0:
            raise ValueError(f"group_size must be positive, got {group_size}")
        self.group_size = group_size
        self.theta = nn.Parameter(torch.randn(num_batches, 2))  # Latent variable
        self.fc1 = nn.Linear(input_dim + 2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        # The output layer keeps the attribute name `fc7` so that existing
        # state_dict keys remain valid.
        self.fc7 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, indices):
        """Predict outputs for ``x``.

        Args:
            x: Tensor of shape (batch, input_dim).
            indices: Integer sample indices, one per row of ``x``; row
                ``index // group_size`` of ``theta`` is used for each sample.

        Returns:
            A tuple ``(prediction, theta_batch)`` where ``theta_batch`` holds
            the selected latent vectors *without* the added noise.
        """
        # Do the lookup on the parameter's own device so the model does not
        # depend on the caller's index tensor living on that device.
        group_ids = torch.as_tensor(indices, device=self.theta.device) // self.group_size
        theta_batch = self.theta[group_ids, :]
        # Unit Gaussian noise on the latent vector, one draw per sample.
        noise = torch.randn_like(theta_batch)
        x = torch.cat([x, theta_batch + noise], dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc7(x)
        return x, theta_batch

# Model and optimizer
model = DeepMLP(input_dim=dim, hidden_dim=256, output_dim=1, num_batches=num_batches)
# Move the model to the GPU *before* constructing the optimizer, as the
# torch.optim documentation recommends, so the optimizer is built over the
# parameters in their final (CUDA) location.
model = model.cuda()

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training
num_epochs = 2000
for epoch in range(num_epochs):
    # Fresh random ordering of the sample indices for this epoch.
    perm = torch.randperm(N)

    epoch_loss = 0
    for i in range(0, N, batch_size):
        # Dataset positions of the samples in this mini-batch; the model
        # receives them alongside the features.
        indices = perm[i:i+batch_size]
        # Gather just this mini-batch instead of building shuffled copies of
        # the whole dataset at the start of every epoch.
        x_batch = x_data[indices].cuda()
        y_batch = y_data[indices].cuda()

        optimizer.zero_grad()
        y_pred, theta_batch = model(x_batch, indices)
        # MSE plus a hinge penalty that is non-zero only for latent
        # coordinates outside the interval [-10, 10].
        loss = criterion(y_pred, y_batch) + (torch.relu(theta_batch - 10) + torch.relu(-10 - theta_batch)).mean()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # The reported value is the sum of the per-batch losses over the epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")



In [None]:
# With 5 layers:
# Pull the learned latent vectors out of the model as a NumPy array.
theta_learned = model.theta.detach().cpu().numpy()
# Colour value for row i of theta is i % k.
L_colors = np.arange(num_batches) % k

# Scatter plot of the two latent coordinates, one point per row of theta.
plt.figure(figsize=(8, 6))
plt.scatter(
    *theta_learned.T,  # unpacks into the first and second latent coordinate
    c=L_colors,
    cmap='viridis',
    alpha=0.7,
    s=2,
)
plt.xlabel('Theta 1')
plt.ylabel('Theta 2')
plt.title('Latent Space Visualization')
plt.colorbar(label='L value')
plt.show()

## Partie III

La solution proposée dans la partie II a un défaut: les performances chutent sur les paires d'antennes qui n'ont pas été rencontrées pendant l'entraînement. Pour le corriger, on se propose de suivre la méthode suivante:
- ajouter un perceptron "générique" de même architecture que les perceptrons spécifiques.
- entraîner le réseau de la partie II en remplaçant une fois sur quatre l'identifiant de la paire d'antennes par l'indice du perceptron générique.
- après cinquante époques, geler les poids de la partie générique et prolonger l'apprentissage des perceptrons spécifiques sur une vingtaine d'époques.

**Consignes :**

1) Mettre en oeuvre cette méthode

2) Conclure sur son efficacité