In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple neural network that approximates the function b
class SimpleNN(nn.Module):
    """Small fully connected network: Linear(input_dim, 10) -> ReLU -> Linear(10, 1)."""

    def __init__(self, input_dim):
        """Create the hidden layer (10 units) and the single-output layer."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=10)  # hidden layer, 10 neurons
        self.fc2 = nn.Linear(in_features=10, out_features=1)          # output layer

    def forward(self, x):
        """Return fc2(relu(fc1(x)))."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)


# Reinitializing the data
# NOTE(review): n and K are not referenced by the code visible in this
# notebook excerpt; presumably the input dimension and the number of theta
# vectors (Theta below is 4 x 3) -- confirm before relying on them.
n = 3
K = 4

# Fixed, hand-picked vectors and matrices for testing (nothing here is random)
X = np.array([[1, 2, 3], [2, 3, 4], [3, 1, 5]])
X_prime = np.array([[4, 5, 6], [7, 8, 9], [10, 11, 12]])
Theta = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0.5, 0.5, 0.5]])
M = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12]
])

lambda_val = 2.5
nu_sample = np.array([[1.5, 2.5, 3.5, 4.5],
                      [2.5, 3.5, 4.5, 5.5],
                      [3.5, 4.5, 5.5, 6.5]])

# Seed PyTorch before building the network: nn.Linear initialises its weights
# randomly, so without a fixed seed every kernel restart produces a different
# model and the numbers computed from it cannot be reproduced.
SEED = 0
torch.manual_seed(SEED)

# The network's input size is the number of columns of X (length of each X[i]).
model = SimpleNN(X.shape[1])
criterion = nn.MSELoss()


In [150]:
def compute_derivative(Theta, M, x_i, X_prime, model, criterion, lambda_val, nu):
    """
    Computes the derivative for a single point x_i: a linear-algebra term plus
    an additional term in which the function b is approximated by a neural
    network.

    The returned vector is

        Theta^T (M * (Theta x_i 1^T - Theta X_prime^T)) 1  +  lambda_val * s * g

    where ``*`` is element-wise,
    ``g`` is the gradient w.r.t. x_i of ``criterion(b(x_i), mean_j b(x'_j))`` and
    ``s = (nu * (b(x_i) - b(x'_j))).sum(axis=1)``.

    This function does not modify the ``.grad`` fields of ``model``'s
    parameters; the input gradient is obtained with ``torch.autograd.grad``.

    Parameters:
    - Theta: 2-D array of theta vectors (one per row); its rows are dotted
      with x_i and with the rows of X_prime.
    - M: Array of mu coefficients, multiplied element-wise with the
      (Theta rows) x (X_prime rows) difference matrix.
    - x_i: 1-D vector x_i.
    - X_prime: 2-D array of the vectors x'_j. Step 1 uses its ROWS as x'_j,
      while the network is evaluated on ``X_prime.T`` (see NOTE(review) below),
      so in practice X_prime has to be square.
    - model: torch module approximating b; called on x_i and on X_prime.T.
    - criterion: torch loss called as ``criterion(b(x_i), target)``; must
      return a scalar so it can be differentiated.
    - lambda_val: Scalar constant lambda.
    - nu: Array of coefficients nu; must broadcast against a column vector
      with one entry per network output row.

    Returns:
    - The computed derivative as a 1-D NumPy array.
    """

    # Step 1: Difference matrix Delta[k, j] = theta_k . x_i - theta_k . X_prime[j]
    ones_vector = np.ones((X_prime.shape[0], 1))
    broadcasted_xi = np.outer(Theta.dot(x_i), ones_vector.T)
    Delta_rigorous = broadcasted_xi - Theta.dot(X_prime.T)

    # Step 2: Element-wise multiplication by mu_{ij}^{(theta)}
    WeightedDiff_rigorous = M * Delta_rigorous

    # Step 3: Map the weighted differences back through Theta^T
    ResultMatrix_rigorous = np.dot(Theta.T, WeightedDiff_rigorous)

    # Step 4: Sum over the columns (i.e. over j) to obtain the first term
    FinalResult_rigorous = ResultMatrix_rigorous.dot(ones_vector).flatten()

    # ---- Additional term, computed with PyTorch ----
    # x_i needs requires_grad so we can differentiate with respect to it.
    x_i_tensor = torch.tensor(x_i, dtype=torch.float32, requires_grad=True)
    X_prime_tensor = torch.tensor(X_prime, dtype=torch.float32)

    # Forward pass for b(x_i)
    b_xi = model(x_i_tensor)

    # Forward pass for b(x'_j).
    # NOTE(review): Step 1 treats the rows of X_prime as x'_j, but the
    # transpose here feeds the COLUMNS of X_prime to the network as samples.
    # The two only refer to the same vectors when X_prime is symmetric.
    # Behaviour is kept as-is; confirm which orientation is intended.
    b_x_prime = model(X_prime_tensor.T)

    # Row vector of differences b(x_i) - b(x'_j)
    diff_b = (b_xi - b_x_prime).T

    # Give the target the same shape as b_xi. Passing the bare 0-d mean makes
    # losses such as nn.MSELoss emit a "target size ... different to the input
    # size" UserWarning; the value of the loss is identical either way.
    target = b_x_prime.mean().expand_as(b_xi)
    loss = criterion(b_xi, target)

    # Differentiate the loss w.r.t. x_i only. torch.autograd.grad returns the
    # gradient directly, so no optimizer is needed and the caller's model
    # parameter gradients are neither cleared nor overwritten.
    # NOTE(review): this is the gradient of the LOSS, i.e. it carries the
    # loss's own factor (for MSE: 2 * (b(x_i) - mean_j b(x'_j))) on top of the
    # gradient of b. If the plain gradient of b at x_i is what the formula
    # calls for, differentiate b_xi instead -- confirm against the derivation.
    (grad_xi,) = torch.autograd.grad(loss, x_i_tensor)
    db_xi = grad_xi.numpy()

    # Combine the results from PyTorch with the previous computations.
    # NOTE(review): summation_term has one entry per row of nu / per x'_j,
    # and is multiplied element-wise with db_xi, which has one entry per
    # coordinate of x_i. This only lines up when those lengths match --
    # confirm that a per-row (rather than total) sum is intended.
    summation_term = (nu * diff_b.detach().numpy().T).sum(axis=1)
    additional_term = lambda_val * summation_term * db_xi

    # Combine the results
    derivative = FinalResult_rigorous + additional_term

    return derivative

def compute_gradient_Q(X, Theta, M, x_prime, model, criterion, lambda_val, nu):
    """
    Computes the gradient of Q row by row: row i of the result is
    compute_derivative(...) evaluated at X[i].

    Parameters:
    - X: 2-D array whose rows are the vectors x_i.
    - Theta: The matrix of theta vectors (forwarded to compute_derivative).
    - M: The matrix of mu coefficients (forwarded to compute_derivative).
    - x_prime: The matrix of vectors x'_j (forwarded to compute_derivative).
    - model: Network approximating b (forwarded to compute_derivative).
    - criterion: Loss function (forwarded to compute_derivative).
    - lambda_val: Scalar constant lambda (forwarded to compute_derivative).
    - nu: Coefficients nu (forwarded to compute_derivative).

    Returns:
    - A float array with the same shape as X holding one derivative per row.
    """

    # Pre-allocate the output; each row is filled with the derivative at X[i].
    per_point = np.zeros(X.shape)

    for row_idx, point in enumerate(X):
        per_point[row_idx] = compute_derivative(
            Theta, M, point, x_prime, model, criterion, lambda_val, nu
        )

    # The per-point derivatives are returned as they are; they are not
    # summed over the rows.
    return per_point


In [151]:
# Testing the function: one derivative row per row of X
compute_gradient_Q(
    X=X,
    Theta=Theta,
    M=M,
    x_prime=X_prime,
    model=model,
    criterion=criterion,
    lambda_val=lambda_val,
    nu=nu_sample,
)

array([[-214.26692291, -309.36828849, -390.46088327],
       [-176.75152936, -252.32785901, -318.77401467],
       [-180.73966363, -259.68517924, -287.24941694]])