In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Read the three matrices from .npy files in the working directory.
S = np.load("S.npy")
H = np.load("H.npy")
F = np.load("F.npy")

# Report the shapes, one per line, in the order S, F, H.
print(S.shape, F.shape, H.shape, sep="\n")

# S and H are turned into float tensors; F is left as the loaded NumPy array.
S, H = torch.from_numpy(S).float(), torch.from_numpy(H).float()

(100000, 130)
(100000, 300)
(300, 130)


In [3]:
class MatrixFactorizationNet(nn.Module):
    """Fully connected network with a strictly positive (Softplus) output.

    Args:
        input_size: Number of features in each input row.
        hidden_sizes: Sequence with the width of each hidden layer. Any
            number of hidden layers is accepted (the original version
            required exactly three).
        output_size: Number of values produced for each input row.

    The linear layers are registered as ``fc1``, ``fc2``, ... in order, so a
    model built with three hidden sizes exposes ``fc1`` .. ``fc4`` exactly as
    before and keeps the same parameter names.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        # Consecutive pairs of this list are the (in, out) sizes of each layer.
        widths = [input_size, *hidden_sizes, output_size]
        layer_names = []
        for index, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            name = f"fc{index}"
            setattr(self, name, nn.Linear(n_in, n_out))
            layer_names.append(name)
        # Plain tuple of attribute names; used by forward() to walk the layers.
        self._layer_names = tuple(layer_names)
        self.softplus = nn.Softplus()

    def forward(self, x):
        """Apply ReLU after every layer except the last, which uses Softplus."""
        *hidden_layers, output_layer = (getattr(self, name) for name in self._layer_names)
        for layer in hidden_layers:
            x = torch.relu(layer(x))
        return self.softplus(output_layer(x))
    
def apply_sparsity(F, sparsity_threshold=0.9):
    """
    Zero out, in place, the fraction of entries of F with the smallest
    absolute values.

    Args:
        F: Tensor to sparsify. It is modified in place; it does not need to
            be contiguous.
        sparsity_threshold: Fraction of entries to set to zero. The number of
            zeroed entries is ``int(sparsity_threshold * F.numel())``, capped
            at ``F.numel()``; values <= 0 leave F unchanged.

    Returns:
        The same tensor object F, after modification.
    """
    total = F.numel()
    # Cap k so thresholds above 1 zero everything instead of making topk fail.
    k = min(int(sparsity_threshold * total), total)
    if k > 0:
        # reshape (not view) so non-contiguous inputs are accepted here.
        _, indices = torch.topk(torch.abs(F).reshape(-1), k, largest=False)
        # Build the mask on a flat buffer, then give it F's shape; masked_fill_
        # writes through to F itself whatever its memory layout is.
        mask = torch.zeros(total, dtype=torch.bool, device=F.device)
        mask[indices] = True
        F.masked_fill_(mask.view(F.shape), 0)
    return F

In [4]:
from torch.utils.data import Dataset, DataLoader

class MatrixDataset(Dataset):
    """Dataset that pairs row ``i`` of S with row ``i`` of F.

    Args:
        S: Indexable collection of input rows.
        F: Indexable collection of target rows, same length as S.

    Raises:
        ValueError: If S and F do not have the same length.
    """

    def __init__(self, S, F):
        # Fail at construction instead of with an IndexError (or silently
        # unused rows) in the middle of an epoch.
        if len(S) != len(F):
            raise ValueError(
                f"S and F must have the same length, got {len(S)} and {len(F)}"
            )
        self.S = S
        self.F = F

    def __len__(self):
        """Number of (S row, F row) pairs."""
        return len(self.S)

    def __getitem__(self, idx):
        """Return the pair ``(S[idx], F[idx])``."""
        return self.S[idx], self.F[idx]

In [5]:
# Separate into training and testing set
split = 0.8
num_entries = len(S)
train_len = int(num_entries * split)

S_train = S[:train_len]
F_train = F[:train_len]

S_test = S[train_len:]
F_test = F[train_len:]

In [6]:
# Mini-batch size shared by both loaders; tune to the available memory.
batch_size = 32

# Wrap each split so that indexing yields an (S row, F row) pair.
train_dataset = MatrixDataset(S_train, F_train)
test_dataset = MatrixDataset(S_test, F_test)

# Training batches are reshuffled; test batches keep the stored order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [7]:
# Sizes taken from the splits: row counts of the test/train S matrices, the
# column count of S (n_b) and the column count of F (n).
N_test, _ = np.shape(S_test)
N, n_b = np.shape(S_train)
_, n = np.shape(F_train)
print(N)
print(n_b)
print(n)

# The network works row by row: one row of S in, one row of F out.
input_size = n_b
output_size = n

# Define the network
net = MatrixFactorizationNet(input_size, [512, 256, 128], output_size)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

l1_lambda = 0.0001  # Regularization strength
norm_eps = 1e-8  # Added to row sums to avoid division by zero when normalizing

# Training loop
num_epochs = 3000  # Number of passes over the training set
for epoch in range(num_epochs):
    total_loss = 0.0
    # F_batch is not used: the loss compares the reconstruction with S itself.
    for S_batch, F_batch in train_loader:
        optimizer.zero_grad()

        # to_dense() is kept from the original in case S is supplied sparse.
        S_dense = S_batch.to_dense()
        S_batch_flattened = S_dense.view(S_dense.size(0), -1)

        # Forward pass: predict F and rescale every row to sum to 1.
        F_batch_pred = net(S_batch_flattened)
        F_batch_pred_normalized = F_batch_pred / (
            F_batch_pred.sum(dim=1, keepdim=True) + norm_eps
        )

        # Reconstruct S as F @ H.
        SH_batch = torch.matmul(F_batch_pred_normalized, H)

        # Reconstruction loss plus an L1 penalty on all network parameters.
        loss = criterion(SH_batch, S_dense)
        l1_reg = sum(torch.sum(torch.abs(param)) for param in net.parameters())
        loss = loss + l1_lambda * l1_reg

        # Backward pass first, THEN clip: clipping before backward() only sees
        # the gradients that zero_grad() has just cleared and has no effect.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
        optimizer.step()

        total_loss += loss.item()

    # Print the average loss for this epoch
    average_loss = total_loss / len(train_loader)
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}')


# Evaluation on the held-out split (no gradients, no L1 term).
with torch.no_grad():
    total_loss = 0.0
    F_pred_matrix_normalized_list = []

    for S_batch, F_batch in test_loader:
        S_dense = S_batch.to_dense()
        S_batch_flattened = S_dense.view(S_dense.size(0), -1)

        F_batch_pred = net(S_batch_flattened)
        # Same epsilon-guarded normalization as in training.
        F_batch_pred_normalized = F_batch_pred / (
            F_batch_pred.sum(dim=1, keepdim=True) + norm_eps
        )

        SH_batch_test = torch.matmul(F_batch_pred_normalized, H)
        loss = criterion(SH_batch_test, S_dense)

        total_loss += loss.item()
        F_pred_matrix_normalized_list.append(F_batch_pred_normalized)

    # Average of the per-batch losses over the test loader
    avg_loss = total_loss / len(test_loader)
    print("Average Test Loss:", avg_loss)


80000
130
300


KeyboardInterrupt: 

In [None]:
# NOTE: everything below is a single triple-quoted string literal, so this
# hyperparameter grid search with k-fold cross-validation is never executed.
# NOTE(review): points to confirm before turning it back into live code:
#   - the validation step feeds F_valid (not S_valid) into the network;
#   - `F = net(...)` would rebind the name F that holds the loaded data;
#   - input_size / output_size are N * n_b and N * n here (whole matrix
#     flattened), unlike the training cell, which uses n_b and n;
#   - the grid contains two-element hidden size tuples; confirm that
#     MatrixFactorizationNet accepts that many hidden layers.
"""import numpy as np

# Define your hyperparameters to tune
learning_rates = [0.1, 0.01, 0.001]
hidden_layer_sizes = [(512, 256, 128), (256, 128), (128, 64)]
l1_lambda_values = [0.0001, 0.00001]

criterion = nn.MSELoss()

# Assuming you have your data S, H, and F
# S and H are your input data, and F is your target output data

# Define the number of folds for cross-validation
num_folds = 5

# Calculate the number of samples in your dataset
N = S.shape[0]  # Use N instead of num_samples
num_samples = N

# Create an array of shuffled indices
shuffled_indices = np.arange(N)
np.random.shuffle(shuffled_indices)

# Shuffle your data using the shuffled indices
S_shuffled = S#[shuffled_indices]
H_shuffled = H#[:, shuffled_indices]
F_shuffled = F#[:, shuffled_indices]

# Split your data into folds
fold_size = N // num_folds
validation_losses = []

num_epochs = 1000

# Perform cross-validation for each hyperparameter combination
for lr in learning_rates:
    for hidden_sizes in hidden_layer_sizes:
        for l1_lambda in l1_lambda_values:
            print(f"Hyperparameters: Learning rate = {lr}, Hidden sizes = {hidden_sizes}, L1 Lambda = {l1_lambda}")

            # Initialize a list to store validation losses for each fold
            validation_losses = []

            for fold in range(num_folds):
                # Calculate the start and end indices for the current fold
                start_idx = fold * fold_size
                end_idx = (fold + 1) * fold_size if fold < num_folds - 1 else N

                # Split your data into training and validation sets for the current fold
                S_train = np.concatenate([S_shuffled[:start_idx], S_shuffled[end_idx:]], axis=0)
                #H_train = np.concatenate([H_shuffled[:, :start_idx], H_shuffled[:, end_idx:]], axis=1)
                H_train = H
                F_train = np.concatenate([F_shuffled[:start_idx], F_shuffled[end_idx:]], axis=0)

                S_train = torch.from_numpy(S_train).float()
                F_train = torch.from_numpy(F_train).float()

                print(S_train.shape)
                print(F_train.shape)
                print(H_train.shape)

                N, n_b = np.shape(S_train)  
                _, n = np.shape(F_train)
                input_size = N * n_b
                output_size = N * n

                S_valid = S_shuffled[start_idx:end_idx]
                #H_valid = H_shuffled[:, start_idx:end_idx]
                H_valid = H
                F_valid = F_shuffled[start_idx:end_idx]

                F_valid = torch.from_numpy(F_valid).float()

                S_train_flattened = S_train.reshape(-1)

                # Initialize your network with the specified architecture
                net = MatrixFactorizationNet(input_size, hidden_sizes, output_size)

                # Initialize optimizer with the current learning rate
                optimizer = optim.Adam(net.parameters(), lr=lr)

                # Training loop for the current fold (similar to your code)
                for epoch in range(num_epochs):
                    optimizer.zero_grad()
                    # Forward pass
                    F = net(S_train_flattened)
                    F_matrix = F.view(N, n)  # Reshape to matrix form
                    SH = torch.matmul(F_matrix, H_train)  # Compute approximation of S
                    loss = criterion(SH, S_train)  # Compute loss

                    # L1 Regularization
                    l1_reg = sum(torch.sum(torch.abs(param)) for param in net.parameters())
                    loss += l1_lambda * l1_reg

                    # Backward pass and optimize
                    loss.backward()
                    optimizer.step()

                    # Apply sparsity constraint
                    with torch.no_grad():
                        F_matrix = apply_sparsity(F_matrix)

                # Calculate validation loss for the current fold
                with torch.no_grad():
                    F_valid_flattened = F_valid.reshape(-1)
                    F_valid_pred = net(F_valid_flattened)  # Predict F for validation data
                    F_valid_pred_matrix = F_valid_pred.view(F_valid.shape)  # Reshape to matrix form
                    print(F_valid_pred_matrix)
                    print(H_valid)
                    SH_valid = torch.matmul(F_valid_pred_matrix, H_valid)  # Compute approximation of S for validation
                    print("a")
                    print(SH_valid.shape)
                    print(S_valid.shape)
                    validation_loss = criterion(SH_valid, S_valid)  # Compute validation loss
                    print("b")
                    validation_losses.append(validation_loss.item())  # Append the loss value to the list

            # Calculate average validation loss across all folds for the current hyperparameter combination
            avg_validation_loss = np.mean(validation_losses)
            print(f"Average Validation Loss: {avg_validation_loss}")"""
