In [2]:
# Name:Yatharth
import numpy as np
from sklearn.neural_network import BernoulliRBM
from itertools import combinations
from sklearn.metrics import mean_squared_error

def _fit_and_score_rbm(visible, n_hidden, random_state):
    """Fit a BernoulliRBM on ``visible`` and score its one-step reconstruction.

    Args:
        visible: 2-D array (samples x selected features) used both to fit the
            RBM and as the reconstruction target.
        n_hidden: Number of hidden units (``n_components``) of the RBM.
        random_state: Seed forwarded to ``BernoulliRBM``.

    Returns:
        A ``(hidden, loss)`` tuple where ``hidden`` is ``rbm.transform(visible)``
        and ``loss`` is the mean squared error between ``visible`` and
        ``sigmoid(hidden @ rbm.components_ + rbm.intercept_visible_)``.
    """
    rbm = BernoulliRBM(n_components=n_hidden, random_state=random_state)
    rbm.fit(visible)

    # Manual reconstruction: project the hidden representation back onto the
    # visible units and squash with an element-wise sigmoid.
    hidden = rbm.transform(visible)
    visible_logits = (hidden @ rbm.components_) + rbm.intercept_visible_
    reconstruction = 1 / (1 + np.exp(-visible_logits))

    return hidden, mean_squared_error(visible, reconstruction)


def greedy_rbm_training(
    data,
    num_layers,
    *,
    subset_size=3,
    probe_hidden_units=3,
    hidden_unit_candidates=range(1, 6),
    random_state=42,
):
    """Greedily stack RBM layers, choosing features and hidden size per layer.

    For every layer the function
      1. fits one RBM (with ``probe_hidden_units`` hidden units) on every
         ``subset_size``-sized combination of the layer's input columns and
         keeps the subset with the lowest reconstruction MSE,
      2. refits on that subset for every value in ``hidden_unit_candidates``
         and keeps the hidden-unit count with the lowest reconstruction MSE
         (ties resolve to the earliest candidate),
      3. feeds the hidden representation of the winning model to the next
         layer as its input.

    Progress is printed to stdout for each layer.

    Args:
        data: 2-D array-like (samples x features). Anything accepted by
            ``np.asarray`` works, including a list of lists.
        num_layers: Number of layers to train; values < 1 train nothing.
        subset_size: Maximum number of input columns per layer. It is clamped
            to the number of columns actually available in that layer.
        probe_hidden_units: Hidden units used while searching feature subsets.
        hidden_unit_candidates: Iterable of hidden-unit counts tried in step 2.
        random_state: Seed passed to every ``BernoulliRBM``. With an integer
            seed every fit is reproducible, which is what allows the winning
            step-2 model to be reused instead of being fitted again.

    Returns:
        A list with one dict per layer, with keys ``"layer"`` (1-based index),
        ``"features"`` (list of selected column indices) and
        ``"hidden_units"``. For layers after the first, the indices in
        ``"features"`` refer to hidden units of the previous layer, not to
        columns of ``data``.

    Raises:
        ValueError: If ``data`` is not 2-D or ``hidden_unit_candidates`` is
            empty.
    """
    layer_input = np.asarray(data)
    if layer_input.ndim != 2:
        raise ValueError(
            "data must be a 2-D (samples x features) array, "
            f"got {layer_input.ndim}-D input"
        )

    # Materialise once so a one-shot iterator can be reused for every layer.
    hidden_unit_candidates = list(hidden_unit_candidates)
    if not hidden_unit_candidates:
        raise ValueError("hidden_unit_candidates must not be empty")

    trained_layers = []  # One result dict per trained layer

    for layer in range(1, num_layers + 1):
        print(f"\nTraining Layer {layer}...")

        # Step 1: Find the best subset of this layer's input columns.
        num_inputs = layer_input.shape[1]
        best_features = None
        best_reconstruction_loss = float("inf")

        for feature_subset in combinations(
            range(num_inputs), min(subset_size, num_inputs)
        ):
            feature_subset = list(feature_subset)
            _, reconstruction_loss = _fit_and_score_rbm(
                layer_input[:, feature_subset], probe_hidden_units, random_state
            )
            if reconstruction_loss < best_reconstruction_loss:
                best_reconstruction_loss = reconstruction_loss
                best_features = feature_subset

        print(f"  Best Feature Subset: {best_features}")
        print(f"  Reconstruction Loss: {best_reconstruction_loss}")

        # Step 2: Determine the best number of hidden units for that subset.
        visible = layer_input[:, best_features]
        best_hidden_units = None
        best_hidden_loss = float("inf")
        best_hidden_output = None

        for n_hidden in hidden_unit_candidates:
            hidden_output, hidden_loss = _fit_and_score_rbm(
                visible, n_hidden, random_state
            )
            if hidden_loss < best_hidden_loss:
                best_hidden_loss = hidden_loss
                best_hidden_units = n_hidden
                # Keep the winner's hidden representation so the model does
                # not have to be trained a second time below.
                best_hidden_output = hidden_output

        print(f"  Optimal Hidden Units: {best_hidden_units}")
        print(f"  Final Reconstruction Loss: {best_hidden_loss}")

        # Store the layer's results
        trained_layers.append({
            "layer": layer,
            "features": best_features,
            "hidden_units": best_hidden_units,
        })

        # The winning hidden representation is the next layer's input.
        layer_input = best_hidden_output

    return trained_layers

# Collect the problem dimensions interactively. A non-integer answer makes
# int() raise ValueError.
N = int(input("Enter the number of data points (N): "))
M = int(input("Enter the number of features (M): "))
L = int(input("Enter the number of RBM layers (L): "))

# Validate constraints
if not (1 <= N <= 1000 and 1 <= M <= 1000 and 1 <= L <= 1000):
    raise ValueError("Constraints violated: 1 <= N, M, L <= 1000")

# Read the N x M binary matrix, one whitespace-separated row per line.
print(f"\nEnter the binary data matrix of size {N} x {M} (values separated by spaces):")
data = []
for i in range(N):
    # split() with no argument already ignores leading/trailing whitespace.
    row = list(map(int, input(f"Row {i + 1}: ").split()))
    if len(row) != M or any(x not in (0, 1) for x in row):
        raise ValueError(
            f"Row {i + 1} must have exactly {M} binary (0 or 1) values."
        )
    data.append(row)
data = np.array(data)

# Run the RBM training algorithm
result = greedy_rbm_training(data, L)

# Display the final results: echo the inputs, then one entry per trained layer.
print("\nFinal Results:")
print("Inputs:")
print(f"N: {N}, M: {M}, L: {L}")
print(f"Data (N x M):\n{data}")
print("\nOutputs:")
for layer in result:
    print(f"Layer {layer['layer']}:")
    print(f"  Subset of Features: {layer['features']}")
    print(f"  Number of Hidden Units: {layer['hidden_units']}")


Enter the number of data points (N): 6
Enter the number of features (M): 5
Enter the number of RBM layers (L): 2

Enter the binary data matrix of size 6 x 5:
Row 1: 1 0 1 1 0
Row 2: 0 1 0 1 1
Row 3: 1 1 1 0 0
Row 4: 0 0 1 1 1
Row 5: 1 0 0 1 1
Row 6: 0 1 1 0 0 

Training Layer 1...
  Best Feature Subset: [0, 2, 4]
  Reconstruction Loss: 0.2565282345893219
  Optimal Hidden Units: 3
  Final Reconstruction Loss: 0.2565282345893219

Training Layer 2...
  Best Feature Subset: [0, 1, 2]
  Reconstruction Loss: 0.0004402580140816877
  Optimal Hidden Units: 3
  Final Reconstruction Loss: 0.0004402580140816877

Final Results:
Inputs:
N: 6, M: 5, L: 2
Data (N x M):
[[1 0 1 1 0]
 [0 1 0 1 1]
 [1 1 1 0 0]
 [0 0 1 1 1]
 [1 0 0 1 1]
 [0 1 1 0 0]]

Outputs:
Layer 1:
  Subset of Features: [0, 2, 4]
  Number of Hidden Units: 3
Layer 2:
  Subset of Features: [0, 1, 2]
  Number of Hidden Units: 3
