## CONTEXTUAL FEATURE SELECTION WITH CONDITIONAL STOCHASTIC GATES

In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np

# Load dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Reduce to maximum 1000 rows
# X = X[:1000, :]
# y = y[:1000]

# Original column names
column_names = housing.feature_names
feature_names = np.array(column_names)
# Add noise columns
np.random.seed(42)  # For reproducibility

# Gaussian noise
gaussian_noise = np.random.normal(0, 1, size=X.shape[0])

# Uniform noise
uniform_noise = np.random.uniform(-1, 1, size=X.shape[0])

# Cosine function
cosine_values = np.cos(np.linspace(0, 10, X.shape[0]))

# Create a DataFrame from X
df = pd.DataFrame(X, columns=column_names)

# Add the noise columns to DataFrame
df['Gaussian_Noise'] = gaussian_noise
df['Uniform_Noise'] = uniform_noise
df['Cosine_Values'] = cosine_values

# Shuffle column locations
np.random.seed(42)  # Ensure reproducibility for column shuffling
shuffled_columns = np.random.permutation(df.columns)
df = df[shuffled_columns]

# Now, df is a DataFrame with shuffled columns and includes the noise features
# You can view the DataFrame as follows:
print(df.head())

# Convert target to a DataFrame and concatenate with features for a complete view
y_df = pd.DataFrame(y, columns=['Target'])
df_full = pd.concat([df, y_df], axis=1)

# If you wish to proceed with splitting and scaling:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset

# Splitting the data (assuming you want to keep DataFrame structure for X)
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

# Scaling features
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# X_train_scaled and X_test_scaled are now DataFrames with scaled features and retained column names.
# convert them to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
y_train_tensor = y_train_tensor.view(-1, 1)
y_test_tensor = y_test_tensor.view(-1, 1)

# Create TensorDataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# new column names
feature_names = np.array(X_train_scaled.columns)
# feature_names

   AveOccup  MedInc  Uniform_Noise  Cosine_Values  AveRooms  HouseAge  \
0  2.555556  8.3252       0.143745       1.000000  6.984127      41.0   
1  2.109842  8.3014      -0.515390       1.000000  6.238137      21.0   
2  2.802260  7.2574       0.677635       1.000000  8.288136      52.0   
3  2.547945  5.6431      -0.038881       0.999999  5.817352      52.0   
4  2.181467  3.8462       0.285882       0.999998  6.281853      52.0   

   Gaussian_Noise  Population  Longitude  AveBedrms  Latitude  
0        0.496714       322.0    -122.23   1.023810     37.88  
1       -0.138264      2401.0    -122.22   0.971880     37.86  
2        0.647689       496.0    -122.24   1.073446     37.85  
3        1.523030       558.0    -122.25   1.073059     37.85  
4       -0.234153       565.0    -122.25   1.081081     37.85  


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal, Bernoulli


In [5]:
import torch
from torch import nn
from torch.optim import Adam
# import numpy as np

# # Define the Hypernetwork
# class HyperNet(nn.Module):
#     def __init__(self, input_dim, feature_dim):
#         super(HyperNet, self).__init__()
#         self.fc1 = nn.Linear(input_dim, 128) # Adjust the size as necessary
#         self.fc2 = nn.Linear(128, feature_dim)
#         self.sigmoid = nn.Sigmoid()
        
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = self.sigmoid(self.fc2(x))
#         return x

from torch.nn.init import xavier_uniform_

# class HyperNet(nn.Module):
#     def __init__(self, input_dim, feature_dim):
#         super(HyperNet, self).__init__()
#         self.fc1 = nn.Linear(input_dim, 128)
#         # Initialize weights using Xavier initialization
#         xavier_uniform_(self.fc1.weight)
#         self.fc2 = nn.Linear(128, feature_dim)
#         xavier_uniform_(self.fc2.weight)
#         self.sigmoid = nn.Sigmoid()
        
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         # Final layer with sigmoid to ensure outputs are between 0 and 1
#         x = self.sigmoid(self.fc2(x))
#         return x

class HyperNet(nn.Module):
    def __init__(self, input_dim, feature_dim):
        super(HyperNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        xavier_uniform_(self.fc1.weight)
        self.fc2 = nn.Linear(128, feature_dim)  # feature_dim should match the number of input features
        xavier_uniform_(self.fc2.weight)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
# Define the Prediction Network
# class PredNet(nn.Module):
#     def __init__(self, feature_dim):
#         super(PredNet, self).__init__()
#         self.fc1 = nn.Linear(feature_dim, 128) # Adjust the size as necessary
#         self.fc2 = nn.Linear(128, 1) # Assuming a single output
        
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

class PredNet(nn.Module):
    def __init__(self, feature_dim):
        super(PredNet, self).__init__()
        self.fc1 = nn.Linear(feature_dim, 128)  # feature_dim should match the number of input features
        self.fc2 = nn.Linear(128, 1)

# Initialize models (assuming 11 input features)
input_dim = 11
feature_dim = input_dim  # Make sure this matches the number of input features
hypernet = HyperNet(input_dim, feature_dim)
model = PredNet(feature_dim)

def train(model, hypernet, criterion, optimizer, epochs, X_train, y_train, X_val, y_val):
    for epoch in range(epochs):
        model.train()
        hypernet.train()
        optimizer.zero_grad()
        
        # Forward pass through hypernetwork to get selection probabilities
        selection_prob = hypernet(X_train)
        # Sample from Bernoulli distribution to get feature mask
        selection_mask = torch.bernoulli(selection_prob)
        # Apply mask to input features
        selected_features = X_train * selection_mask
        
        # Make predictions with masked features
        y_pred = model(selected_features)
        loss = criterion(y_pred, y_train)
        
        # Backpropagation
        loss.backward()
        optimizer.step()

        # Validation phase
        model.eval()
        hypernet.eval()
        with torch.no_grad():
            val_selection_prob = hypernet(X_val)
            val_selection_mask = torch.bernoulli(val_selection_prob)
            val_selected_features = X_val * val_selection_mask
            val_y_pred = model(val_selected_features)
            val_loss = criterion(val_y_pred, y_val)
        
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')

# Model initialization
input_dim = X_train_tensor.shape[1]
feature_dim = input_dim # Assuming context and feature dimensions are the same
hypernet = HyperNet(input_dim, feature_dim)
model = PredNet(feature_dim)
criterion = nn.MSELoss()
optimizer = Adam(list(model.parameters()) + list(hypernet.parameters()), lr=0.001)

# Training
epochs = 100 # Adjust as necessary
train(model, hypernet, criterion, optimizer, epochs, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (16512x128 and 11x128)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.normal import Normal
from torch.distributions.bernoulli import Bernoulli


In [None]:
class Hypernetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Hypernetwork, self).__init__()
        # Define the architecture of the hypernetwork.
        self.fc1 = nn.Linear(input_dim, 128) # Input layer
        self.fc2 = nn.Linear(128, output_dim) # Output layer mapping to Bernoulli parameters

    def forward(self, x):
        # Forward pass through the network
        x = F.relu(self.fc1(x)) # Activation function for non-linearity
        x = torch.sigmoid(self.fc2(x)) # Sigmoid to ensure output is in [0,1], representing probabilities
        return x


In [None]:
class PredictionNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PredictionNetwork, self).__init__()
        # Define the architecture of the prediction network.
        self.fc1 = nn.Linear(input_dim, 128) # Input layer
        self.fc2 = nn.Linear(128, output_dim) # Output layer for the response variable

    def forward(self, x):
        # Forward pass through the network
        x = F.relu(self.fc1(x)) # Activation function for non-linearity
        x = self.fc2(x) # Output layer
        return x


In [None]:
def train(model, hypernet, criterion, optimizer, data_loader, epochs=10):
    for epoch in range(epochs):
        for x, z, y in data_loader: # Assuming x is the feature, z is the context, y is the target
            optimizer.zero_grad()
            
            # Generate probabilities from the Hypernetwork
            probs = hypernet(z)
            
            # Sample from Bernoulli to get the feature selection mask
            m = Bernoulli(probs)
            mask = m.sample()
            
            # Apply mask and predict
            x_masked = x * mask
            predictions = model(x_masked)
            
            # Compute loss and backpropagate
            loss = criterion(predictions, y)
            loss.backward()
            optimizer.step()
        
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')
