In [None]:
import pandas as pd

# Datasets mirror the ones loaded in uncertainty.ipynb. All paths are relative
# to the notebook's working directory.

# Medical classification tables, bound in the order simple / med / complex.
df_class_simple, df_class_med, df_class_complex = (
    pd.read_csv(csv_path)
    for csv_path in ('breast_cancer.csv', 'heart_disease.csv', 'diabetes.csv')
)

# Regression tables, same ordering.
# NOTE(review): 'housing.csv' and 'Housing.csv' differ only by letter case, so on
# a case-insensitive filesystem they presumably resolve to the same file -- confirm.
df_reg_simple, df_reg_med, df_reg_complex = (
    pd.read_csv(csv_path)
    for csv_path in ('housing.csv', 'real_estate_valuation.csv', 'Housing.csv')
)

# Quick sanity report: one (rows, columns) tuple per table.
print("Datasets loaded successfully!")
print(f"Classification datasets: {df_class_simple.shape}, {df_class_med.shape}, {df_class_complex.shape}")
print(f"Regression datasets: {df_reg_simple.shape}, {df_reg_med.shape}, {df_reg_complex.shape}")

In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session (this also hides
# deprecation and numerical warnings raised by the libraries used below).
warnings.filterwarnings('ignore')

# One shared seed for torch and numpy so reruns are reproducible.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [None]:
class SimpleNN(nn.Module):
    """Feed-forward network with a single LeakyReLU hidden layer.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units (default 1).
        task_type: ``'classification'`` applies a sigmoid to the output layer;
            any other value leaves the output linear.
    """

    def __init__(self, input_size, hidden_size, output_size=1, task_type='classification'):
        super().__init__()
        self.task_type = task_type
        self.output_size = output_size

        # Layers are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        self.hidden = nn.Linear(input_size, hidden_size)
        self.hidden_act = nn.LeakyReLU()
        self.output = nn.Linear(hidden_size, output_size)

        # Only used when task_type == 'classification'.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid probabilities for classification, raw outputs otherwise."""
        x = self.hidden_act(self.hidden(x))
        x = self.output(x)
        if self.task_type == 'classification':
            x = self.sigmoid(x)
        return x

    def predict_proba(self, x):
        """Run a gradient-free forward pass in eval mode.

        With a single output unit the result has two columns ``[1 - p, p]``;
        otherwise the forward output is returned unchanged. Only meaningful
        when the network was built for classification. ``x`` must be 2-D
        because the columns are concatenated along ``dim=1``.
        """
        self.eval()
        with torch.no_grad():
            out = self.forward(x)
            if self.output_size == 1:
                return torch.cat([1 - out, out], dim=1)
            return out

    def get_hidden_activation(self, x):
        """Return the hidden-layer activations for ``x`` without tracking gradients."""
        with torch.no_grad():
            return self.hidden_act(self.hidden(x))

    def get_weights(self):
        """Return a NumPy snapshot of the layer parameters.

        Keys: ``'v_ji'`` (input->hidden weights), ``'w_kj'`` (hidden->output
        weights), ``'hidden_bias'`` and ``'output_bias'``.

        The arrays are copies moved to host memory, so this works for a model
        on any device and mutating the result never alters the live parameters.
        """
        def _snapshot(param):
            return param.detach().cpu().numpy().copy()

        return {
            'v_ji': _snapshot(self.hidden.weight),   # [hidden_size, input_size]
            'w_kj': _snapshot(self.output.weight),   # [output_size, hidden_size]
            'hidden_bias': _snapshot(self.hidden.bias),
            'output_bias': _snapshot(self.output.bias),
        }

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, mean_squared_error, r2_score, confusion_matrix, classification_report
import math

class ActiveLearner:
    """Training / evaluation harness around a model, an optimizer and a loss.

    Model outputs and targets are flattened to 1-D before the loss is applied,
    so the harness assumes one scalar prediction per sample.

    Args:
        model: Network to train; it is moved to ``device``.
        optimizer: Optimizer already bound to the model's parameters.
        criterion: Loss callable taking ``(outputs, targets)``.
        device: Device on which batches are processed.
    """

    def __init__(self, model, optimizer, criterion, device='cpu'):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device

    def _forward_flat(self, batch_x, batch_y):
        """Run the model on one batch; return (outputs, targets) as 1-D tensors.

        ``reshape(-1)`` covers every case the previous squeeze/unsqueeze/view
        sequence handled (``[B, 1]`` outputs, 0-dim targets) and also works for
        non-contiguous tensors.
        """
        batch_x = batch_x.to(self.device)
        batch_y = batch_y.to(self.device)
        outputs = self.model(batch_x).reshape(-1)
        targets = batch_y.reshape(-1).float()
        return outputs, targets

    def _batch_weight(self, n_items):
        """Weight of one batch in the running loss average.

        A mean-reduced criterion reports a per-sample average, so each batch is
        weighted by its size; otherwise a short final batch would count as much
        as a full one. Any other reduction keeps the plain per-batch average.
        """
        if getattr(self.criterion, 'reduction', 'mean') == 'mean':
            return n_items
        return 1

    def train_epoch(self, train_loader):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            The average training loss over the epoch, or ``0.0`` when the
            loader yields no batches.
        """
        self.model.train()
        total_loss = 0.0
        total_weight = 0
        for batch_x, batch_y in train_loader:
            self.optimizer.zero_grad()
            outputs, targets = self._forward_flat(batch_x, batch_y)

            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            weight = self._batch_weight(targets.numel())
            total_loss += loss.item() * weight
            total_weight += weight

        return total_loss / max(1, total_weight)

    def evaluate_classification(self, val_loader):
        """Evaluate a binary classifier whose outputs are probabilities.

        Predictions are obtained by thresholding the outputs at 0.5.

        Returns:
            ``(accuracy, avg_loss, precision, recall, f1, conf_matrix,
            class_report)``; precision, recall and F1 are weighted averages.
        """
        self.model.eval()
        all_preds = []
        all_true = []
        total_loss = 0.0
        total_weight = 0

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs, targets = self._forward_flat(batch_x, batch_y)

                weight = self._batch_weight(targets.numel())
                total_loss += self.criterion(outputs, targets).item() * weight
                total_weight += weight

                all_preds.extend((outputs > 0.5).long().cpu().numpy())
                all_true.extend(targets.cpu().numpy().astype(int))

        accuracy = accuracy_score(all_true, all_preds)
        precision = precision_score(all_true, all_preds, average='weighted', zero_division=0)
        recall = recall_score(all_true, all_preds, average='weighted', zero_division=0)
        f1 = f1_score(all_true, all_preds, average='weighted', zero_division=0)
        conf_matrix = confusion_matrix(all_true, all_preds)
        # zero_division=0 matches the scalar metrics above.
        class_report = classification_report(all_true, all_preds, zero_division=0)

        # Guarded like the other methods so an empty loader cannot divide by zero.
        avg_loss = total_loss / max(1, total_weight)
        return accuracy, avg_loss, precision, recall, f1, conf_matrix, class_report

    def evaluate_regression(self, val_loader):
        """Evaluate a regressor.

        Returns:
            ``(mse, rmse, r2, avg_loss)`` where the first three come from
            scikit-learn on the collected predictions and ``avg_loss`` is the
            average of ``criterion`` over the loader.
        """
        self.model.eval()
        total_loss = 0.0
        total_weight = 0
        all_preds = []
        all_true = []
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs, targets = self._forward_flat(batch_x, batch_y)

                weight = self._batch_weight(targets.numel())
                total_loss += self.criterion(outputs, targets).item() * weight
                total_weight += weight

                all_preds.extend(outputs.cpu().numpy().tolist())
                all_true.extend(targets.cpu().numpy().tolist())

        avg_loss = total_loss / max(1, total_weight)
        mse = mean_squared_error(all_true, all_preds)
        rmse = math.sqrt(mse)
        r2 = r2_score(all_true, all_preds)
        return mse, rmse, r2, avg_loss

In [None]:
# Cell 3: Vectorized Sensitivity Analysis Implementation
class SensitivityAnalysis:
    """Output-to-input sensitivity measures used for SASLA pattern selection."""

    @staticmethod
    def compute_sensitivity_matrix_vectorized(network, X_batch):
        """Return the Jacobian d(output_k)/d(input_i) for every pattern in a batch.

        The derivatives are obtained with autograd from the network's actual
        forward pass, so they are correct for whatever activations the network
        uses. (A closed form that assumes sigmoid units would be wrong for a
        LeakyReLU hidden layer or a linear regression output.)

        Args:
            network: A ``torch.nn.Module`` mapping ``[batch, input_size]`` to
                ``[batch, output_size]`` (or ``[batch]``). It is put in eval mode.
            X_batch: Array-like of shape ``[batch, input_size]``.

        Returns:
            ``np.ndarray`` of shape ``[batch, output_size, input_size]``, float64.
        """
        network.eval()

        # Evaluate on the device that holds the parameters.
        first_param = next(network.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
        X_tensor = (
            torch.as_tensor(X_batch, dtype=torch.float32, device=device)
            .detach()
            .clone()
            .requires_grad_(True)
        )

        # enable_grad makes this work even when the caller is inside no_grad().
        with torch.enable_grad():
            outputs = network(X_tensor)
            if outputs.dim() == 1:
                outputs = outputs.unsqueeze(1)
            n_outputs = outputs.shape[1]

            per_output = []
            for k in range(n_outputs):
                # Patterns are independent, so the gradient of the batch sum of
                # output k w.r.t. the inputs is the per-pattern gradient.
                (grad_k,) = torch.autograd.grad(
                    outputs[:, k].sum(),
                    X_tensor,
                    retain_graph=k < n_outputs - 1,
                    allow_unused=True,
                )
                if grad_k is None:  # output k does not depend on the inputs
                    grad_k = torch.zeros_like(X_tensor)
                per_output.append(grad_k)

        S_oz_batch = torch.stack(per_output, dim=1)  # [batch, output, input]
        return S_oz_batch.detach().cpu().numpy().astype(np.float64)

    @staticmethod
    def compute_output_sensitivity_vector_vectorized(S_oz_batch, norm_type='euclidean'):
        """Collapse the input axis of a ``[batch, output, input]`` sensitivity array.

        Args:
            S_oz_batch: Sensitivity matrices, shape ``[batch, output, input]``.
            norm_type: ``'sum'`` (L1 norm) or ``'euclidean'`` (L2 norm).

        Returns:
            Array of shape ``[batch, output]``.

        Raises:
            ValueError: If ``norm_type`` is not one of the supported names.
        """
        S = np.asarray(S_oz_batch, dtype=np.float64)
        if norm_type == 'sum':
            return np.abs(S).sum(axis=2)
        if norm_type == 'euclidean':
            return np.sqrt((S ** 2).sum(axis=2))
        raise ValueError(
            f"Unknown norm_type {norm_type!r}; expected 'sum' or 'euclidean'")

    @staticmethod
    def compute_pattern_informativeness_vectorized(S_o_batch, norm_type='max'):
        """Reduce a ``[batch, output]`` sensitivity array to one score per pattern.

        ``norm_type='max'`` takes the largest absolute output sensitivity; any
        other value takes the Euclidean norm across outputs.
        """
        S_o = np.asarray(S_o_batch, dtype=np.float64)
        if norm_type == 'max':
            return np.abs(S_o).max(axis=1)
        return np.linalg.norm(S_o, axis=1)

    # Single-pattern wrappers kept for compatibility with older call sites.
    @staticmethod
    def compute_sensitivity_matrix(network, x):
        """Sensitivity matrix ``[output, input]`` for one pattern (1-D or ``[1, n]``)."""
        if len(x.shape) == 1:
            x = x.reshape(1, -1)
        result = SensitivityAnalysis.compute_sensitivity_matrix_vectorized(network, x)
        return result[0]

    @staticmethod
    def compute_output_sensitivity_vector(S_oz, norm_type='sum'):
        """Per-output sensitivity for one pattern.

        Note the default here is ``'sum'``, unlike the batched variant.
        """
        if len(S_oz.shape) == 2:
            S_oz = S_oz.reshape(1, S_oz.shape[0], S_oz.shape[1])
        result = SensitivityAnalysis.compute_output_sensitivity_vector_vectorized(S_oz, norm_type)
        return result[0]

    @staticmethod
    def compute_pattern_informativeness(S_o, norm_type='max'):
        """Informativeness score for one pattern's output-sensitivity vector."""
        if len(S_o.shape) == 1:
            S_o = S_o.reshape(1, -1)
        result = SensitivityAnalysis.compute_pattern_informativeness_vectorized(S_o, norm_type)
        return result[0]


In [None]:
def load_and_prepare_dataset(dataset, task_type=None, target_column=None, random_state=42):
    """Clean, encode, split and standardise a tabular dataset.

    Steps: drop rows with missing values, pick the target column, label-encode
    non-numeric features, split 80/20 (stratified for classification), then
    standardise features with statistics fitted on the training split only.
    Regression targets are standardised the same way.

    Args:
        dataset: ``pandas.DataFrame`` holding features and target. Not modified.
        task_type: ``'classification'`` or ``'regression'``. When omitted it is
            inferred from the target column name.
        target_column: Name of the target column. When omitted, the first
            well-known target name present in the frame is used; an explicit
            ``task_type`` restricts the search to that task's names.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Classification: ``(X_train, y_train, X_test, y_test, feature_scaler)``.
        Regression: ``(X_train, y_train, X_test, y_test, feature_scaler,
        target_scaler)``.

    Raises:
        ValueError: If no target column can be determined, or if ``task_type``
            is neither given nor inferable as one of the two supported tasks.
    """
    # dropna() returns a new frame, so the caller's data is never mutated.
    dataset = dataset.dropna()

    known_targets = {
        'classification': ['Default', 'loan_status', 'LoanApproved', 'Diagnosis', 'Outcome', 'num'],
        'regression': ['median_house_value', 'Y house price of unit area', 'price'],
    }

    if target_column is None:
        # Honour an explicit task type; otherwise try classification names first.
        search_order = [task_type] if task_type in known_targets else ['classification', 'regression']
        for candidate_task in search_order:
            found = next(
                (col for col in known_targets[candidate_task] if col in dataset.columns), None)
            if found is not None:
                target_column, task_type = found, candidate_task
                break
    elif task_type is None:
        # Explicit target, unknown task: infer it from the well-known names
        # instead of overwriting the caller's target column.
        task_type = next(
            (task for task, names in known_targets.items() if target_column in names), None)

    if target_column is None or target_column not in dataset.columns:
        available_cols = list(dataset.columns)
        raise ValueError(f"No suitable target column found. Available columns: {available_cols}")
    if task_type not in known_targets:
        raise ValueError(
            f"task_type must be 'classification' or 'regression' (got {task_type!r}); "
            f"it could not be inferred for target column {target_column!r}")

    print(f"Using target column: {target_column} for {task_type}")

    # Separate features and target
    X = dataset.drop(columns=[target_column])
    y = dataset[target_column]

    # Label-encode every non-numeric feature (object, string, category, ...).
    # Boolean columns need no encoding: the float cast below maps them to 0/1.
    for col in X.select_dtypes(exclude=['number', 'bool']).columns:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    X = X.astype(np.float32).values

    # Handle target variable
    if task_type == 'classification':
        if not pd.api.types.is_numeric_dtype(y):
            y = LabelEncoder().fit_transform(y)
        # Multi-class targets are collapsed to binary: class 0 vs. everything else.
        if len(np.unique(y)) > 2:
            y = (y > 0).astype(int)
        y = np.array(y, dtype=np.float32)

        print(f"Target distribution: {np.unique(y, return_counts=True)}")
    else:  # regression
        y = np.array(y, dtype=np.float32)
        print(f"Original target range: [{y.min():.2f}, {y.max():.2f}]")
        print(f"Target mean: {y.mean():.2f}, std: {y.std():.2f}")

    # Split into training and test sets (stratified only for classification)
    stratify = y if task_type == 'classification' else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state, stratify=stratify)

    # Fit the scaler on the training split only to avoid leaking test statistics.
    feature_scaler = StandardScaler()
    X_train = feature_scaler.fit_transform(X_train)
    X_test = feature_scaler.transform(X_test)

    # For regression, also scale the target values to prevent numerical issues
    target_scaler = None
    if task_type == 'regression':
        target_scaler = StandardScaler()
        y_train = target_scaler.fit_transform(y_train.reshape(-1, 1)).flatten()
        y_test = target_scaler.transform(y_test.reshape(-1, 1)).flatten()

        print(f"Scaled target range: [{y_train.min():.3f}, {y_train.max():.3f}]")
        print(f"Scaled target mean: {y_train.mean():.3f}, std: {y_train.std():.3f}")

    print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}, Features: {X_train.shape[1]}")
    print(f"Feature range after scaling: [{X_train.min():.3f}, {X_train.max():.3f}]")

    # Scalers are returned so callers can invert the transformations.
    if task_type == 'regression':
        return X_train, y_train, X_test, y_test, feature_scaler, target_scaler
    return X_train, y_train, X_test, y_test, feature_scaler

In [None]:
# SASLA (Sensitivity Analysis-based Selective Learning Algorithm) Class
class SASLA_class:
    """SASLA: Sensitivity Analysis-based Selective Learning Algorithm.

    After a first epoch on the full training set, each epoch trains only on
    the patterns whose informativeness (largest output sensitivity to the
    inputs) exceeds ``(1 - alpha) * mean informativeness``.

    Args:
        input_size, hidden_size, output_size: Network dimensions.
        learning_rate: Adam learning rate.
        alpha: Selection constant. The threshold is ``(1 - alpha) * mean``, so
            a value close to 1 keeps almost every pattern and a smaller value
            keeps fewer.
        task_type: ``'classification'`` (BCE loss) or anything else (MSE loss).
        device: Device for the model and the batches.
        weight_decay: L2 penalty passed to Adam.
    """

    def __init__(self, input_size, hidden_size, output_size=1, learning_rate=0.01,
                 alpha=0.9, task_type='classification', device='cpu', weight_decay=0.0001):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.alpha = alpha  # selection constant: threshold = (1 - alpha) * mean informativeness
        self.task_type = task_type
        self.device = device
        self.weight_decay = weight_decay

        # Initialize model
        self.model = SimpleNN(input_size, hidden_size, output_size, task_type).to(device)

        # Initialize optimizer and loss function
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        if task_type == 'classification':
            self.criterion = nn.BCELoss()
        else:
            self.criterion = nn.MSELoss()

        # Initialize active learner
        self.active_learner = ActiveLearner(self.model, self.optimizer, self.criterion, device)

        # Initialize sensitivity analyzer
        self.sensitivity_analyzer = SensitivityAnalysis()

        print(f"SASLA initialized: {input_size}->{hidden_size}->{output_size}, α={alpha}, task={task_type}")

    def select_informative_patterns_vectorized(self, X_candidate, y_candidate, batch_size=256):
        """Score every candidate pattern and select the informative ones.

        Args:
            X_candidate: Candidate inputs, shape ``[n, input_size]``.
            y_candidate: Unused; kept for interface compatibility.
            batch_size: Number of candidates scored per sensitivity call.

        Returns:
            ``(selected_mask, informativeness, avg_informativeness)`` where
            ``selected_mask`` is a boolean array over the candidates.
        """
        n_candidates = len(X_candidate)
        informativeness = np.zeros(n_candidates)
        if n_candidates == 0:
            # Nothing to score; avoid taking the mean of an empty array.
            return np.zeros(0, dtype=bool), informativeness, 0.0

        # Process in batches for memory efficiency
        for start_idx in range(0, n_candidates, batch_size):
            end_idx = min(start_idx + batch_size, n_candidates)
            X_batch = X_candidate[start_idx:end_idx]

            S_oz_batch = self.sensitivity_analyzer.compute_sensitivity_matrix_vectorized(self.model, X_batch)
            S_o_batch = self.sensitivity_analyzer.compute_output_sensitivity_vector_vectorized(
                S_oz_batch, norm_type='euclidean')
            informativeness[start_idx:end_idx] = (
                self.sensitivity_analyzer.compute_pattern_informativeness_vectorized(
                    S_o_batch, norm_type='max'))

        avg_informativeness = np.mean(informativeness)
        threshold = (1 - self.alpha) * avg_informativeness
        selected_indices = informativeness > threshold

        return selected_indices, informativeness, avg_informativeness

    def select_informative_patterns(self, X_candidate, y_candidate):
        """Alias of :meth:`select_informative_patterns_vectorized` with default batching."""
        return self.select_informative_patterns_vectorized(X_candidate, y_candidate)

    def _validation_loss(self, val_loader):
        """Per-sample average of ``self.criterion`` over ``val_loader``.

        Each batch loss is weighted by its size (the criteria created in
        ``__init__`` are mean-reduced), so a short final batch is not
        over-weighted. Returns ``0.0`` for an empty loader.
        """
        self.model.eval()
        total_loss = 0.0
        n_seen = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(self.device)
                batch_y = batch_y.to(self.device)
                # Flatten both sides so [B, 1] outputs line up with [B] targets.
                outputs = self.model(batch_x).reshape(-1)
                targets = batch_y.reshape(-1).float()
                n_items = targets.numel()
                total_loss += self.criterion(outputs, targets).item() * n_items
                n_seen += n_items
        return total_loss / max(1, n_seen)

    def train(self, X_train, y_train, X_test, y_test, max_epochs=100, batch_size=32,
              verbose=False, log_every=1):
        """Train with sensitivity-based pattern selection.

        Epoch 1 uses every training pattern; later epochs use the subset chosen
        by :meth:`select_informative_patterns_vectorized`. Training stops early
        if a selection comes back empty: the model would not change, so every
        following selection would be empty as well.

        Args:
            X_train, y_train: Training arrays (indexable with a boolean mask).
            X_test, y_test: Arrays used to track the validation loss.
            max_epochs: Maximum number of epochs.
            batch_size: Mini-batch size for training and validation.
            verbose: Print a progress line when True.
            log_every: Print every ``log_every``-th epoch (default: every epoch).

        Returns:
            History dict with keys ``'epochs'``, ``'training_error'``,
            ``'validation_error'``, ``'selected_patterns'`` and
            ``'total_patterns'`` (one entry per trained epoch).
        """
        history = {
            'epochs': [],
            'training_error': [],
            'validation_error': [],
            'selected_patterns': [],
            'total_patterns': []
        }

        train_size = len(X_train)
        log_every = max(1, int(log_every))

        # The validation data never changes, so build its loader once.
        val_dataset = TensorDataset(
            torch.as_tensor(X_test, dtype=torch.float32),
            torch.as_tensor(y_test, dtype=torch.float32))
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        for epoch in range(max_epochs):
            if epoch == 0:
                # Use full candidate set for first epoch
                selected_mask = np.ones(train_size, dtype=bool)
            else:
                selected_mask, _, _ = self.select_informative_patterns_vectorized(X_train, y_train)

            n_selected = int(np.sum(selected_mask))
            if n_selected == 0:
                break

            # Create data loader for selected patterns
            dataset = TensorDataset(
                torch.as_tensor(X_train[selected_mask], dtype=torch.float32),
                torch.as_tensor(y_train[selected_mask], dtype=torch.float32))
            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

            train_loss = self.active_learner.train_epoch(dataloader)
            avg_val_loss = self._validation_loss(val_dataloader)

            # Record history
            history['epochs'].append(epoch + 1)
            history['training_error'].append(train_loss)
            history['validation_error'].append(avg_val_loss)
            history['selected_patterns'].append(n_selected)
            history['total_patterns'].append(train_size)

            if verbose and (epoch + 1) % log_every == 0:
                selection_rate = n_selected / train_size * 100
                print(f"Epoch {epoch+1:3d}: Train Loss={train_loss:.6f}, Val Loss={avg_val_loss:.6f}, Selected={n_selected:3d}/{train_size} ({selection_rate:.1f}%)")

        return history

    def evaluate(self, X_test, y_test, batch_size=32):
        """Evaluate on held-out data and return a metrics dict.

        Classification keys: ``accuracy`` (in percent), ``loss``, ``precision``,
        ``recall``, ``f1_score``, ``confusion_matrix``, ``classification_report``.
        Regression keys: ``mse``, ``rmse``, ``r2``, ``loss``.
        """
        dataset = TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        if self.task_type == 'classification':
            accuracy, loss, precision, recall, f1, conf_matrix, class_report = self.active_learner.evaluate_classification(dataloader)
            return {
                'accuracy': accuracy * 100,
                'loss': loss,
                'precision': precision,
                'recall': recall,
                'f1_score': f1,
                'confusion_matrix': conf_matrix,
                'classification_report': class_report
            }
        else:
            mse, rmse, r2, loss = self.active_learner.evaluate_regression(dataloader)
            return {
                'mse': mse,
                'rmse': rmse,
                'r2': r2,
                'loss': loss
            }

In [None]:


# SASLA classification experiment on the breast-cancer dataset.
result = load_and_prepare_dataset(dataset=df_class_simple, task_type='classification', random_state=42)
X_train_class, y_train_class, X_test_class, y_test_class, feature_scaler = result


input_size_class = X_train_class.shape[1]
hidden_size_class = 32    # Adjust network size
learning_rate_class = 0.001  # Adjust learning rate
# Selection constant: patterns are kept when their informativeness exceeds
# (1 - alpha) * mean informativeness, so values near 1 keep almost all patterns.
alpha_class = 0.9
max_epochs_class = 1500     # Adjust training duration
weight_decay_class = 0.01 # L2 regularization

# Create and train SASLA model
sasla_classifier = SASLA_class(
    input_size=input_size_class,
    hidden_size=hidden_size_class,
    output_size=1,
    learning_rate=learning_rate_class,
    weight_decay=weight_decay_class,
    alpha=alpha_class,
    task_type='classification',
    device='cpu'
)

# Train
history_class = sasla_classifier.train(
    X_train_class, y_train_class, X_test_class, y_test_class,
    max_epochs=max_epochs_class, verbose=True
)

# Evaluate
metrics_class = sasla_classifier.evaluate(X_test_class, y_test_class)

# Percentage of the training set used in each epoch, and its mean over epochs.
selection_rates_class = [s/t*100 for s, t in zip(history_class['selected_patterns'], history_class['total_patterns'])]
avg_selection_class = np.mean(selection_rates_class)

# 1. Training and Validation Loss Over Time
plt.figure(figsize=(8, 6))
plt.plot(history_class['epochs'], history_class['training_error'], 'b-', linewidth=2, label='Training Loss')
plt.plot(history_class['epochs'], history_class['validation_error'], 'r-', linewidth=2, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss (BCE)')
# Title names the dataset actually trained on in this cell.
plt.title('SASLA Classification - Training & Validation Loss (Breast Cancer)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# 2. Pattern Selection Over Time
plt.figure(figsize=(8, 6))
plt.plot(history_class['epochs'], selection_rates_class, 'g-', linewidth=2, label='Selection Rate')
plt.axhline(y=avg_selection_class, color='g', linestyle='--', alpha=0.7, label=f'Average: {avg_selection_class:.1f}%')
plt.xlabel('Epoch')
plt.ylabel('Selection Rate (%)')
plt.title('SASLA Classification - Pattern Selection (Breast Cancer)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# Summary Statistics
print("\n" + "="*60)
print("SASLA CLASSIFICATION SUMMARY (BREAST CANCER)")
print("="*60)
print(f"Accuracy: {metrics_class['accuracy']:.2f}%")
print(f"Precision: {metrics_class['precision']:.4f}")
print(f"Recall: {metrics_class['recall']:.4f}")
print(f"F1-Score: {metrics_class['f1_score']:.4f}")
print(f"Final Training Loss: {history_class['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_class['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_class:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_class:.1f}% reduction in training data")
print(f"Total Epochs: {max_epochs_class}")
print("\nConfusion Matrix:")
print(metrics_class['confusion_matrix'])
print("\nClassification Report:")
print(metrics_class['classification_report'])

In [None]:
# SASLA classification experiment on the heart-disease dataset.
result = load_and_prepare_dataset(dataset=df_class_med, task_type='classification', random_state=42)
X_train_class_med, y_train_class_med, X_test_class_med, y_test_class_med, feature_scaler_med = result

# Model parameters - EASILY ADJUSTABLE
input_size_class_med = X_train_class_med.shape[1]
hidden_size_class_med = 64    # Adjust network size
learning_rate_class_med = 0.001  # Adjust learning rate
alpha_class_med = 0.9        # Adjust selectivity
max_epochs_class_med = 1000     # Adjust training duration
weight_decay_class_med = 0.01 # L2 regularization

# Create and train SASLA model
sasla_classifier_med = SASLA_class(
    input_size=input_size_class_med,
    hidden_size=hidden_size_class_med,
    output_size=1,
    learning_rate=learning_rate_class_med,
    weight_decay=weight_decay_class_med,
    alpha=alpha_class_med,
    task_type='classification',
    device='cpu'
)

# Train
history_class_med = sasla_classifier_med.train(
    X_train_class_med, y_train_class_med, X_test_class_med, y_test_class_med,
    max_epochs=max_epochs_class_med, verbose=True
)

# Evaluate
metrics_class_med = sasla_classifier_med.evaluate(X_test_class_med, y_test_class_med)

# Percentage of the training set used in each epoch, and its mean over epochs.
selection_rates_class_med = [s/t*100 for s, t in zip(history_class_med['selected_patterns'], history_class_med['total_patterns'])]
avg_selection_class_med = np.mean(selection_rates_class_med)

# 1. Training and Validation Loss Over Time
plt.figure(figsize=(8, 6))
plt.plot(history_class_med['epochs'], history_class_med['training_error'], 'b-', linewidth=2, label='Training Loss')
plt.plot(history_class_med['epochs'], history_class_med['validation_error'], 'r-', linewidth=2, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss (BCE)')
# Title names the dataset actually trained on in this cell.
plt.title('SASLA Classification - Training & Validation Loss (Heart Disease)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# 2. Pattern Selection Over Time
plt.figure(figsize=(8, 6))
plt.plot(history_class_med['epochs'], selection_rates_class_med, 'g-', linewidth=2, label='Selection Rate')
plt.axhline(y=avg_selection_class_med, color='g', linestyle='--', alpha=0.7, label=f'Average: {avg_selection_class_med:.1f}%')
plt.xlabel('Epoch')
plt.ylabel('Selection Rate (%)')
plt.title('SASLA Classification - Pattern Selection (Heart Disease)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# Summary Statistics
print("\n" + "="*60)
print("SASLA CLASSIFICATION SUMMARY (HEART DISEASE)")
print("="*60)
print(f"Accuracy: {metrics_class_med['accuracy']:.2f}%")
print(f"Precision: {metrics_class_med['precision']:.4f}")
print(f"Recall: {metrics_class_med['recall']:.4f}")
print(f"F1-Score: {metrics_class_med['f1_score']:.4f}")
print(f"Final Training Loss: {history_class_med['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_class_med['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_class_med:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_class_med:.1f}% reduction in training data")
print(f"Total Epochs: {max_epochs_class_med}")
print("\nConfusion Matrix:")
print(metrics_class_med['confusion_matrix'])
print("\nClassification Report:")
print(metrics_class_med['classification_report'])

In [None]:
# SASLA classification experiment on the diabetes dataset.
result = load_and_prepare_dataset(dataset=df_class_complex, task_type='classification', random_state=42)
(X_train_class_complex, y_train_class_complex,
 X_test_class_complex, y_test_class_complex,
 feature_scaler_complex) = result

# Tunable hyper-parameters for this run.
input_size_class_complex = X_train_class_complex.shape[1]
hidden_size_class_complex = 128       # hidden units
learning_rate_class_complex = 0.001   # Adam step size
alpha_class_complex = 0.9             # SASLA selection constant
max_epochs_class_complex = 1000       # training epochs
weight_decay_class_complex = 0.01     # L2 regularization

# Build the model, then train it with validation tracking on the test split.
sasla_classifier_complex = SASLA_class(
    input_size=input_size_class_complex,
    hidden_size=hidden_size_class_complex,
    output_size=1,
    learning_rate=learning_rate_class_complex,
    weight_decay=weight_decay_class_complex,
    alpha=alpha_class_complex,
    task_type='classification',
    device='cpu',
)
history_class_complex = sasla_classifier_complex.train(
    X_train_class_complex, y_train_class_complex,
    X_test_class_complex, y_test_class_complex,
    max_epochs=max_epochs_class_complex,
    verbose=True,
)

# Held-out metrics plus the share of training patterns used in each epoch.
metrics_class_complex = sasla_classifier_complex.evaluate(X_test_class_complex, y_test_class_complex)
selection_rates_class_complex = [
    selected / total * 100
    for selected, total in zip(history_class_complex['selected_patterns'],
                               history_class_complex['total_patterns'])
]
avg_selection_class_complex = np.mean(selection_rates_class_complex)

# Figure 1: training vs. validation loss per epoch.
fig_loss_complex, ax_loss_complex = plt.subplots(figsize=(8, 6))
ax_loss_complex.plot(history_class_complex['epochs'], history_class_complex['training_error'],
                     'b-', linewidth=2, label='Training Loss')
ax_loss_complex.plot(history_class_complex['epochs'], history_class_complex['validation_error'],
                     'r-', linewidth=2, label='Validation Loss')
ax_loss_complex.set_xlabel('Epoch')
ax_loss_complex.set_ylabel('Loss (BCE)')
ax_loss_complex.set_title('SASLA Classification - Training & Validation Loss (Diabetes)')
ax_loss_complex.grid(True, alpha=0.3)
ax_loss_complex.legend()
plt.show()

# Figure 2: selection rate per epoch with its mean as a dashed line.
fig_sel_complex, ax_sel_complex = plt.subplots(figsize=(8, 6))
ax_sel_complex.plot(history_class_complex['epochs'], selection_rates_class_complex,
                    'g-', linewidth=2, label='Selection Rate')
ax_sel_complex.axhline(y=avg_selection_class_complex, color='g', linestyle='--', alpha=0.7,
                       label=f'Average: {avg_selection_class_complex:.1f}%')
ax_sel_complex.set_xlabel('Epoch')
ax_sel_complex.set_ylabel('Selection Rate (%)')
ax_sel_complex.set_title('SASLA Classification - Pattern Selection (Diabetes)')
ax_sel_complex.grid(True, alpha=0.3)
ax_sel_complex.legend()
plt.show()


# Text summary of the run.
print("\n" + "="*60)
print("SASLA CLASSIFICATION SUMMARY (DIABETES)")
print("="*60)
print(f"Accuracy: {metrics_class_complex['accuracy']:.2f}%")
print(f"Precision: {metrics_class_complex['precision']:.4f}")
print(f"Recall: {metrics_class_complex['recall']:.4f}")
print(f"F1-Score: {metrics_class_complex['f1_score']:.4f}")
print(f"Final Training Loss: {history_class_complex['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_class_complex['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_class_complex:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_class_complex:.1f}% reduction in training data")
print(f"Total Epochs: {max_epochs_class_complex}")
print("\nConfusion Matrix:")
print(metrics_class_complex['confusion_matrix'])
print("\nClassification Report:")
print(metrics_class_complex['classification_report'])

In [None]:
# SASLA regression run on df_reg_simple.
#
# The regression cells all reuse the same variable names (X_train_reg,
# sasla_regressor, history_reg, ...), so running this cell overwrites the
# results of any other regression run. The label below is stamped onto the
# plot titles and summary banner so this cell's output stays identifiable.
dataset_label_reg = 'Housing - Simple'

# load_and_prepare_dataset may return five or six items; the sixth, when
# present, is unpacked as the target scaler.
result = load_and_prepare_dataset(dataset=df_reg_simple, task_type='regression', random_state=42)
if len(result) == 6:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler, target_scaler = result
else:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler = result
    target_scaler = None

# Model parameters - EASILY ADJUSTABLE
input_size_reg = X_train_reg.shape[1]   # one input unit per feature column
hidden_size_reg = 64                    # hidden layer width
learning_rate_reg = 0.0001              # optimizer step size
# Selectivity passed to SASLA as `alpha`. The original note read
# "0.9 = 10% selection, 0.95 = 5% selection" -- TODO confirm against SASLA_class.
alpha_reg = 0.9
max_epochs_reg = 1500                   # training duration cap
weight_decay_reg = 0.01                 # L2 regularization

# Create the SASLA model
sasla_regressor = SASLA_class(
    input_size=input_size_reg,
    hidden_size=hidden_size_reg,
    output_size=1,
    learning_rate=learning_rate_reg,
    weight_decay=weight_decay_reg,
    alpha=alpha_reg,
    task_type='regression',
    device='cpu'
)

# Train. The test split is passed as the third/fourth positional arguments --
# presumably train() uses it as the validation data behind
# history['validation_error']; confirm against SASLA_class.train.
history_reg = sasla_regressor.train(
    X_train_reg, y_train_reg, X_test_reg, y_test_reg,
    max_epochs=max_epochs_reg, verbose=True
)

# Evaluate on the test split
metrics_reg = sasla_regressor.evaluate(X_test_reg, y_test_reg)

# Percentage of patterns selected at each recorded epoch, computed once and
# reused for both the mean and the plot.
selection_rates_reg = [
    selected / total * 100
    for selected, total in zip(history_reg['selected_patterns'], history_reg['total_patterns'])
]
avg_selection_reg = np.mean(selection_rates_reg)

# Plot 1: Training and Validation Loss Over Time
fig_loss_reg, ax_loss_reg = plt.subplots(figsize=(10, 6))
ax_loss_reg.plot(history_reg['epochs'], history_reg['training_error'], 'b-', linewidth=2, label='Training Loss')
ax_loss_reg.plot(history_reg['epochs'], history_reg['validation_error'], 'r-', linewidth=2, label='Validation Loss')
ax_loss_reg.set_xlabel('Epoch')
ax_loss_reg.set_ylabel('Loss (MSE)')
ax_loss_reg.set_title(f'SASLA Regression - Training & Validation Loss ({dataset_label_reg})')
ax_loss_reg.grid(True, alpha=0.3)
ax_loss_reg.legend()
fig_loss_reg.tight_layout()
plt.show()

# Plot 2: Pattern Selection Over Time (dashed line marks the mean rate)
fig_sel_reg, ax_sel_reg = plt.subplots(figsize=(10, 6))
ax_sel_reg.plot(history_reg['epochs'], selection_rates_reg, 'g-', linewidth=2, label='Selection Rate')
ax_sel_reg.axhline(y=avg_selection_reg, color='g', linestyle='--', alpha=0.7, label=f'Average: {avg_selection_reg:.1f}%')
ax_sel_reg.set_xlabel('Epoch')
ax_sel_reg.set_ylabel('Selection Rate (%)')
ax_sel_reg.set_title(f'SASLA Regression - Pattern Selection ({dataset_label_reg})')
ax_sel_reg.grid(True, alpha=0.3)
ax_sel_reg.legend()
fig_sel_reg.tight_layout()
plt.show()

# Summary Statistics
print("\n" + "="*60)
print(f"SASLA REGRESSION SUMMARY ({dataset_label_reg.upper()})")
print("="*60)
print(f"R² Score: {metrics_reg['r2']:.4f}")
print(f"RMSE (scaled): {metrics_reg['rmse']:.4f}")
print(f"MSE (scaled): {metrics_reg['mse']:.4f}")
print(f"Final Training Loss: {history_reg['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_reg['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_reg:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_reg:.1f}% reduction in training data")
# Reports the configured max_epochs value, not a count taken from history.
print(f"Total Epochs: {max_epochs_reg}")

In [None]:
# SASLA regression run on df_reg_med.
#
# The regression cells all reuse the same variable names (X_train_reg,
# sasla_regressor, history_reg, ...), so running this cell overwrites the
# results of any other regression run. The label below is stamped onto the
# plot titles and summary banner so this cell's output stays identifiable.
dataset_label_reg = 'Real Estate Valuation - Medium'

# load_and_prepare_dataset may return five or six items; the sixth, when
# present, is unpacked as the target scaler.
result = load_and_prepare_dataset(dataset=df_reg_med, task_type='regression', random_state=42)
if len(result) == 6:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler, target_scaler = result
else:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler = result
    target_scaler = None

# Model parameters - EASILY ADJUSTABLE
input_size_reg = X_train_reg.shape[1]   # one input unit per feature column
hidden_size_reg = 64                    # hidden layer width
learning_rate_reg = 0.0005              # optimizer step size
# Selectivity passed to SASLA as `alpha`. The original note read
# "0.9 = 10% selection, 0.95 = 5% selection" -- TODO confirm against SASLA_class.
alpha_reg = 0.9
max_epochs_reg = 1500                   # training duration cap
weight_decay_reg = 0.01                 # L2 regularization

# Create the SASLA model
sasla_regressor = SASLA_class(
    input_size=input_size_reg,
    hidden_size=hidden_size_reg,
    output_size=1,
    learning_rate=learning_rate_reg,
    weight_decay=weight_decay_reg,
    alpha=alpha_reg,
    task_type='regression',
    device='cpu'
)

# Train. The test split is passed as the third/fourth positional arguments --
# presumably train() uses it as the validation data behind
# history['validation_error']; confirm against SASLA_class.train.
history_reg = sasla_regressor.train(
    X_train_reg, y_train_reg, X_test_reg, y_test_reg,
    max_epochs=max_epochs_reg, verbose=True
)

# Evaluate on the test split
metrics_reg = sasla_regressor.evaluate(X_test_reg, y_test_reg)

# Percentage of patterns selected at each recorded epoch, computed once and
# reused for both the mean and the plot.
selection_rates_reg = [
    selected / total * 100
    for selected, total in zip(history_reg['selected_patterns'], history_reg['total_patterns'])
]
avg_selection_reg = np.mean(selection_rates_reg)

# Plot 1: Training and Validation Loss Over Time
fig_loss_reg, ax_loss_reg = plt.subplots(figsize=(10, 6))
ax_loss_reg.plot(history_reg['epochs'], history_reg['training_error'], 'b-', linewidth=2, label='Training Loss')
ax_loss_reg.plot(history_reg['epochs'], history_reg['validation_error'], 'r-', linewidth=2, label='Validation Loss')
ax_loss_reg.set_xlabel('Epoch')
ax_loss_reg.set_ylabel('Loss (MSE)')
ax_loss_reg.set_title(f'SASLA Regression - Training & Validation Loss ({dataset_label_reg})')
ax_loss_reg.grid(True, alpha=0.3)
ax_loss_reg.legend()
fig_loss_reg.tight_layout()
plt.show()

# Plot 2: Pattern Selection Over Time (dashed line marks the mean rate)
fig_sel_reg, ax_sel_reg = plt.subplots(figsize=(10, 6))
ax_sel_reg.plot(history_reg['epochs'], selection_rates_reg, 'g-', linewidth=2, label='Selection Rate')
ax_sel_reg.axhline(y=avg_selection_reg, color='g', linestyle='--', alpha=0.7, label=f'Average: {avg_selection_reg:.1f}%')
ax_sel_reg.set_xlabel('Epoch')
ax_sel_reg.set_ylabel('Selection Rate (%)')
ax_sel_reg.set_title(f'SASLA Regression - Pattern Selection ({dataset_label_reg})')
ax_sel_reg.grid(True, alpha=0.3)
ax_sel_reg.legend()
fig_sel_reg.tight_layout()
plt.show()

# Summary Statistics
print("\n" + "="*60)
print(f"SASLA REGRESSION SUMMARY ({dataset_label_reg.upper()})")
print("="*60)
print(f"R² Score: {metrics_reg['r2']:.4f}")
print(f"RMSE (scaled): {metrics_reg['rmse']:.4f}")
print(f"MSE (scaled): {metrics_reg['mse']:.4f}")
print(f"Final Training Loss: {history_reg['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_reg['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_reg:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_reg:.1f}% reduction in training data")
# Reports the configured max_epochs value, not a count taken from history.
print(f"Total Epochs: {max_epochs_reg}")

In [None]:

# SASLA regression run on df_reg_complex.
#
# The regression cells all reuse the same variable names (X_train_reg,
# sasla_regressor, history_reg, ...), so running this cell overwrites the
# results of any other regression run. The label below is stamped onto the
# plot titles and summary banner so this cell's output stays identifiable.
dataset_label_reg = 'Housing - Complex'

# load_and_prepare_dataset may return five or six items; the sixth, when
# present, is unpacked as the target scaler.
result = load_and_prepare_dataset(dataset=df_reg_complex, task_type='regression', random_state=42)
if len(result) == 6:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler, target_scaler = result
else:
    X_train_reg, y_train_reg, X_test_reg, y_test_reg, feature_scaler = result
    target_scaler = None

# Model parameters - EASILY ADJUSTABLE
input_size_reg = X_train_reg.shape[1]   # one input unit per feature column
hidden_size_reg = 64                    # hidden layer width
learning_rate_reg = 0.001               # optimizer step size
# Selectivity passed to SASLA as `alpha`. The original note read
# "0.9 = 10% selection, 0.95 = 5% selection" -- TODO confirm against SASLA_class.
alpha_reg = 0.9
max_epochs_reg = 1500                   # training duration cap
weight_decay_reg = 0.01                 # L2 regularization

# Create the SASLA model
sasla_regressor = SASLA_class(
    input_size=input_size_reg,
    hidden_size=hidden_size_reg,
    output_size=1,
    learning_rate=learning_rate_reg,
    weight_decay=weight_decay_reg,
    alpha=alpha_reg,
    task_type='regression',
    device='cpu'
)

# Train. The test split is passed as the third/fourth positional arguments --
# presumably train() uses it as the validation data behind
# history['validation_error']; confirm against SASLA_class.train.
history_reg = sasla_regressor.train(
    X_train_reg, y_train_reg, X_test_reg, y_test_reg,
    max_epochs=max_epochs_reg, verbose=True
)

# Evaluate on the test split
metrics_reg = sasla_regressor.evaluate(X_test_reg, y_test_reg)

# Percentage of patterns selected at each recorded epoch, computed once and
# reused for both the mean and the plot.
selection_rates_reg = [
    selected / total * 100
    for selected, total in zip(history_reg['selected_patterns'], history_reg['total_patterns'])
]
avg_selection_reg = np.mean(selection_rates_reg)

# Plot 1: Training and Validation Loss Over Time
fig_loss_reg, ax_loss_reg = plt.subplots(figsize=(10, 6))
ax_loss_reg.plot(history_reg['epochs'], history_reg['training_error'], 'b-', linewidth=2, label='Training Loss')
ax_loss_reg.plot(history_reg['epochs'], history_reg['validation_error'], 'r-', linewidth=2, label='Validation Loss')
ax_loss_reg.set_xlabel('Epoch')
ax_loss_reg.set_ylabel('Loss (MSE)')
ax_loss_reg.set_title(f'SASLA Regression - Training & Validation Loss ({dataset_label_reg})')
ax_loss_reg.grid(True, alpha=0.3)
ax_loss_reg.legend()
fig_loss_reg.tight_layout()
plt.show()

# Plot 2: Pattern Selection Over Time (dashed line marks the mean rate)
fig_sel_reg, ax_sel_reg = plt.subplots(figsize=(10, 6))
ax_sel_reg.plot(history_reg['epochs'], selection_rates_reg, 'g-', linewidth=2, label='Selection Rate')
ax_sel_reg.axhline(y=avg_selection_reg, color='g', linestyle='--', alpha=0.7, label=f'Average: {avg_selection_reg:.1f}%')
ax_sel_reg.set_xlabel('Epoch')
ax_sel_reg.set_ylabel('Selection Rate (%)')
ax_sel_reg.set_title(f'SASLA Regression - Pattern Selection ({dataset_label_reg})')
ax_sel_reg.grid(True, alpha=0.3)
ax_sel_reg.legend()
fig_sel_reg.tight_layout()
plt.show()

# Summary Statistics
print("\n" + "="*60)
print(f"SASLA REGRESSION SUMMARY ({dataset_label_reg.upper()})")
print("="*60)
print(f"R² Score: {metrics_reg['r2']:.4f}")
print(f"RMSE (scaled): {metrics_reg['rmse']:.4f}")
print(f"MSE (scaled): {metrics_reg['mse']:.4f}")
print(f"Final Training Loss: {history_reg['training_error'][-1]:.6f}")
print(f"Final Validation Loss: {history_reg['validation_error'][-1]:.6f}")
print(f"Data Efficiency: Only used {avg_selection_reg:.1f}% of training data")
print(f"Data Savings: {100-avg_selection_reg:.1f}% reduction in training data")
# Reports the configured max_epochs value, not a count taken from history.
print(f"Total Epochs: {max_epochs_reg}")