In [None]:
import wandb
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import KFold, StratifiedKFold, StratifiedShuffleSplit, StratifiedGroupKFold
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import wandb
from itertools import product
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from gemelli.preprocessing import matrix_rclr
from sklearn.metrics import mean_absolute_error, r2_score
import math
from functools import partial
from biom import load_table
from scipy import stats

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Variant in which each PCA vector is projected into multiple "views" in a higher-dimensional space.
class NormalizedTransformerBlock(nn.Module):
    """Attention + MLP block that keeps every token vector at unit L2 norm.

    Each sub-layer output is normalised, blended with the current state as
    ``x + alpha * (h - x)`` using a learnable scalar ``alpha``, and the result
    is normalised again.

    Args:
        input_dim: size of the last (feature) dimension of the input; also the
            attention embedding size (4 heads are used).
        hidden_dim: width of the hidden layer of the MLP.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=4,
            dropout=0,
            batch_first=True,
        )
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, input_dim),
        )
        # Learnable interpolation weights for the attention and MLP updates.
        self.alphaA = nn.Parameter(torch.tensor(1.0))
        self.alphaM = nn.Parameter(torch.tensor(1.0))

    @staticmethod
    def _unit(t):
        """L2-normalise ``t`` along its last dimension."""
        return F.normalize(t, p=2, dim=-1)

    def forward(self, x):
        """Return the updated, unit-norm representation with the same shape as ``x``."""
        state = self._unit(x)

        # Self-attention update.
        attn_out, _ = self.attention(state, state, state)
        state = self._unit(state + self.alphaA * (self._unit(attn_out) - state))

        # Feed-forward update.
        mlp_out = self.mlp(state)
        state = self._unit(state + self.alphaM * (self._unit(mlp_out) - state))

        return state

class NormalizedTransformer(nn.Module):
    """Regressor that expands one input vector into several token "views".

    The input vector is projected to ``hidden_dim``, expanded into
    ``projection_dim`` vectors of size ``hidden_dim``, passed through a stack
    of ``NormalizedTransformerBlock`` layers, mean-pooled over the views and
    mapped to ``output_dim`` by a linear head.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: token width used inside the transformer blocks.
        num_layers: number of transformer blocks.
        output_dim: size of the regression output.
        projection_dim: number of views generated per sample.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, projection_dim=4):
        super().__init__()
        self.projection_dim = projection_dim

        # Input vector -> hidden_dim.
        self.pca_projection = nn.Linear(input_dim, hidden_dim)

        # hidden_dim -> projection_dim stacked views (flattened), layer-normalised.
        expanded_dim = projection_dim * hidden_dim
        self.view_generator = nn.Sequential(
            nn.Linear(hidden_dim, expanded_dim),
            nn.LayerNorm(expanded_dim),
        )

        blocks = []
        for _ in range(num_layers):
            blocks.append(NormalizedTransformerBlock(hidden_dim, hidden_dim * 2))
        self.transformer_blocks = nn.ModuleList(blocks)

        self.regression_head = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, input_dim]`` to ``{'regression_output': [batch, output_dim]}``."""
        n_samples = x.shape[0]

        # [batch, hidden_dim], unit norm.
        embedded = F.normalize(self.pca_projection(x), p=2, dim=-1)

        # [batch, projection_dim * hidden_dim] -> [batch, projection_dim, hidden_dim].
        views = self.view_generator(embedded).view(n_samples, self.projection_dim, -1)
        tokens = F.normalize(views, p=2, dim=-1)

        for block in self.transformer_blocks:
            tokens = block(tokens)

        # Average over the views: [batch, hidden_dim].
        pooled = tokens.mean(dim=1)

        return {'regression_output': self.regression_head(pooled)}


In [None]:
def calculate_sparsity(model, threshold=1e-5):
    """
    Average, over parameter tensors, of the fraction of near-zero entries:

        S = (1/D) * sum_{i=1}^D (1/n_i * sum_{j=1}^{n_i} I(|a_{i,j}| < τ))

    where D is the number of non-scalar parameter tensors, n_i the number of
    entries in tensor i, and τ the threshold. Every tensor contributes equally
    regardless of its size; 0-dimensional parameters are ignored.

    Args:
        model: PyTorch model
        threshold: float, magnitude below which an entry counts as sparse

    Returns:
        float: sparsity score between 0 and 1 (0.0 if the model has no
        non-scalar parameters)
    """
    per_tensor_fraction = [
        (param.abs() < threshold).float().mean().item()
        for param in model.parameters()
        if param.dim() > 0  # scalar parameters are not counted as layers
    ]

    if not per_tensor_fraction:
        return 0.0
    return sum(per_tensor_fraction) / len(per_tensor_fraction)

def calculate_weight_entropy(model, epsilon=1e-10):
    """
    Shannon entropy of the normalised absolute weights, per parameter tensor.

    For each non-scalar parameter tensor W the magnitudes are turned into a
    distribution p_k = |w_k| / (sum_k |w_k| + epsilon) and the entropy
    H(W) = -sum_k p_k * log(p_k + epsilon) is computed. 0-dimensional
    parameters are skipped.

    Args:
        model: PyTorch model
        epsilon: small constant guarding against division by zero and log(0)

    Returns:
        float: sum of the per-tensor entropies
        dict: parameter name -> entropy of that tensor
    """
    layer_entropies = {}

    for name, param in model.named_parameters():
        if param.dim() == 0:
            continue  # scalar parameters carry no distribution

        magnitudes = param.flatten().abs()
        probabilities = magnitudes / (magnitudes.sum() + epsilon)
        weighted_logs = probabilities * torch.log(probabilities + epsilon)
        layer_entropies[name] = -weighted_logs.sum().item()

    total_entropy = sum(layer_entropies.values(), 0.0)
    return total_entropy, layer_entropies

In [None]:
def _collect_predictions(model, loader, criterion, device):
    """Run ``model`` over ``loader``; return ``(summed_batch_loss, y_true, y_pred)`` as numpy arrays.

    The caller is expected to have put the model in eval mode and disabled gradients.
    """
    loss_sum = 0.0
    targets, predictions = [], []
    for x_batch, y_batch in loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        preds = model(x_batch)['regression_output']
        loss_sum += criterion(preds, y_batch).item()
        targets.append(y_batch.cpu().numpy())
        predictions.append(preds.cpu().numpy())
    return loss_sum, np.concatenate(targets), np.concatenate(predictions)


def train_with_test_loss(model, dataloaders, criterion, optimizer, run, num_epochs=20, device='cuda', scaler_y=None):
    """Train ``model``, log per-epoch metrics, and restore the best-validation weights.

    After every epoch the model is evaluated on the validation and test
    loaders; losses, MAE/R² (on the original target scale when ``scaler_y`` is
    given), sparsity, weight entropy and the learning rate are sent to
    ``run.log``. The test loader is only monitored; model selection uses the
    validation MAE.

    Args:
        model: module whose forward returns a dict with key ``'regression_output'``.
        dataloaders: tuple ``(train_loader, val_loader, test_loader)``.
        criterion: loss taking ``(predictions, targets)``.
        optimizer: optimizer over ``model``'s parameters.
        run: object with a ``log(dict)`` method (e.g. a wandb run).
        num_epochs: number of passes over the training loader.
        device: device the batches are moved to.
        scaler_y: optional fitted scaler; its ``inverse_transform`` is applied
            to targets and predictions before computing MAE/R².

    Returns:
        None. ``model`` is modified in place and, if at least one epoch ran,
        ends with the weights of the epoch that had the lowest validation MAE.
    """
    train_loader, val_loader, test_loader = dataloaders

    # Cosine annealing with warm restarts, stepped once per epoch.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=500,   # epochs until the first restart
        T_mult=1,  # every subsequent cycle has the same length
        eta_min=0.0005
    )

    best_val_mae = float('inf')
    best_model_state = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs['regression_output'], y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # One scheduler step per epoch
        scheduler.step()
        current_lr = scheduler.get_last_lr()[0]

        # Validation and test phases (test is monitored only)
        model.eval()
        with torch.no_grad():
            val_loss, y_true_val, y_pred_val = _collect_predictions(model, val_loader, criterion, device)
            test_loss, y_true_test, y_pred_test = _collect_predictions(model, test_loader, criterion, device)

        if scaler_y is not None:
            y_true_val_original = scaler_y.inverse_transform(y_true_val)
            y_pred_val_original = scaler_y.inverse_transform(y_pred_val)
            y_true_test_original = scaler_y.inverse_transform(y_true_test)
            y_pred_test_original = scaler_y.inverse_transform(y_pred_test)
        else:
            y_true_val_original = y_true_val
            y_pred_val_original = y_pred_val
            y_true_test_original = y_true_test
            y_pred_test_original = y_pred_test

        val_mae = mean_absolute_error(y_true_val_original, y_pred_val_original)
        val_r2 = r2_score(y_true_val_original, y_pred_val_original)
        test_mae = mean_absolute_error(y_true_test_original, y_pred_test_original)
        test_r2 = r2_score(y_true_test_original, y_pred_test_original)

        if val_mae < best_val_mae:
            best_val_mae = val_mae
            # state_dict() returns tensors that share storage with the live
            # parameters, so a shallow dict copy would keep changing as
            # training continues. Clone every tensor to take a real snapshot.
            best_model_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        test_loss /= len(test_loader)

        # Weight statistics of the current (not the best) model
        sparsity = calculate_sparsity(model)
        abs_weight_entropy, layer_entropies = calculate_weight_entropy(model)

        # Log all metrics, including test metrics and learning rate
        run.log({
            'epoch': epoch,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'test_loss': test_loss,
            'val_mae_original_scale': val_mae,
            'val_r2_original_scale': val_r2,
            'test_mae_original_scale': test_mae,
            'test_r2_original_scale': test_r2,
            'sparsity': sparsity,
            'absolute_weight_entropy': abs_weight_entropy,
            'learning_rate': current_lr
        })

    # After training, restore the snapshot with the lowest validation MAE
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

In [None]:
def _save_regression_plot(predictions_df, overall_mae, mae_std, run):
    """Draw the pooled true-vs-predicted age scatter plot, save it as
    ``final_regression_plot.png`` and log the image to ``run``."""
    true_age = predictions_df['true_age']
    predicted_age = predictions_df['predicted_age']
    age_range = [min(true_age), max(true_age)]

    plt.figure(figsize=(8, 8))
    try:
        # Scatter of all held-out predictions
        plt.scatter(true_age, predicted_age,
                    alpha=0.3, color='#4169E1',
                    edgecolor='none', s=60, label='Test Predictions')

        # Least-squares best fit line
        slope, intercept, r_value, _p_value, _std_err = stats.linregress(true_age, predicted_age)
        line_x = np.linspace(age_range[0], age_range[1], 100)
        plt.plot(line_x, slope * line_x + intercept, color='#C4161C', linestyle='--',
                 label=f'Best Fit (R² = {r_value**2:.3f})')

        # Perfect prediction line (y = x)
        plt.plot(age_range, age_range,
                 color='black', linestyle='-', alpha=0.3, label='Perfect Prediction')

        plt.xlabel("True Age (years)", fontsize=12, fontweight='bold')
        plt.ylabel("Predicted Age (years)", fontsize=12, fontweight='bold')
        plt.title(f"MAE = {overall_mae:.2f} ± {mae_std:.2f} years",
                  fontsize=14, fontweight='bold', pad=15)

        plt.grid(True, linestyle='--', alpha=0.3)
        plt.legend(frameon=True, facecolor='white', framealpha=1,
                   edgecolor='none', loc='upper left')
        plt.axis('equal')
        plt.tight_layout()

        for spine in plt.gca().spines.values():
            spine.set_linewidth(1.5)

        plt.savefig("final_regression_plot.png", dpi=300, bbox_inches='tight')
        run.log({"final_regression_plot": wandb.Image("final_regression_plot.png")})
    finally:
        # Release the figure even when plotting or logging fails.
        plt.close()


def run_cv_experiment(params, n_splits=5, device='cuda'):
    """
    Run cross-validation experiment with transformer model and save indexed predictions.

    Reads ``control.csv`` and ``sampleMetadata.csv`` from the working
    directory, applies rCLR + PCA, runs stratified group K-fold CV, trains one
    ``NormalizedTransformer`` per fold, and writes ``predictions.csv`` and
    ``final_regression_plot.png``. All metrics are logged to a wandb run that
    is always finished before returning.

    Args:
        params (dict): Model and training parameters. Required keys:
            ``body_site``, ``test_split`` (fraction of each training fold held
            out for validation), ``batch_size``, ``num_layers``,
            ``hidden_dim``, ``optimizer`` (optimizer class),
            ``learning_rate``, ``weight_decay``, ``num_epochs``. Optional:
            ``normalize_X`` / ``normalize_y`` (default True) and
            ``pca_components`` (default 256).
        n_splits (int): Number of CV splits
        device (str): Computing device ('cuda' or 'cpu')

    Returns:
        dict: ``overall_mae``, ``overall_r2`` (means over the folds that
        completed) and ``predictions_df``; each is None when it could not be
        produced. Errors are logged and printed rather than raised.
    """
    # Initialize wandb
    run = wandb.init(
        project=f"wgs_single_reviewer_{params['body_site']}",
        config=params,
        reinit=True
    )

    # Defaults returned when the experiment fails before producing results.
    overall_mae = None
    overall_r2 = None
    predictions_df = None

    try:
        # Data preparation for WGS
        table = pd.read_csv('control.csv', index_col=0)
        age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})
        age_metadata = age_metadata.loc[(age_metadata.age.notna()) & (age_metadata.body_site == params['body_site'])]
        table = table.loc[table.index.isin(age_metadata.index)]
        table = table.drop_duplicates(subset='subject_id', keep='first')
        shared_index = table.index.intersection(age_metadata.index)
        table = table.loc[shared_index].drop(columns=['study_name', 'study_condition', 'subject_id'])
        age_metadata = age_metadata.loc[shared_index]

        # Remove columns with all zeros
        all_zero_columns = (table == 0).all(axis=0)
        table = table.loc[:, ~all_zero_columns]
        df = (table * 1e7).round().astype(int)

        # Data Preparation for 16S
        # table = load_table('data/skin_1975.biom').to_dataframe(dense=True).T.astype(int)
        # age_metadata = pd.read_csv('data/skin_1975_map.txt', sep='\t', index_col=0, dtype={'qiita_host_age': float})
        # # age_metadata = age_metadata.drop_duplicates(subset='host_subject_id')
        # table = table.loc[age_metadata.index]
        # columns_to_drop = table.columns[table.apply(lambda col: (col != 0).sum()) < 25]# drop columns with low prev
        # df = table.drop(columns=columns_to_drop).copy()
        # print(df.shape)

        # Prepare target variable and rCLR-transformed features (computed once)
        y = age_metadata.age.values.reshape(-1, 1)
        arr = np.nan_to_num(matrix_rclr(df.values), nan=0.0)
        if arr.ndim > 2:
            arr = arr.reshape(arr.shape[0], -1)

        # PCA reduction. The component count is capped by the data shape so
        # small cohorts do not make PCA raise.
        # NOTE(review): PCA and the scalers below are fit on all samples
        # before the CV split, so held-out folds influence the preprocessing.
        # Fitting them inside each fold would avoid that but changes the
        # reported metrics — confirm which is intended.
        n_components = min(params.get('pca_components', 256), arr.shape[0], arr.shape[1])
        pca = PCA(n_components=n_components)
        arr_reduced = pca.fit_transform(arr)

        print(f"Original dimensions: {arr.shape}")
        print(f"Reduced dimensions: {arr_reduced.shape}")
        print(f"Number of components: {pca.n_components_}")
        print(f"Explained variance ratio: {pca.explained_variance_ratio_.sum():.3f}")

        # Work in float32 from here on (what the model consumes)
        X_np = arr_reduced.astype(np.float32)
        y_np = y.astype(np.float32)

        # Initialize scalers
        scaler_X = StandardScaler() if params.get('normalize_X', True) else None
        scaler_y = MinMaxScaler() if params.get('normalize_y', True) else None

        # Apply normalization
        if scaler_X:
            X_np = scaler_X.fit_transform(X_np)
        if scaler_y:
            y_np = scaler_y.fit_transform(y_np)

        # Create stratification labels: age quantile bin + study + country
        n_bins = 5
        strata = pd.qcut(age_metadata.age, q=n_bins, labels=[f'age_bin_{i}' for i in range(n_bins)]).astype(str)+age_metadata.study_name.astype(str)+age_metadata.country.astype(str)
        groups = age_metadata.subject_id.astype(str)

        # Filter out samples from strata with less than 10 occurrences
        strata_counts = strata.value_counts()
        valid_strata = strata_counts[strata_counts >= 10].index
        mask = strata.isin(valid_strata).to_numpy()

        n_before = len(strata)
        X_np = X_np[mask]
        y_np = y_np[mask]
        age_metadata = age_metadata.loc[mask]
        strata = strata[mask]
        groups = groups[mask]

        # Print filtering stats
        print(f"Original samples: {n_before}")
        print(f"Samples after filtering strata with <10 occurrences: {len(strata)}")
        print(f"Removed {n_before - len(strata)} samples")

        kf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=42)

        # Predictions of successfully evaluated folds only
        predictions_dict = {
            'sample_id': [],
            'true_age': [],
            'predicted_age': [],
            'fold': []
        }

        fold_results = []

        def init_weights(m):
            """Xavier-initialise every nn.Linear weight and zero its bias."""
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)

        for fold, (train_index, test_index) in enumerate(kf.split(X_np, groups=groups, y=strata), 1):
            try:
                # Split data for current fold
                X_train_full, X_test = X_np[train_index], X_np[test_index]
                y_train_full, y_test = y_np[train_index], y_np[test_index]
                strata_train_full = strata.iloc[train_index]

                # Carve a validation split out of the training fold
                # (params['test_split'] is the validation fraction)
                sss = StratifiedShuffleSplit(n_splits=1, test_size=params['test_split'], random_state=42)
                train_index_sub, val_index = next(sss.split(X_train_full, y=strata_train_full))

                # Final train/val split
                X_train = X_train_full[train_index_sub]
                y_train = y_train_full[train_index_sub]
                X_val = X_train_full[val_index]
                y_val = y_train_full[val_index]

                # Create data loaders
                train_data = TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train).float())
                val_data = TensorDataset(torch.tensor(X_val).float(), torch.tensor(y_val).float())
                test_data = TensorDataset(torch.tensor(X_test).float(), torch.tensor(y_test).float())

                train_loader = DataLoader(train_data, batch_size=params['batch_size'], shuffle=True)
                val_loader = DataLoader(val_data, batch_size=params['batch_size'])
                # Not shuffled: evaluation below relies on test_loader order matching test_index
                test_loader = DataLoader(test_data, batch_size=params['batch_size'])

                model = NormalizedTransformer(
                    input_dim=X_train.shape[1],
                    num_layers=params['num_layers'],
                    hidden_dim=params['hidden_dim'],
                    output_dim=1,
                ).to(device)
                model.apply(init_weights)

                # Setup optimizer
                optimizer = params['optimizer'](
                    model.parameters(),
                    lr=params['learning_rate'],
                    weight_decay=params['weight_decay']
                )

                criterion = nn.MSELoss()#nn.HuberLoss(delta=10)#

                # Training phase
                try:
                    train_with_test_loss(
                        model,
                        (train_loader, val_loader, test_loader),
                        criterion,
                        optimizer,
                        run,
                        num_epochs=params['num_epochs'],
                        device=device,
                        scaler_y=scaler_y
                    )
                except RuntimeError as e:
                    if "nan" not in str(e).lower():
                        # Any other runtime failure must not fall through to
                        # evaluation of a partially trained model; let the
                        # fold-level handler record it.
                        raise
                    run.log({
                        f'fold_{fold}_error': f'NaN loss detected during training: {str(e)}',
                        f'fold_{fold}_status': 'failed_nan_loss'
                    })
                    print(f"Fold {fold} failed due to NaN loss. Skipping to next fold.")
                    continue

                # Evaluation phase
                try:
                    # Buffer this fold's rows; they are merged into
                    # predictions_dict only if the whole fold evaluates cleanly.
                    fold_ids, fold_true, fold_pred = [], [], []

                    model.eval()
                    with torch.no_grad():
                        batch_start = 0
                        for x_batch, y_batch in test_loader:
                            # Positions (within the filtered data) of the samples in this batch
                            batch_size = len(x_batch)
                            batch_indices = test_index[batch_start:batch_start + batch_size]
                            batch_start += batch_size

                            x_batch = x_batch.to(device)
                            reg_outputs = model(x_batch)['regression_output'].cpu().numpy()

                            if np.any(np.isnan(reg_outputs)):
                                raise RuntimeError("NaN values detected in model predictions")

                            # Convert predictions back to original scale
                            y_true_batch = scaler_y.inverse_transform(y_batch.numpy()) if scaler_y else y_batch.numpy()
                            y_pred_batch = scaler_y.inverse_transform(reg_outputs) if scaler_y else reg_outputs

                            fold_ids.extend(age_metadata.index[batch_indices])
                            fold_true.extend(y_true_batch.flatten())
                            fold_pred.extend(y_pred_batch.flatten())

                    # Calculate metrics for this fold
                    mae = mean_absolute_error(fold_true, fold_pred)
                    r2 = r2_score(fold_true, fold_pred)

                    # Fold succeeded: commit its predictions
                    predictions_dict['sample_id'].extend(fold_ids)
                    predictions_dict['true_age'].extend(fold_true)
                    predictions_dict['predicted_age'].extend(fold_pred)
                    predictions_dict['fold'].extend([fold] * len(fold_ids))

                    # Log results
                    fold_results.append({
                        'fold': fold,
                        'mae': mae,
                        'r2': r2
                    })

                    run.log({
                        f'fold_{fold}_mae': mae,
                        f'fold_{fold}_r2': r2,
                        f'fold_{fold}_status': 'completed'
                    })

                except Exception as e:
                    run.log({
                        f'fold_{fold}_error': f'Error during evaluation: {str(e)}',
                        f'fold_{fold}_status': 'failed_evaluation'
                    })
                    print(f"Error during evaluation of fold {fold}: {str(e)}")

            except Exception as e:
                run.log({
                    f'fold_{fold}_error': f'Fold processing error: {str(e)}',
                    f'fold_{fold}_status': 'failed_processing'
                })
                print(f"Error processing fold {fold}: {str(e)}")

        # Calculate and log overall metrics
        if fold_results:
            fold_maes = [r['mae'] for r in fold_results]
            fold_r2s = [r['r2'] for r in fold_results]
            overall_mae = np.mean(fold_maes)
            overall_r2 = np.mean(fold_r2s)
            mae_std = np.std(fold_maes)
            r2_std = np.std(fold_r2s)

            # Create predictions DataFrame
            predictions_df = pd.DataFrame(predictions_dict)

            # Save predictions locally and to wandb
            predictions_df.to_csv('predictions.csv')
            predictions_table = wandb.Table(dataframe=predictions_df)
            run.log({
                "predictions_table": predictions_table,
                'overall_mae': overall_mae,
                'overall_r2': overall_r2,
                'mae_std': mae_std,
                'r2_std': r2_std
            })

            # Final publication-quality regression plot
            _save_regression_plot(predictions_df, overall_mae, mae_std, run)

    except Exception as e:
        run.log({
            'experiment_error': str(e),
            'experiment_status': 'failed'
        })
        print(f"Fatal error in experiment: {str(e)}")

    finally:
        run.finish()

    return {
        'overall_mae': overall_mae,
        'overall_r2': overall_r2,
        'predictions_df': predictions_df
    }

In [None]:
# from _typeshed import TraceFunction
if __name__ == "__main__":
    # Candidate values for each hyperparameter (one value each = a single run)
    body_sites = ['skin']
    num_layers = [1]
    hidden_dims = [512]
    batch_sizes = [4096]
    learning_rates = [0.001]
    weight_decays = [0.001]
    test_splits = [0.2]
    optimizers = [optim.AdamW]
    n_splits=10

    # Use the GPU when PyTorch can see one
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Cartesian product of all candidate lists; the rightmost list varies
    # fastest, which matches the order of the equivalent nested loops.
    for body_site, num_layer, hidden_dim, batch_size, lr, wd, test_split, optum in product(
        body_sites,
        num_layers,
        hidden_dims,
        batch_sizes,
        learning_rates,
        weight_decays,
        test_splits,
        optimizers,
    ):
        print(f"\nTrying parameters: hidden_dim={hidden_dim}, batch_size={batch_size}, "
              f"learning_rate={lr}, weight_decay={wd}")

        current_params = {
            'hidden_dim': hidden_dim,
            'batch_size': batch_size,
            'num_layers': num_layer,
            'learning_rate': lr,
            'weight_decay': wd,
            'num_epochs': 1000,
            'optimizer': optum,
            'body_site': body_site,
            'test_split': test_split,
            'normalize_X': False,
            'normalize_y': False,
        }

        run_cv_experiment(current_params, n_splits=n_splits, device=device)

In [None]:
import optuna
from optuna.trial import TrialState
from functools import partial
import torch.optim as optim
import optuna.visualization as viz
import matplotlib.pyplot as plt

def objective(trial, device='cuda', wandb_run=None):
    """Optuna objective: run one cross-validation experiment and return its mean MAE.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        device: device forwarded to ``run_cv_experiment``.
        wandb_run: optional object with a ``log(dict)`` method; when given,
            the trial's result and parameters are logged to it.

    Returns:
        float: ``overall_mae`` reported by ``run_cv_experiment``.

    Raises:
        optuna.exceptions.TrialPruned: if the experiment raises or returns no
            MAE, so the study continues with the next trial.
    """
    # Define the parameter search space
    params = {
        'body_site': 'stool', #trial.suggest_categorical('body_site', ['skin']), #'oralcavity', 'stool',
        'hidden_dim': trial.suggest_categorical('hidden_dim', [32, 64, 128, 512, 1024]),
        'num_layers': trial.suggest_int('num_layers', 1, 3),
        'batch_size': trial.suggest_categorical('batch_size', [8192]),
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 7e-1),#, log=True),
        'weight_decay': trial.suggest_float('weight_decay', 1e-5, 1),#, log=True),
        'test_split': trial.suggest_categorical('test_split', [0.2, 0.1]),
        'num_epochs': trial.suggest_categorical('num_epochs', [100]),#100,  # Fixed parameter
        'normalize_X': trial.suggest_categorical('normalize_X', [True, False]),
        'normalize_y': trial.suggest_categorical('normalize_y', [True, False]),
    }

    # Optimizer selection
    optimizer_name = trial.suggest_categorical('optimizer', ['AdamW','SGD'])#,'SGD'
    params['optimizer'] = optim.AdamW if optimizer_name == 'AdamW' else optim.SGD

    try:
        results = run_cv_experiment(params, n_splits=10, device=device)
        mae = results['overall_mae']

        # run_cv_experiment reports internal failures by returning None
        # rather than raising; treat that as a failed trial instead of
        # handing None back to Optuna.
        if mae is None:
            raise ValueError("run_cv_experiment returned no overall_mae")
        mae = float(mae)

        # Log additional metrics to Optuna
        trial.set_user_attr('r2_score', results['overall_r2'])
        trial.set_user_attr('mae_score', mae)

        # Log to wandb if available. (A trial is still running while its
        # objective executes, so its state must not be required to be COMPLETE.)
        if wandb_run is not None:
            wandb_run.log({
                # NOTE(review): this is the current trial's MAE, not the best
                # MAE of the study; key kept for dashboard compatibility.
                "best_mae": mae,
                "trial_mae": mae,
                "r2_score": results['overall_r2'],
                **params,
                # Log the optimizer by name rather than as a class object
                "optimizer": optimizer_name,
                "trial_number": trial.number
            })

        return mae

    except Exception as e:
        print(f"Trial failed: {str(e)}")
        raise optuna.exceptions.TrialPruned()

def create_optimization_plots(study):
    """Create and save various optimization visualization plots.

    Each plot is written as an HTML file into the ``optuna_plots`` directory.
    Plots are produced independently: a plot function that is missing from
    the installed Optuna version, or that raises for this study, is reported
    and skipped without affecting the others.

    Args:
        study: Optuna study to visualise.

    Returns:
        None
    """
    # Create plots directory if it doesn't exist
    import os
    out_dir = "optuna_plots"
    os.makedirs(out_dir, exist_ok=True)

    # (optuna.visualization function name, output file, human-readable label)
    plot_specs = [
        ("plot_param_importances", "param_importances.html", "Parameter importances"),
        ("plot_optimization_history", "optimization_history.html", "Optimization history"),
        ("plot_parallel_coordinate", "parallel_coordinate.html", "Parallel coordinate"),
        ("plot_contour", "contour.html", "Contour plot"),
        ("plot_slice", "slice_plot.html", "Slice plot"),
        ("plot_intermediate_values", "intermediate_values.html", "Intermediate values"),
        # optuna.visualization has no plot_param_relationships; plot_rank is
        # used for the parameter-relationship view and is skipped on Optuna
        # versions that do not provide it.
        ("plot_rank", "param_relationships.html", "Parameter relationships"),
        # Empirical distribution function of the objective values
        ("plot_edf", "edf.html", "EDF plot"),
    ]

    saved = []
    for func_name, filename, label in plot_specs:
        plot_func = getattr(viz, func_name, None)
        if plot_func is None:
            print(f"Skipping {label}: optuna.visualization.{func_name} is not available")
            continue
        try:
            plot_func(study).write_html(f"{out_dir}/{filename}")
        except Exception as e:
            print(f"Error creating {label} ({filename}): {str(e)}")
            continue
        saved.append((label, filename))

    if saved:
        print("\nVisualization plots have been saved in the 'optuna_plots' directory:")
        for label, filename in saved:
            print(f"  - {label} ({filename})")

    if len(saved) < len(plot_specs):
        print("Some plots may not be available due to insufficient data or other requirements")

def run_optimization(n_trials=200, device='cuda'):
    """Run the Optuna hyperparameter search and print a summary of the outcome.

    An in-memory study named "microbiome_age_prediction" is created that
    minimizes the value returned by ``objective`` (reported as MAE in the
    printed messages). Sampling uses a TPE sampler seeded with 42, and the
    study is configured with a median pruner (5 startup trials, 20 warm-up
    steps, pruning check interval of 10 steps).
    NOTE(review): the pruner can only act if ``objective`` reports
    intermediate values -- confirm against ``objective``.

    Args:
        n_trials: Number of trials passed to ``study.optimize``.
        device: Forwarded unchanged to ``objective`` as its ``device``
            keyword argument.

    Returns:
        The ``optuna`` study after optimization. It is returned even when no
        trial completed, so callers can still export or inspect the trials.
    """
    study = optuna.create_study(
        direction="minimize",
        study_name="microbiome_age_prediction",
        sampler=optuna.samplers.TPESampler(seed=42),
        pruner=optuna.pruners.MedianPruner(
            n_startup_trials=5,
            n_warmup_steps=20,
            interval_steps=10
        )
    )

    study.optimize(
        partial(objective, device=device),
        n_trials=n_trials,
        callbacks=[
            # Runs after every trial; trial.value is None for trials that did not complete.
            lambda study, trial: print(f"\nTrial {trial.number} finished with MAE: {trial.value}")
        ]
    )

    pruned_trials = study.get_trials(states=[TrialState.PRUNED])
    complete_trials = study.get_trials(states=[TrialState.COMPLETE])

    # Print optimization results
    print("\nStudy statistics: ")
    print(f"  Number of finished trials: {len(study.trials)}")
    print(f"  Number of pruned trials: {len(pruned_trials)}")
    print(f"  Number of complete trials: {len(complete_trials)}")

    # study.best_trial raises ValueError when nothing completed; bail out with
    # the study intact instead of losing the whole run to that exception.
    if not complete_trials:
        print("\nNo trials completed; best trial and parameter importances are unavailable")
        return study

    print("\nBest trial:")
    trial = study.best_trial
    print(f"  Best MAE: {trial.value}")
    print("\nBest parameters:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # Parameter importances are best-effort: report the reason on failure, but
    # let KeyboardInterrupt/SystemExit propagate so the run can be interrupted.
    try:
        importances = optuna.importance.get_param_importances(study)
        print("\nParameter importances:")
        for param, importance in importances.items():
            print(f"    {param}: {importance:.3f}")
    except Exception as exc:
        print(f"Could not compute parameter importances: {exc}")

    return study

if __name__ == "__main__":
    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Launch the 100-trial hyperparameter search on the selected device.
    study = run_optimization(n_trials=100, device=device)

    # Dump the per-trial table to CSV, then render the Optuna plots.
    study.trials_dataframe().to_csv("optuna_results.csv")
    create_optimization_plots(study)

[I 2025-04-28 17:20:19,406] A new study created in memory with name: microbiome_age_prediction
[34m[1mwandb[0m: Currently logged in as: [33mtylerdmyers[0m ([33mqhat[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▃▃▃▃▃▄▅▆▇█▂▃▅▂▃▄▃▃▃▂▂▆▆▂▂▁▄▄▄▃▁▁▅▁▄▃▂▆▆▆
epoch,▂▂▃▅▆▄▁▂▅▆▂▂▃▅▁▃▆▇▁▃▃▅▇██▄▄▅▅▇▆▆▇▇█▂▇▇▂▄
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,94.58514
epoch,99
fold_10_mae,14.43467
fold_10_r2,0.40035
fold_10_status,completed
fold_1_mae,14.63437
fold_1_r2,0.37485
fold_1_status,completed
fold_2_mae,14.02804
fold_2_r2,0.35473


[I 2025-04-28 17:21:55,838] Trial 0 finished with value: 13.938607823894461 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.040667947681617946, 'weight_decay': 0.8661774840134774, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 0 with value: 13.938607823894461.



Trial 0 finished with MAE: 13.938607823894461


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▅▅▅▅▅▄▅▆▆▅▃▂▅▆▅▁▃▆▆▂█▅▅▅▅▄▅▅▁▅▅▅▅▅▅▆▅▅▁
epoch,▅▆▂▂▆▂▄▅▅▆▇█▃▃▄▄█▃▅▅▃▄██▁▃▄▄▆▆█▂▃▄▄▆▁▂▄█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,147.84726
epoch,99
fold_10_mae,10.35253
fold_10_r2,0.64133
fold_10_status,completed
fold_1_mae,10.72722
fold_1_r2,0.61572
fold_1_status,completed
fold_2_mae,12.67162
fold_2_r2,0.48201


[I 2025-04-28 17:26:28,400] Trial 1 finished with value: 11.30642749998847 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.20450833252816736, 'weight_decay': 0.3663681796752588, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 1 with value: 11.30642749998847.



Trial 1 finished with MAE: 11.30642749998847


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▇▄▅▄▂▇▄▄▄▄▅▄▄▄▅▅▄▄▂▃▄▄▁█▄▃▂▂▃▂▂▄▅▄▅▅▅▅▄
epoch,▄▇█▂▂▄▄▂▅▅▂▃▄▄▆▇▇▁▃▅▁▁▂▄▄▆█▁▂▄▇▇█▁▆▂▄▅▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,92.05361
epoch,99
fold_10_mae,21.10469
fold_10_r2,-0.09185
fold_10_status,completed
fold_1_mae,26.13567
fold_1_r2,-0.98255
fold_1_status,completed
fold_2_mae,21.75979
fold_2_r2,-0.50818


[I 2025-04-28 17:28:03,992] Trial 2 finished with value: 27.57600485364903 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.4789662762282447, 'weight_decay': 0.44015809221466395, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 1 with value: 11.30642749998847.



Trial 2 finished with MAE: 27.57600485364903


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,█▇▇▇▂██▄▄▁█▅▅▄███▅▃█▂██▇▆▅▄▃█▇█▅▄▃█▅▄▄█▁
epoch,▁▂▂▃▃▂▂▃▄▆▆▂▂▂▅▃▃▇▁▂▆▂▂▄▅▂▃▅▆▆▃▃▃▄▅▆█▂▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,195.70453
epoch,99
fold_10_mae,16.08387
fold_10_r2,0.25907
fold_10_status,completed
fold_1_mae,11.18261
fold_1_r2,0.62074
fold_1_status,completed
fold_2_mae,12.31149
fold_2_r2,0.5495


[I 2025-04-28 17:30:03,668] Trial 3 finished with value: 12.44844076992538 and parameters: {'hidden_dim': 128, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.4185340061679715, 'weight_decay': 0.9218750162807666, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 1 with value: 11.30642749998847.



Trial 3 finished with MAE: 12.44844076992538


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█████▁███▃███████▃██▆▅▅█▇▂████▂▂█▇██▇▆▅▇
epoch,▁▃▃▃▄▇▄▄▄█▂▃▅▆▃▃▄▇▇▃▇▄▆▂▆█▁▁▂▃▆▇▃▆▆▇▂▃▄█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,267.11287
epoch,99
fold_10_mae,10.52701
fold_10_r2,0.61443
fold_10_status,completed
fold_1_mae,11.09286
fold_1_r2,0.60191
fold_1_status,completed
fold_2_mae,14.67716
fold_2_r2,0.3764


[I 2025-04-28 17:34:38,476] Trial 4 finished with value: 10.956215250262936 and parameters: {'hidden_dim': 512, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.5405736160599672, 'weight_decay': 0.1987236943773571, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 4 with value: 10.956215250262936.



Trial 4 finished with MAE: 10.956215250262936


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▆▆▅▅██▁██▆▅▅▅███▆▄▃███▇▅███▆▅▇▆▆▅▇▇▆█▇▅
epoch,▃▅▆█▁▅▅▇█▁▆▆▆▇▅█▁▃▆▇▁▃▄▅▅▇▁▁▂▂█▂▄▅▅▆▆▇▂▄
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,75.05359
epoch,99
fold_10_mae,12.84604
fold_10_r2,0.52213
fold_10_status,completed
fold_1_mae,21.13066
fold_1_r2,-0.29525
fold_1_status,completed
fold_2_mae,11.00292
fold_2_r2,0.60901


[I 2025-04-28 17:36:10,582] Trial 5 finished with value: 13.752495279910487 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.5107270287748614, 'weight_decay': 0.6375610957804996, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 4 with value: 10.956215250262936.



Trial 5 finished with MAE: 13.752495279910487


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▄█▆▅▅▇▆▆▇▇▅▇█▇▇▄▄▂███▇▇▆█▅▃▂█▆▂▁██▅▇▇▇▆▅
epoch,▁▂▂▃▆█▄▅▅▆▅▆▁▂▂▆▁▂▂▆▁▂▂▇▇█▄▆█▂▆█▂▃▄▁▂▅▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,118.76782
epoch,99
fold_10_mae,13.68695
fold_10_r2,0.46825
fold_10_status,completed
fold_1_mae,14.20275
fold_1_r2,0.48657
fold_1_status,completed
fold_2_mae,10.18521
fold_2_r2,0.64509


[I 2025-04-28 17:37:48,148] Trial 6 finished with value: 12.319122534751227 and parameters: {'hidden_dim': 32, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.2200560431936179, 'weight_decay': 0.5085756054577911, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 4 with value: 10.956215250262936.



Trial 6 finished with MAE: 12.319122534751227


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples
Error processing fold 1: Input contains NaN.
Error processing fold 2: Input contains NaN.
Error processing fold 3: Input contains NaN.
Error processing fold 5: Input contains NaN.
Error processing fold 8: Input contains NaN.
Error processing fold 9: Input contains NaN.
Error processing fold 10: Input contains NaN.


0,1
absolute_weight_entropy,▇▇▅▄▃▄▃▃▁▂▅▂▃▂▂▃▃▃▂▂▄▃▂██▄▃▃▄▅▂▂▄▅▃▃▇▅▄▃
epoch,▂▃▄▆▂▁▂▃▇▂▃▄▄▅▇█▂▂▂▃▄▁▂▆█▁▁▂▄▅▇▇▇▃▄▄▅▆▂▆
fold_4_mae,▁
fold_4_r2,▁
fold_6_mae,▁
fold_6_r2,▁
fold_7_mae,▁
fold_7_r2,▁
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
mae_std,▁

0,1
absolute_weight_entropy,58.7124
epoch,85
fold_10_error,Fold processing erro...
fold_10_status,failed_processing
fold_1_error,Fold processing erro...
fold_1_status,failed_processing
fold_2_error,Fold processing erro...
fold_2_status,failed_processing
fold_3_error,Fold processing erro...
fold_3_status,failed_processing


[I 2025-04-28 17:39:06,431] Trial 7 finished with value: 28601391893.987194 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.6247923733529995, 'weight_decay': 0.5393468484932316, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 4 with value: 10.956215250262936.



Trial 7 finished with MAE: 28601391893.987194


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▃▃▃▇▁▇███▁██▇▇▇▅▅▃▂██▇▄█▆▅▄▁▁▇▅▅▄▄██▇▇▄
epoch,▁▃▅▅▆▇▇▇▆▂██▁▂▂▅▆▆▂▃▄▄▅▇▂▄▅▅▁▂▂▃▅▆▂▄▅▇▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,133.83005
epoch,99
fold_10_mae,10.54741
fold_10_r2,0.65162
fold_10_status,completed
fold_1_mae,10.99313
fold_1_r2,0.61194
fold_1_status,completed
fold_2_mae,10.61778
fold_2_r2,0.59922


[I 2025-04-28 17:40:49,941] Trial 8 finished with value: 10.770324908127295 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6600373636417243, 'weight_decay': 0.32320969999143506, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 8 with value: 10.770324908127295.



Trial 8 finished with MAE: 10.770324908127295


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


Error processing fold 1: Input contains NaN.
Error processing fold 3: Input contains NaN.
Error processing fold 4: Input contains NaN.
Error processing fold 5: Input contains NaN.
Error processing fold 6: Input contains NaN.
Error processing fold 10: Input contains NaN.


0,1
absolute_weight_entropy,▅▅▄▁▆▄▃▃▃▄▅▅▄▅▃▃▂█▃▆▄▃▇▃▂▂▃▆▅▃▃▁▂▁█▄▄▃▄▂
epoch,▂▄▄▅▅▇▃▄▆█▆▆▂▄▄▅▇▃▃▅▆▄▆▅▆▇█▂▄▅▆▇▁▄▄▇▇▁▅▆
fold_2_mae,▁
fold_2_r2,▁
fold_7_mae,▁
fold_7_r2,▁
fold_8_mae,▁
fold_8_r2,▁
fold_9_mae,▁
fold_9_r2,▁

0,1
absolute_weight_entropy,76.70771
epoch,84
fold_10_error,Fold processing erro...
fold_10_status,failed_processing
fold_1_error,Fold processing erro...
fold_1_status,failed_processing
fold_2_mae,6399743055.33764
fold_2_r2,-7.783763085520157e+16
fold_2_status,completed
fold_3_error,Fold processing erro...


[I 2025-04-28 17:42:16,508] Trial 9 finished with value: 8978981079.584831 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.6357870375177979, 'weight_decay': 0.23956949504806577, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 8 with value: 10.770324908127295.



Trial 9 finished with MAE: 8978981079.584831


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▄▄▅▂▆▇▇▄▃▃▇▇▅▄▄▄▄▄▇▆▄▅▅▅▄▄▅▄▄▅█▆▁▅▅▄▄▃▁
epoch,▂▃▄▅▆██▄▅▅▇███▂▂▆▁▂▄▇▂▅▇▇▁▃▆█▁▂▅██▄███▄▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,147.67017
epoch,99
fold_10_mae,12.27991
fold_10_r2,0.53323
fold_10_status,completed
fold_1_mae,11.88666
fold_1_r2,0.55256
fold_1_status,completed
fold_2_mae,11.1913
fold_2_r2,0.56175


[I 2025-04-28 17:43:54,566] Trial 10 finished with value: 11.309518340237975 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.3009897391814214, 'weight_decay': 0.017971695996063297, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 8 with value: 10.770324908127295.



Trial 10 finished with MAE: 11.309518340237975


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▂█▆██▇███▇▇▇▇▇██████▇▁▇▇▇▇▂▇▇▇▇▇▇▇▇▇
epoch,▁▂▂▄▅▂▂▄▄▇▂▂▃▄▅▇█▂▂▃▅▂▃▄▄▆▇█▁▂▂▂▄▅▅▆▇▇▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,274.25094
epoch,99
fold_10_mae,10.11663
fold_10_r2,0.65184
fold_10_status,completed
fold_1_mae,10.51413
fold_1_r2,0.63654
fold_1_status,completed
fold_2_mae,12.20146
fold_2_r2,0.56297


[I 2025-04-28 17:48:29,089] Trial 11 finished with value: 10.75231649117262 and parameters: {'hidden_dim': 512, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.6966258739157449, 'weight_decay': 0.141163017614989, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 11 with value: 10.75231649117262.



Trial 11 finished with MAE: 10.75231649117262


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▂▃▄▄▅▁▁▂▂▂▃▃▃▃▃▃▂▅▅▅▃▆▆█▃▃▆▄▅▅▃▃▃▃▃▆▆▆▆▆
epoch,▂▁▂▂▂▃▆▆▇▃█▁▃▄▃▅▅▅▆▆▁▂▄▄▄▅▆▂▄▅▇▇▃▃▄▅▇▁▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,294.20755
epoch,99
fold_10_mae,16.33409
fold_10_r2,0.27306
fold_10_status,completed
fold_1_mae,18.60379
fold_1_r2,0.10501
fold_1_status,completed
fold_2_mae,20.80151
fold_2_r2,-0.37713


[I 2025-04-28 17:53:01,895] Trial 12 finished with value: 17.33370507970004 and parameters: {'hidden_dim': 512, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.6768733993181006, 'weight_decay': 0.0057395492144572124, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 11 with value: 10.75231649117262.



Trial 12 finished with MAE: 17.33370507970004


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,███▇▅█▆██▆▅██▆▂▆▅▁▆▅▃▃▁███▇▄▄▇▆▆█▁███▇▇▄
epoch,▁▃▄▄▆▇▇▃▃▆▃▃▅▆▁▃▃▄▄▅█▂▃▇█▆▇██▁▄▆▆▆▇▃▄█▁▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,196.20675
epoch,99
fold_10_mae,10.80011
fold_10_r2,0.61554
fold_10_status,completed
fold_1_mae,11.92746
fold_1_r2,0.5541
fold_1_status,completed
fold_2_mae,9.89018
fold_2_r2,0.65446


[I 2025-04-28 17:56:33,088] Trial 13 finished with value: 10.653389159860449 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6944183203336247, 'weight_decay': 0.20634029424207834, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 13 finished with MAE: 10.653389159860449


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▇██▆▂▇█▆▄▃▂▁▇▇▄▇▇▇▆▅███▇██▇█▆██▅▇████▆
epoch,▁▁▂▂▃▇█▂▇▃█▂▄▆▁▃▄▆█▁▄▆▆▃▄▇█▂▆▆▁▂▄▄▄▂▄▄▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,284.06911
epoch,99
fold_10_mae,10.96478
fold_10_r2,0.61304
fold_10_status,completed
fold_1_mae,10.73495
fold_1_r2,0.62504
fold_1_status,completed
fold_2_mae,12.63942
fold_2_r2,0.4943


[I 2025-04-28 18:00:45,872] Trial 14 finished with value: 11.414474469811738 and parameters: {'hidden_dim': 512, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.5667071470579965, 'weight_decay': 0.15953930299786356, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 14 finished with MAE: 11.414474469811738


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▇▇▄▅▄▄▄▄▅▅▄▃▃▃▂█▃▄▅▄▆▆▆▆▁▅▅▅▅▄▇▆▆▅▇▇▆▆▆
epoch,▂▅▇▁▄▇▂▂▃▅▃▄▇▁▃▄▅▆▇▇▂▃▃▄▅▇▇█▃▃█▂▃▄▅▃▆▁▂▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,215.12465
epoch,99
fold_10_mae,11.15307
fold_10_r2,0.60077
fold_10_status,completed
fold_1_mae,10.49333
fold_1_r2,0.62754
fold_1_status,completed
fold_2_mae,10.84675
fold_2_r2,0.61437


[I 2025-04-28 18:04:20,452] Trial 15 finished with value: 11.433536686648218 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.39330592099281947, 'weight_decay': 0.12718465936324053, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 15 finished with MAE: 11.433536686648218


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,████▇▅▅██▄█████▄▄█▆▆▄▆▄█▅▅▁███▇▅▄▃▇▄███▅
epoch,▁▁▄▄▄▆█▁▄▆▅▆▇▇▂▂▄▆▇█▃▇█▂▃▅▄▄▄▂▃▇█▃▅██▁▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,179.35092
epoch,99
fold_10_mae,12.41562
fold_10_r2,0.52213
fold_10_status,completed
fold_1_mae,12.12452
fold_1_r2,0.594
fold_1_status,completed
fold_2_mae,12.18834
fold_2_r2,0.51461


[I 2025-04-28 18:07:54,286] Trial 16 finished with value: 14.086953363500058 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.690182089169559, 'weight_decay': 0.7020297352579176, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 16 finished with MAE: 14.086953363500058


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▆▆▃▅▅▅▅▅▅███▃▁▅▅▅▄▄▄▅▄▄▄▄▄▅▅▅▄▄▄▄▄▄▇▇▇▂
epoch,▃▄▅▁▃▂▅▆▇▂▆▇█▁▅▂▃▅▇█▃▃▃▃▅▆▇▂▅▆█▁▂▃▃▅▆▇▂█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,320.95476
epoch,99
fold_10_mae,16.70302
fold_10_r2,0.2353
fold_10_status,completed
fold_1_mae,12.24368
fold_1_r2,0.53251
fold_1_status,completed
fold_2_mae,12.18289
fold_2_r2,0.49534


[I 2025-04-28 18:18:13,285] Trial 17 finished with value: 14.590983389053772 and parameters: {'hidden_dim': 1024, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.0220476085370519, 'weight_decay': 0.2780206506039601, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 17 finished with MAE: 14.590983389053772


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


Error processing fold 1: Input contains NaN.
Error processing fold 8: Input contains NaN.
Error processing fold 10: Input contains NaN.


0,1
absolute_weight_entropy,▄▄▁▇▄▃███▂▆▅▅▄▄▄██▆▅▄▄▃▃█▄▃▃▃▂█▄▄▃█▅▅▄▃█
epoch,▂▂▂▄▆▇▇▃▃▇▂▄▅▅▇▂▂▃▃▄▁▂▇▇▁▃▃▄▆▆█▂▃▄▂▇▂▂▄▆
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁
fold_4_mae,▁
fold_4_r2,▁
fold_5_mae,▁
fold_5_r2,▁

0,1
absolute_weight_entropy,159.47764
epoch,83
fold_10_error,Fold processing erro...
fold_10_status,failed_processing
fold_1_error,Fold processing erro...
fold_1_status,failed_processing
fold_2_mae,620520034.93911
fold_2_r2,-731770615493309.8
fold_2_status,completed
fold_3_mae,436074851.87269


[I 2025-04-28 18:21:24,944] Trial 18 finished with value: 2965379196.7839227 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.582268355375933, 'weight_decay': 0.09831320822280804, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 13 with value: 10.653389159860449.



Trial 18 finished with MAE: 2965379196.7839227


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▆███▇▇█▄█▆▅█▄▄█▂▁█████████████████▆▄██▃
epoch,▁▂▅▆▇▁▂▃▅▆▇██▃▄▅▆▇▇▁▆█▁▂▃▇▂▃▆▇▃▄▄▅▅▃▄▆▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,261.1891
epoch,99
fold_10_mae,10.28188
fold_10_r2,0.66743
fold_10_status,completed
fold_1_mae,14.60758
fold_1_r2,0.32198
fold_1_status,completed
fold_2_mae,10.8464
fold_2_r2,0.61994


[I 2025-04-28 18:26:02,270] Trial 19 finished with value: 11.707320983258004 and parameters: {'hidden_dim': 512, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.471818131701207, 'weight_decay': 0.4021123347372827, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 19 finished with MAE: 11.707320983258004


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▁▂▃▃▄▆▆▅▅▄▄▄▃▃█▃▅▅▅▅▅▄▆▇▇▇▇▇▇▇▇▇▇▇▇▃▅▅▅▄
epoch,▁▂▅▇▃▇███▂▄▂▂▃▃▆▆▇▃▃▄▆█▄▅▇▄▆▆▇▂▂▃▅▆▁▂▂▂▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,214.49679
epoch,99
fold_10_mae,18.57296
fold_10_r2,0.11536
fold_10_status,completed
fold_1_mae,14.62982
fold_1_r2,0.40483
fold_1_status,completed
fold_2_mae,16.45997
fold_2_r2,0.25239


[I 2025-04-28 18:29:36,848] Trial 20 finished with value: 14.085532549471996 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.31490073404775604, 'weight_decay': 0.07634060710885536, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 20 finished with MAE: 14.085532549471996


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,███▇▇▅███▇▇▆▅▄▄████▇▁███▆██▆▅▄█████▂██▇▅
epoch,▂▂▅▅▁▆▇█▂▄▇▇▃▃█▁▄▇▂▃▇▇▇█▃▇█▂▂▂██▁▁▂▃▇█▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,136.06551
epoch,99
fold_10_mae,13.43882
fold_10_r2,0.40054
fold_10_status,completed
fold_1_mae,11.89287
fold_1_r2,0.55782
fold_1_status,completed
fold_2_mae,15.19986
fold_2_r2,0.32732


[I 2025-04-28 18:31:19,364] Trial 21 finished with value: 11.373040734659195 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6223489819912422, 'weight_decay': 0.29397777752484444, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 13 with value: 10.653389159860449.



Trial 21 finished with MAE: 11.373040734659195


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇████▇▆██▇▃▇▄▄▂▇▆▆▄▂▇▆▅█▇▁▇▇▇▅█▃███▄▄▃▇▁
epoch,▁▃▄▇▁▃▇▂▂▄▇▅▅▅▇▂▄▅▇▂▃▃▄▆▆▁▂▂▂▃██▂▂▂▄▅▅▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,126.63772
epoch,99
fold_10_mae,11.16969
fold_10_r2,0.61317
fold_10_status,completed
fold_1_mae,10.61364
fold_1_r2,0.64082
fold_1_status,completed
fold_2_mae,10.80563
fold_2_r2,0.61465


[I 2025-04-28 18:33:02,309] Trial 22 finished with value: 10.475050739088143 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6956900111069457, 'weight_decay': 0.3362528349829468, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 22 finished with MAE: 10.475050739088143


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128

  X_filtered = X[mask]
  y_filtered = y[mask]



Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,█▄█▇▆▆▃█▅▅████▇▇▆▅█▇▁██▇▆▁█▇▇▁█▄▃▃█▇▄▂▂▇
epoch,▆▃▄▅█▄▆▆▆█▅▇▁▂▇▆▇▇█▁▄▅▆▇▂▂▃▄▄▄▇▁▂▃▅▃▃▄▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,131.14367
epoch,99
fold_10_mae,10.89841
fold_10_r2,0.61878
fold_10_status,completed
fold_1_mae,10.61245
fold_1_r2,0.64328
fold_1_status,completed
fold_2_mae,10.16215
fold_2_r2,0.65124


[I 2025-04-28 18:34:44,269] Trial 23 finished with value: 11.172991578387622 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6002701637286822, 'weight_decay': 0.1986173194268701, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 23 finished with MAE: 11.172991578387622


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,████▆██▄▁█████▆█████▄████▇████▇▇▅▄██▇▁▁▇
epoch,▁▂▂▁▂▂▃▃▄▄▆▁▃▃▆▂▃▃▅▆▆▁▂▃▄▂▅▆██▃▃▂▃▃▅▅▅▇▄
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,207.54137
epoch,99
fold_10_mae,10.56978
fold_10_r2,0.63793
fold_10_status,completed
fold_1_mae,10.70644
fold_1_r2,0.64254
fold_1_status,completed
fold_2_mae,10.40127
fold_2_r2,0.62563


[I 2025-04-28 18:42:10,918] Trial 24 finished with value: 11.377962772865244 and parameters: {'hidden_dim': 1024, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.6870045533920448, 'weight_decay': 0.2611792230588581, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 24 finished with MAE: 11.377962772865244


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▆▅▄▄█▇▂▁█▇▇▇▆▃██▇▇▆▃▃▃██▄████▃█▇▃▂▇▃█▄▁
epoch,▂▄▇▂▄▂▂▃▅▂▂▆▇▇█▄▅▅▆▆▃▃▅██▄▆▇▂▃▇▁▁▂▄▅▆▃▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,174.14933
epoch,99
fold_10_mae,10.59508
fold_10_r2,0.63344
fold_10_status,completed
fold_1_mae,11.90033
fold_1_r2,0.59486
fold_1_status,completed
fold_2_mae,19.71987
fold_2_r2,-0.13558


[I 2025-04-28 18:44:01,061] Trial 25 finished with value: 13.132308477072206 and parameters: {'hidden_dim': 64, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.6992168228006574, 'weight_decay': 0.4405751707509298, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 25 finished with MAE: 13.132308477072206


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▆▅▂▆▂█▆▅▄▂▃▂█▄▃▃▁▆▄█▅▃▄█▄▃▁▄▃▁▁▇▇▄▆▅▅▃▁
epoch,▁▃▄▅▆▂▃▃▄▄▁▄▆▇▅▃▃▄▅▇▂▃▅▆▂▁▃▄▆█▂▂▂▃▆█▄▄▄▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,169.47497
epoch,99
fold_10_mae,139987.06897
fold_10_r2,-36101259.3659
fold_10_status,completed
fold_1_mae,2766.07381
fold_1_r2,-13936.38
fold_1_status,completed
fold_2_mae,314323.06723
fold_2_r2,-187765643.71281


[I 2025-04-28 18:47:16,581] Trial 26 finished with value: 515902.04174854734 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.5280668843892921, 'weight_decay': 0.34753101647111584, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 22 with value: 10.475050739088143.



Trial 26 finished with MAE: 515902.04174854734


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,███▇▇█▃▇▇▇▃▇▇▇▅▃██▅███▅▇▇▇▆▂▁▇▇▁██▇███▇▃
epoch,▃▃▆▇▃▃▄▄▆▂▆▆▆▇▇▁▂▂▆▇▄▅▇▅▅▁▅▇▂▂▅▆▇▇▁▆▆▆▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,205.84422
epoch,99
fold_10_mae,10.36911
fold_10_r2,0.6429
fold_10_status,completed
fold_1_mae,10.61514
fold_1_r2,0.64144
fold_1_status,completed
fold_2_mae,10.56263
fold_2_r2,0.62354


[I 2025-04-28 18:50:50,386] Trial 27 finished with value: 11.172480401942982 and parameters: {'hidden_dim': 512, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.5851811098281076, 'weight_decay': 0.16911427621031125, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 27 finished with MAE: 11.172480401942982


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▅▇▇▇▆▄██▆▄▁██▇▇▃███▇▇█▇▂▂▆▇▆▅▃▂█▄▂▇▄
epoch,▃▄▄▅▇█▂▃█▅▁▂▃▄▅▇▂▃▆▂▃▃▅▃▆▇▇█▄▅▆█▂▃█▂▃▃▄▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,187.89628
epoch,99
fold_10_mae,10.60138
fold_10_r2,0.64088
fold_10_status,completed
fold_1_mae,15.36327
fold_1_r2,0.372
fold_1_status,completed
fold_2_mae,16.25333
fold_2_r2,0.27837


[I 2025-04-28 18:52:40,940] Trial 28 finished with value: 11.733146914925019 and parameters: {'hidden_dim': 64, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.44880292685881007, 'weight_decay': 0.6140747695143112, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 22 with value: 10.475050739088143.



Trial 28 finished with MAE: 11.733146914925019


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▄▁▇▇▇▇▃▃▃▄▅▅▄▄████▇▇▅▅▅▆▅▅▇▆▆▅▆███▂▂▃▃▅
epoch,▁▄▅▅▆▃▆▂▃▅▂▄▇▂▃▄▅▅▇▁▂▄▆▂▃█▂▃▄█▁▃▄▆▆▃▄▆▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,145.43201
epoch,99
fold_10_mae,12.53687
fold_10_r2,0.52706
fold_10_status,completed
fold_1_mae,12.74419
fold_1_r2,0.49922
fold_1_status,completed
fold_2_mae,11.88571
fold_2_r2,0.54276


[I 2025-04-28 18:54:23,297] Trial 29 finished with value: 12.128038710527576 and parameters: {'hidden_dim': 64, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.16480058547010418, 'weight_decay': 0.08284901801847182, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 22 with value: 10.475050739088143.



Trial 29 finished with MAE: 12.128038710527576


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▇▅██▇▇▄▄▇▇▇▇▁█▆▄▃▂▂▇▆▆▅▃▂██▅▄▁▇▇█▇▅██▇▂
epoch,▂▆▇▅▇██▁▁▁▃▄▆█▁▄▃▄▄▅▂▄▅▇█▂▂▃▃█▅▆▁▃▃▆▅▆▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,160.25026
epoch,99
fold_10_mae,10.01376
fold_10_r2,0.65222
fold_10_status,completed
fold_1_mae,10.82388
fold_1_r2,0.6217
fold_1_status,completed
fold_2_mae,10.21381
fold_2_r2,0.6544


[I 2025-04-28 18:56:12,033] Trial 30 finished with value: 10.420316148682053 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.09094485011752312, 'weight_decay': 0.8315668301052384, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 30 with value: 10.420316148682053.



Trial 30 finished with MAE: 10.420316148682053


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▅▃▇▇▆▆▅▅▄▆▅██▆▅▆▆▆▂▇▄▁▆▅▂▇▇▇▅▃▇▇▆▅▇▇▆▄▂
epoch,▁▄▅▅█▄▇▂▂▃▇▁▁▂▄▃▇▇▃▄▅▅▂▅▅▂▄▅▇█▂▄▄▄▇▂▃▄▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,158.32681
epoch,99
fold_10_mae,11.0867
fold_10_r2,0.60365
fold_10_status,completed
fold_1_mae,10.90536
fold_1_r2,0.62407
fold_1_status,completed
fold_2_mae,10.53053
fold_2_r2,0.63236


[I 2025-04-28 18:58:00,939] Trial 31 finished with value: 10.880153691033634 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.14029126591227192, 'weight_decay': 0.7330818549229802, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 30 with value: 10.420316148682053.



Trial 31 finished with MAE: 10.880153691033634


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▇▃▇▆▃▁▆▆▆▅▅▃██▇▅▇▇▂▇▇▄▄▄▇▆▇▇▆▃▇▄█▇▆▃
epoch,▆█▁▃▇▅▁▃▅▆█▁▅▆█▄▅▇▂▃▃▅▅▅▇▂▂▅▇▇▇█▁▅▇▂▂▃▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,149.7595
epoch,99
fold_10_mae,10.09765
fold_10_r2,0.68089
fold_10_status,completed
fold_1_mae,10.66643
fold_1_r2,0.64651
fold_1_status,completed
fold_2_mae,18.66654
fold_2_r2,-0.06046


[I 2025-04-28 18:59:49,915] Trial 32 finished with value: 12.649677992355834 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.27810149269961043, 'weight_decay': 0.8066688084470555, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 30 with value: 10.420316148682053.



Trial 32 finished with MAE: 12.649677992355834


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▅▄▄▂▆▆▆▆▆▃▆▇▇▅▄▃███▇▇▇▂▂▂▅▄▄▄▁▇▇▆▄▅▂▇▅▄
epoch,▂▂▃▃▃▇█▁▂▄▇█▂▂▃▂▄▄▅█▃▄▅▅▇▇▁▄▆▁▅▁▂▅▅▆▆▇▇▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,165.16115
epoch,99
fold_10_mae,10.09994
fold_10_r2,0.66087
fold_10_status,completed
fold_1_mae,10.74942
fold_1_r2,0.62164
fold_1_status,completed
fold_2_mae,10.10826
fold_2_r2,0.63853


[I 2025-04-28 19:01:38,860] Trial 33 finished with value: 10.125529462794038 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.04997140230184105, 'weight_decay': 0.8645453887645732, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 33 finished with MAE: 10.125529462794038


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▇▇▇▃▇▇▇▆▄██▁▁█▃▃██▅▄████▂▂██▇▂▂▁▇▇███▇▄
epoch,▄▄▄▆▆▃▅▆▇█▁▅▁▄▅▅▇▂▃▃▅▆▇▇▂▄▅▇██▃▅▅▂▃▅▃▃▃▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,160.86777
epoch,99
fold_10_mae,10.3694
fold_10_r2,0.64049
fold_10_status,completed
fold_1_mae,10.69626
fold_1_r2,0.63704
fold_1_status,completed
fold_2_mae,10.81882
fold_2_r2,0.60503


[I 2025-04-28 19:03:29,033] Trial 34 finished with value: 10.886854416284262 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.12860968409919404, 'weight_decay': 0.9984967025825453, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 34 finished with MAE: 10.886854416284262


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▇▆▆▅▇▇▆▇▇▇▇▅▅▇▇▇▆▅▇▅▇▇▆▄▁▇▇▆▆▇▇▇▆▄██▇▅
epoch,▂▆▁▁▂▃▆▇▃▅▇▇█▂▃▅▇█▃▆█▅▇█▁▅▅▆▆▇▄▄▅▆▄▁▃▄▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,116.81005
epoch,99
fold_10_mae,10.50962
fold_10_r2,0.64574
fold_10_status,completed
fold_1_mae,11.08059
fold_1_r2,0.6043
fold_1_status,completed
fold_2_mae,10.33664
fold_2_r2,0.64205


[I 2025-04-28 19:05:13,360] Trial 35 finished with value: 10.524810951145009 and parameters: {'hidden_dim': 32, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.09242700667543902, 'weight_decay': 0.8264870890284749, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 35 finished with MAE: 10.524810951145009


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,██▇▇▆▃▃▇▄▇▆▆▄▃█▇▄▇▇▇▆▄█▇▇▁▁▇▇▆▆▆█▆▆▄▃▇▇▇
epoch,▁▂▃▄▇▃▄█▁▂▅▅▆▁▅▁▂▃▄▄▅▆▆▂▂▅▆▇▂▇▂▃▄▆▅▆▁▂▃▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,79.13085
epoch,99
fold_10_mae,10.09893
fold_10_r2,0.65965
fold_10_status,completed
fold_1_mae,11.00347
fold_1_r2,0.5906
fold_1_status,completed
fold_2_mae,12.48669
fold_2_r2,0.47758


[I 2025-04-28 19:06:46,860] Trial 36 finished with value: 10.66211395625389 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.07021315426143684, 'weight_decay': 0.8116934576104412, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 36 finished with MAE: 10.66211395625389


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▇▆▄▃██▆▆▆█▇▇▇█▆▄██▂█▆▆▄▁▆█▆▄▃▇▆▅▄███▆▆▅
epoch,▂▄▅▆▇▂▂▂▄▅█▁▂▃▆▂▄▄▄▄█▅▁▃▆▂▃▇█▂▃▅▅▆█▁▆▆▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,160.148
epoch,99
fold_10_mae,21.0223
fold_10_r2,-0.08399
fold_10_status,completed
fold_1_mae,21.01644
fold_1_r2,-0.10986
fold_1_status,completed
fold_2_mae,20.90524
fold_2_r2,-0.10914


[I 2025-04-28 19:08:35,413] Trial 37 finished with value: 20.53085604391138 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.06847938491456074, 'weight_decay': 0.9449544885952397, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'SGD'}. Best is trial 33 with value: 10.125529462794038.



Trial 37 finished with MAE: 20.53085604391138


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▃█▇▅▃▂██▁██▆▅▇▆█▇▄▇▇▄▃▃▂█▇▇▅▃▇▆▆▃▁▇▆▅▃
epoch,▂▂▅▇▂▆▇▇▂▂▆▇▂▃▆▅▅▇█▃▄▄▂▂▃▇▇█▂▃▃▆▂▂▃▆▁▂▂▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,117.2534
epoch,99
fold_10_mae,13.6673
fold_10_r2,0.40174
fold_10_status,completed
fold_1_mae,10.99991
fold_1_r2,0.61702
fold_1_status,completed
fold_2_mae,15.74264
fold_2_r2,0.29203


[I 2025-04-28 19:10:14,905] Trial 38 finished with value: 12.03260034765717 and parameters: {'hidden_dim': 32, 'num_layers': 2, 'batch_size': 8192, 'learning_rate': 0.22970344551078736, 'weight_decay': 0.8745462259235626, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 38 finished with MAE: 12.03260034765717


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇██▇▇▆▆▆█▇▇▇▇▇▄▄▄█▄█▇▃▂▁▇▇▅▃█▇▅▄▃█▇▆▅▄▁
epoch,▆▇▇█▁▅▁▁▂▄▅▅▇▂▆▇▁▅▇▇▃▅▆▇█▅▅▅▆▂▆▃▄▆▂▄▅▅██
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,77.16631
epoch,99
fold_10_mae,10.74462
fold_10_r2,0.63731
fold_10_status,completed
fold_1_mae,10.94474
fold_1_r2,0.62467
fold_1_status,completed
fold_2_mae,10.53823
fold_2_r2,0.61527


[I 2025-04-28 19:11:49,456] Trial 39 finished with value: 10.558554112798145 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.08424889507918099, 'weight_decay': 0.8005278420861465, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 39 finished with MAE: 10.558554112798145


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▃▅▅▃▃▃▁▂▂▃▂█▂▂▃▃▃▂▂▂▁▂▂▂▂▂▂▂▂▃█▂▃▃▃▂▂▂▂▃
epoch,▂▄▅▅▅▂▃▅▅▆▃▄▅▇▂▃██▅▆▁▂▄▅▂▄▇▃▄▄▇██▂▃▄▂▄▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,229.52863
epoch,99
fold_10_mae,18.72335
fold_10_r2,0.11745
fold_10_status,completed
fold_1_mae,11.13228
fold_1_r2,0.58507
fold_1_status,completed
fold_2_mae,11.48009
fold_2_r2,0.57314


[I 2025-04-28 19:13:48,888] Trial 40 finished with value: 12.298944455026094 and parameters: {'hidden_dim': 128, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.021453980444734774, 'weight_decay': 0.5816560686071215, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 40 finished with MAE: 12.298944455026094


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▅▅▄█▇▇▆▆▅▅▇▇▇▇▅▄▇▇▇▇▁▇▅▅████▇▇▅▃▇▆▇▇▇▄
epoch,▁▄▆▇▇▁▁▃▆▇▁▃▄▅█▂▅▅▁▆▇█▂▃▄▆▆▇▁▂▄▆▆█▅▂▃▃▅▃
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,77.64527
epoch,99
fold_10_mae,17.10813
fold_10_r2,0.08454
fold_10_status,completed
fold_1_mae,10.95297
fold_1_r2,0.60296
fold_1_status,completed
fold_2_mae,10.48414
fold_2_r2,0.61558


[I 2025-04-28 19:15:24,522] Trial 41 finished with value: 10.912144558747073 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.09012616900660786, 'weight_decay': 0.7788754708727864, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 41 finished with MAE: 10.912144558747073


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▅▅▂█▇▇▇▇▆▆▁▇▅▄▄▄▄█▇▂▂▇▇▆▅▅▅▇▇██▇▇▇▇▄▃
epoch,▃▅▅▆█▅▁▁▂▄▅▆▇▇▇▅▂▂▃▄▄▄▇██▂▂▇▄▄▆▃▃▄▅▂▃▃▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,78.25824
epoch,99
fold_10_mae,10.12103
fold_10_r2,0.66153
fold_10_status,completed
fold_1_mae,10.96063
fold_1_r2,0.60849
fold_1_status,completed
fold_2_mae,10.35449
fold_2_r2,0.63399


[I 2025-04-28 19:16:58,020] Trial 42 finished with value: 11.406829468815747 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.1067864353301496, 'weight_decay': 0.8724979744355028, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 33 with value: 10.125529462794038.



Trial 42 finished with MAE: 11.406829468815747


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▃▃▃▃▆▅▂▁▁▄▄▄▄▄▃▃▂▅▅▄▅▅▅▅▄▅▄▃▃▅▄▃▃▃▅▅▄▃▃
epoch,▂▃▄▇▁▆▇▁▃▆▆▆█▂▃▇▁▃▄▆▇█▂▅▅█▂▂▃▄▇█▃▆▆▂▃▄▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,82.56835
epoch,99
fold_10_mae,10.24399
fold_10_r2,0.66712
fold_10_status,completed
fold_1_mae,10.73069
fold_1_r2,0.65094
fold_1_status,completed
fold_2_mae,10.37508
fold_2_r2,0.64762


[I 2025-04-28 19:18:32,299] Trial 43 finished with value: 10.102849423114582 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0007912464190683727, 'weight_decay': 0.7024858020568682, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 43 finished with MAE: 10.102849423114582


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128

  X_filtered = X[mask]
  y_filtered = y[mask]



Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,████▇█▅▇█▅▅█▇▇▇▆█▇▄▁▆▄▃▄▃██▆▆▆▅▇▆▆▇▆▆▅▆▄
epoch,▁▂█▂▅█▁▂▃▃▆█▂▃▃▇▇▇██▄▆▆▁▂▃▃▅▆▂▆▂▂▄▅▅▅▅▂▄
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,77.63094
epoch,99
fold_10_mae,30.82467
fold_10_r2,-1.37214
fold_10_status,completed
fold_1_mae,31.17867
fold_1_r2,-1.402
fold_1_status,completed
fold_2_mae,31.32849
fold_2_r2,-1.53194


[I 2025-04-28 19:20:05,268] Trial 44 finished with value: 30.55136527526569 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.17603536379622847, 'weight_decay': 0.6869468155922412, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 44 finished with MAE: 30.55136527526569


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▆▇▆▂▇▅▆▆▆▆▇▄▃▂▇▇▇▇▆▄▇▃▃▇▄▄▇▇▇▃▆▆▇▇▃▇▇▇▁
epoch,▂▂▂▃▅█▅██▁▄▅▃▃▃▁▁▂▂▄▆▇█▂▅▅▆▆▇▇▆▇▁▄▅▆▁▃▄▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,79.13296
epoch,99
fold_10_mae,10.88295
fold_10_r2,0.60564
fold_10_status,completed
fold_1_mae,11.44055
fold_1_r2,0.59169
fold_1_status,completed
fold_2_mae,10.81205
fold_2_r2,0.5847


[I 2025-04-28 19:21:39,679] Trial 45 finished with value: 10.762297693502182 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.046501327382696155, 'weight_decay': 0.9431770336511069, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 45 finished with MAE: 10.762297693502182


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▂▅▅▃██▁▂▃▃▇█▂▂▂▂▇▇▃▃▅▄▄▂▃▄▆▆▆▄▇▇▄▅▅▂▄█▆▅
epoch,▁▂▃▃▃▅█▁▆▇▁▃▄▅▇▄▄▅▅█▃▄▅▅▇▅▆▆▇▅▅▇▇▁▇▁▃▄▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.10966
epoch,99
fold_10_mae,15.13102
fold_10_r2,0.35821
fold_10_status,completed
fold_1_mae,15.29302
fold_1_r2,0.33368
fold_1_status,completed
fold_2_mae,15.00566
fold_2_r2,0.34148


[I 2025-04-28 19:23:14,850] Trial 46 finished with value: 14.749845039598469 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0351509819942777, 'weight_decay': 0.759949191177838, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 43 with value: 10.102849423114582.



Trial 46 finished with MAE: 14.749845039598469


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▆▅▅▅▄██▇▇▆▅▅▅█▇▇▃▃▃▇▇▆▅▃▃▂▁█▆▅▄▂▅▃██▇▂
epoch,▁▂▂▂▅▃▆▇█▂▃▄▅██▄█▂▄▅▇█▂▃▄█▂▂▃▃▆▆▁▂▃▆▇▇██
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,159.34554
epoch,99
fold_10_mae,10.35
fold_10_r2,0.66376
fold_10_status,completed
fold_1_mae,10.6252
fold_1_r2,0.64524
fold_1_status,completed
fold_2_mae,11.00348
fold_2_r2,0.62214


[I 2025-04-28 19:25:04,011] Trial 47 finished with value: 10.987646282459641 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.24629441414627753, 'weight_decay': 0.8373910930991855, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 47 finished with MAE: 10.987646282459641


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▇▇▅▇██▇▇▇█▇▇▅▇███▅▆██▇█▇▆▆▄█▁███▇▇▇▅▃▄
epoch,▁▂▂▂▃▄▅█▁▄█▁▆█▁▄▆█▂▆▇▇▃▂▃▇▁▅▆▇▃▃▃▄▅▇▂▂▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,296.49549
epoch,99
fold_10_mae,11.26142
fold_10_r2,0.60619
fold_10_status,completed
fold_1_mae,11.68748
fold_1_r2,0.57138
fold_1_status,completed
fold_2_mae,10.91997
fold_2_r2,0.60984


[I 2025-04-28 19:35:27,037] Trial 48 finished with value: 11.45338905283839 and parameters: {'hidden_dim': 1024, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.18342866950597314, 'weight_decay': 0.6648954884062576, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 48 finished with MAE: 11.45338905283839


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▆▂▂▂▂▅▆▆▅▁▂▁▁▃▄▄▄▄█▇▄▄▄▅▅▇▆▆▅▄▆▆▆▃▂▁▂▂▂▃
epoch,▇▁▆█▁▅▅██▂▂▃▄▅▇█▂▃▅▇▆▆▇▇█▂▃▄▃▄▄█▁▂▃▄▅▁▂█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,82.54427
epoch,99
fold_10_mae,35.20048
fold_10_r2,-2.18697
fold_10_status,completed
fold_1_mae,35.41156
fold_1_r2,-2.21958
fold_1_status,completed
fold_2_mae,36.07428
fold_2_r2,-2.40909


[I 2025-04-28 19:37:01,513] Trial 49 finished with value: 35.30841825917598 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0016691872337532485, 'weight_decay': 0.8917766806242511, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 49 finished with MAE: 35.30841825917598


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,██▇▆▄▇▅▁▂▇▇▇▇▆▅█▇▆▅▅▇▄▄█▇▅▄▂▇▇▂▁▁▇▇▂▇▆▆▃
epoch,▄▄▅▅▇▆█▁▃▄▆▂▂▄▆▇▂▂▃▃▇▇▂▃▄▆▂▃▄▄▅▅▇▇▇▃▆█▁▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,156.70663
epoch,99
fold_10_mae,10.67649
fold_10_r2,0.63637
fold_10_status,completed
fold_1_mae,10.74099
fold_1_r2,0.61468
fold_1_status,completed
fold_2_mae,10.27337
fold_2_r2,0.63783


[I 2025-04-28 19:38:51,198] Trial 50 finished with value: 11.17456573653602 and parameters: {'hidden_dim': 32, 'num_layers': 3, 'batch_size': 8192, 'learning_rate': 0.34905514641138263, 'weight_decay': 0.4815397518015228, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 50 finished with MAE: 11.17456573653602


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128

  X_filtered = X[mask]
  y_filtered = y[mask]



Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▇▆▅▄███▆▅▄▇█▇▇▇▇▆▃▃█▇▇▇██▆▅▃█▅▅█▇▅▄▄▄▁
epoch,▃▂▂▃▃▅▇▁▁▄▇█▁▂▅▃▅▆▇▂▃▃▃▆▄▅▆▆▇█▃▃▃▄▆█▁▂▃▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,74.67016
epoch,99
fold_10_mae,10.12603
fold_10_r2,0.65569
fold_10_status,completed
fold_1_mae,10.47747
fold_1_r2,0.65288
fold_1_status,completed
fold_2_mae,10.17393
fold_2_r2,0.64758


[I 2025-04-28 19:40:25,443] Trial 51 finished with value: 10.307556382500366 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.11489952490873657, 'weight_decay': 0.9155157233289827, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 51 finished with MAE: 10.307556382500366


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▆▅▅▄▇█▆▇▆▅▃█▇▅▄▄▄▂█▇▇▆▆▇█▁██▆▃▁▇▂▁███▅
epoch,▂▃▄▅▆▂▃▅█▂▆▆█▁▂▅▆█▂▃▃▅▆▇██▁▄▆▂▅▇█▁▁▃▆▇▃▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,77.66311
epoch,99
fold_10_mae,14.92294
fold_10_r2,0.27235
fold_10_status,completed
fold_1_mae,11.0223
fold_1_r2,0.61778
fold_1_status,completed
fold_2_mae,10.93807
fold_2_r2,0.58164


[I 2025-04-28 19:41:59,788] Trial 52 finished with value: 12.04907486631677 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.12299328811410207, 'weight_decay': 0.912116949697779, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 52 finished with MAE: 12.04907486631677


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▆▆▆▆▅▂▆▆▆▄▂▆▆▆▆▃▆▆▅▆▂▇▆▅▅▃█▆▆▅▁▁▇▇▆▃▂▆▆▅
epoch,▃▅▅▅▁▃▄▇▂▃▄▄▆▇▇█▂▃▄▅█▁▁▄▇█▁▃▄▇▅▆▄▅▇▃▄▂▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,79.58133
epoch,99
fold_10_mae,10.49887
fold_10_r2,0.63353
fold_10_status,completed
fold_1_mae,10.49014
fold_1_r2,0.62512
fold_1_status,completed
fold_2_mae,10.54279
fold_2_r2,0.62432


[I 2025-04-28 19:43:34,946] Trial 53 finished with value: 10.316324383768269 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.05909132726894378, 'weight_decay': 0.735477407921625, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 53 finished with MAE: 10.316324383768269


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▆▆▅▅▄▅▅▅▅▃▇▇▇▇▇▆▆▆▂▆▆▆█▆▅▅▃▃▇▅▁▆▆▆▆▆▆▅▄
epoch,▁▆▇▁▄▁▂▃▅▆▁▅█▁▆▂▃▄▄▅▇█▂▄▆▇▂▃▃▃▅▅▇▁▂▃▆▇▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,79.90855
epoch,99
fold_10_mae,10.93483
fold_10_r2,0.58793
fold_10_status,completed
fold_1_mae,12.03084
fold_1_r2,0.54078
fold_1_status,completed
fold_2_mae,12.3661
fold_2_r2,0.47654


[I 2025-04-28 19:45:09,703] Trial 54 finished with value: 10.779047914108649 and parameters: {'hidden_dim': 32, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.05831979661801312, 'weight_decay': 0.7391741646386487, 'test_split': 0.1, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 54 finished with MAE: 10.779047914108649


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▅▄▄▃▁▁▆▅▅▆▆▆▆▆▆▃▃▃▄▃▃▄▄▃▃▂▂▆▆▆▆▇▇▅▃▄▄▄▂
epoch,▄▄▅▅▆▇▁▁▂▆▇▂▂▂▅▆▇▁▂▃▇▃▃█▂▃▃▅▆▆▇▃▃▄▄█▅▅▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.87101
epoch,99
fold_10_mae,10.77114
fold_10_r2,0.6256
fold_10_status,completed
fold_1_mae,10.50086
fold_1_r2,0.64975
fold_1_status,completed
fold_2_mae,10.57083
fold_2_r2,0.62994


[I 2025-04-28 19:46:42,901] Trial 55 finished with value: 10.153418664629767 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.015011914688144862, 'weight_decay': 0.9902704457931398, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 43 with value: 10.102849423114582.



Trial 55 finished with MAE: 10.153418664629767


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▃▂▂▃▃▂▃▃▃▃▃▇▁▁▁▃▃▃▃▂▂▂▃▂▂▁▁▁▁▁▁▁▁█▂▁▁▁▃▁
epoch,▃▆▂▄▅█▁▁▂▆██▂▅▆▆▄▆▆▇▁▆▂▄▄▅▅▆▆▃▄▄▄▄▅▂▅▅▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,109.47325
epoch,99
fold_10_mae,10.34694
fold_10_r2,0.61823
fold_10_status,completed
fold_1_mae,10.15531
fold_1_r2,0.66333
fold_1_status,completed
fold_2_mae,10.262
fold_2_r2,0.62009


[I 2025-04-28 19:48:18,658] Trial 56 finished with value: 9.96612572216802 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0031664241214351234, 'weight_decay': 0.9983982699916027, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 56 finished with MAE: 9.96612572216802


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▂▁▃▅▄▄▄▆▃▃▃▃▂▂▃▅▃▄▄▄▆▇██▄▆▆▄▄▅▄▄▅▄▄▃▅▄▄
epoch,▃▄█▂▄▅█▃▄▄▇▂▂▅▆▇▄▆▆▁▁▂▃▆▆▇▇▂▃▃▅▆▇▆▇▃▃▄▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.08254
epoch,99
fold_10_mae,19.77407
fold_10_r2,0.02379
fold_10_status,completed
fold_1_mae,19.3671
fold_1_r2,0.04479
fold_1_status,completed
fold_2_mae,18.85416
fold_2_r2,0.04991


[I 2025-04-28 19:49:54,027] Trial 57 finished with value: 19.014155817158375 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.007423502421601633, 'weight_decay': 0.9719258956161314, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 56 with value: 9.96612572216802.



Trial 57 finished with MAE: 19.014155817158375


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▅▂█▆▆▅▃▁▅▇▆▄▃█▆▆▅▅▃▂▅▂▆▅▂▆▆▆▆▅▆▄▇▇▆▆▆▅▁
epoch,▂▂▄▆▆▂▅▇▂▃▁▃▃▅▇▄▅▆█▁▃▄▄▄▆▇▂▂▆█▃▅▅▂▄█▁▁▃▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,107.7512
epoch,99
fold_10_mae,11.28584
fold_10_r2,0.59449
fold_10_status,completed
fold_1_mae,11.2276
fold_1_r2,0.6226
fold_1_status,completed
fold_2_mae,11.48116
fold_2_r2,0.55545


[I 2025-04-28 19:51:28,863] Trial 58 finished with value: 10.556417031760558 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.043565593953855106, 'weight_decay': 0.9890509242275345, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 58 finished with MAE: 10.556417031760558


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▂▃▃▃▃▃▃▃▃▅▃▃▃▂▂▂▃▃▃▃▂▂▂▃▃▃█▂▃▁▂▃▃▃▁▃▃▃▃
epoch,▁▂▃▅▄▄▄▅▆▆█▂▃▄▅▁▅▅▅▆▅▅▅▇▁▄▅▁▄▅▇▃▅▆▆▂▃▆▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.58615
epoch,99
fold_10_mae,10.81123
fold_10_r2,0.61648
fold_10_status,completed
fold_1_mae,10.80997
fold_1_r2,0.61296
fold_1_status,completed
fold_2_mae,10.66284
fold_2_r2,0.58509


[I 2025-04-28 19:53:02,725] Trial 59 finished with value: 10.233817583156673 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.02467465420012417, 'weight_decay': 0.9192814732621809, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 59 finished with MAE: 10.233817583156673


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▆▇▇▆▅▅▅▅▄██▁▁▄▄▄▄▄▇▃▃▃▃▃▄▄▄▄▅▅▅▁▄▄▆▆▆▆▅
epoch,▂▆▆█▂▅▅▅▁▁▃▄▄▆▁▇▂▄▄▅▃▄▅▅█▂▂▃▅▅█▄▄▆▆▁▁▄▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,109.83483
epoch,99
fold_10_mae,32.81008
fold_10_r2,-1.85918
fold_10_status,completed
fold_1_mae,33.32789
fold_1_r2,-1.89368
fold_1_status,completed
fold_2_mae,34.04258
fold_2_r2,-2.07935


[I 2025-04-28 19:54:35,523] Trial 60 finished with value: 33.22474166056348 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0036559715025875454, 'weight_decay': 0.9330386689936975, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 60 finished with MAE: 33.22474166056348


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,██▇▁▆██▇▅▄▅▆▅▄▄▇▆▅█▇▄▅▅▅▅▄▃▆▆▄▇▇▅▆▆▅▅▅▅▄
epoch,▄▄▅██▂▃▇▇▂▃▃▃██▂▃▆█▄▅▇█▁▂▄▆▇▇▁▄▅▆▇██▁▃▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.71614
epoch,99
fold_10_mae,10.5116
fold_10_r2,0.64405
fold_10_status,completed
fold_1_mae,10.64341
fold_1_r2,0.63651
fold_1_status,completed
fold_2_mae,10.62847
fold_2_r2,0.62397


[I 2025-04-28 19:56:09,439] Trial 61 finished with value: 10.162149820875396 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.02841973666559399, 'weight_decay': 0.9010701765306632, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 61 finished with MAE: 10.162149820875396


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▄▄▄▅▄▃▆▄▃▅▅▅▃▁▅▄▄▁▁▃▂▂▁▅▅▃▂▄▄▂▂▁▃▆▆▆▅▅█
epoch,▂▄▅█▂▇▅▅▂▄▅▆▂▂▄▄▅▆▆▇▃▃▆▇▃▁▃▃▄▅▁▃▃▄▅▆▂▂▄▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.75
epoch,99
fold_10_mae,10.68445
fold_10_r2,0.61615
fold_10_status,completed
fold_1_mae,10.66686
fold_1_r2,0.62608
fold_1_status,completed
fold_2_mae,10.82023
fold_2_r2,0.59883


[I 2025-04-28 19:57:43,844] Trial 62 finished with value: 10.220678907987423 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.03263625118133939, 'weight_decay': 0.9089523844754437, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 62 finished with MAE: 10.220678907987423


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▃▇▇▇▃█▆▅▇▇▇▆▅▅▃▅▅▃▃▂▃▄▄▁▄▇▆▇▆▅▂▃▄▅▃█▇▇▆▄
epoch,▆▆▁▂▃▂▃▃▃▄██▁▃▄▅▅▅▄▄▁▂▃▄▂▄▅▅▇▂▅▆▆▃▄▄▄▆▁▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.82423
epoch,99
fold_10_mae,10.82705
fold_10_r2,0.61015
fold_10_status,completed
fold_1_mae,10.6994
fold_1_r2,0.61201
fold_1_status,completed
fold_2_mae,10.63077
fold_2_r2,0.62525


[I 2025-04-28 19:59:16,884] Trial 63 finished with value: 10.337990880911232 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.025755757660854365, 'weight_decay': 0.9641813004200439, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 63 finished with MAE: 10.337990880911232


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▄▂▅▅▁▆▅▅▄▃▂▆▇▆▄▄▃▇▇▆▆▅▄▄██▇▇▇▅▄▄▇▇▃▄▃█▆
epoch,▃▆█▅▅▁▂▆▆▇█▂▅▆▇▆▇▇▃▄▆▇▁▂▃▆▇▇█▁█▂▃▄▆▇▁▂▃█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,109.04343
epoch,99
fold_10_mae,10.28559
fold_10_r2,0.64834
fold_10_status,completed
fold_1_mae,12.831
fold_1_r2,0.49321
fold_1_status,completed
fold_2_mae,10.56831
fold_2_r2,0.62754


[I 2025-04-28 20:00:50,497] Trial 64 finished with value: 10.748497788522783 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.031481125480891686, 'weight_decay': 0.9010518258162478, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 64 finished with MAE: 10.748497788522783


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,██▇▇▇▆███▇▇▇▇▄▄████▆██▇▇▆▄▄█▇▃████▆██▇▆▁
epoch,▆▇▄▆▇▂▄▇▄▅▃▄▄▆▁▄▄▄█▁▄▅▆▇█▂▂▃▅▅▇█▁▁▂▄▄▁▂▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,100.23699
epoch,99
fold_10_mae,12.28075
fold_10_r2,0.52512
fold_10_status,completed
fold_1_mae,10.90865
fold_1_r2,0.6299
fold_1_status,completed
fold_2_mae,17.01701
fold_2_r2,0.11749


[I 2025-04-28 20:02:23,946] Trial 65 finished with value: 12.690451584216905 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.15189421198484004, 'weight_decay': 0.8590902055436314, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 65 finished with MAE: 12.690451584216905


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▇▆▅▁▁▇▇▆▄▆█▆▇▇▇▆▅▅▃███▇▆▆▇▆▅▇▇▄▇▇▇▇▄
epoch,▂▄██▃▆▆▇▁▂▂▄▄▅▅▂▃▅▇▂▃▃▆▂▃▅▇█▁▂▇▅▇▁▂▄▅▅▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,107.99815
epoch,99
fold_10_mae,11.69237
fold_10_r2,0.56294
fold_10_status,completed
fold_1_mae,11.19971
fold_1_r2,0.5875
fold_1_status,completed
fold_2_mae,10.43947
fold_2_r2,0.62996


[I 2025-04-28 20:03:58,590] Trial 66 finished with value: 11.084561021343067 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.050042818219737384, 'weight_decay': 0.957595388117339, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 66 finished with MAE: 11.084561021343067


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▂▂▂▂▂▁▂▂▂▃▃▃▃▃▃▃▂▂▂▃▂▂▂▂▄▁▁▁▁▁▂▂▂█▂▂▁▁▁
epoch,▄▆▆▇█▆▇▇▁▃▅▇▅▆▂▇▁▁▃▃▅▆▁▃▃▄▄▆▆▇▄▄▄▅▆▆▃▄▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,109.29429
epoch,99
fold_10_mae,11.08708
fold_10_r2,0.58409
fold_10_status,completed
fold_1_mae,11.16508
fold_1_r2,0.59733
fold_1_status,completed
fold_2_mae,11.22838
fold_2_r2,0.54509


[I 2025-04-28 20:05:32,877] Trial 67 finished with value: 10.607988677030878 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.004974876148104209, 'weight_decay': 0.8638300871891916, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 67 finished with MAE: 10.607988677030878


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▆███▅▂█▆▆▇▇▅▇▇▇▁██▇▇▇▄▇██▃▃██▇▅▃▂▇▇▁
epoch,▄▄▅▅▆▇▂▅▆▇██▁▃▆▇█▂▃▆▄▆▇▇█▂▃▆▇▃▃▄▄▅▅▁▅▇▁▄
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,104.49651
epoch,99
fold_10_mae,10.6409
fold_10_r2,0.64625
fold_10_status,completed
fold_1_mae,13.12372
fold_1_r2,0.49104
fold_1_status,completed
fold_2_mae,10.99824
fold_2_r2,0.60763


[I 2025-04-28 20:07:05,346] Trial 68 finished with value: 11.325637828124083 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0730844594160794, 'weight_decay': 0.9907495546014509, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 68 finished with MAE: 11.325637828124083


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆███▇▆▄▅▆▆███▅▄█▆▅▆▇▆▆▆▅▆▇▅▆▆▆▄▆▆▅▄██▅█▁
epoch,▂▃▄▅▆▂▄▄▄▅▇▇▁▂▃▇▁▂▅▅▇▃█▃▄▆▆▂▃▄▄▅▆▇▁▅▆▅▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,107.74231
epoch,99
fold_10_mae,11.70592
fold_10_r2,0.55722
fold_10_status,completed
fold_1_mae,11.60908
fold_1_r2,0.59307
fold_1_status,completed
fold_2_mae,11.27164
fold_2_r2,0.59766


[I 2025-04-28 20:08:38,340] Trial 69 finished with value: 10.545091102834254 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.03344394668835478, 'weight_decay': 0.9151144114359268, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 69 finished with MAE: 10.545091102834254


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,█▃▅▁█▆▆▄██▅▄▂▃▇▇█▇▇▇▅▅▃██▇▄▄▃▃▇█▅▆▃███▇▅
epoch,▁▂▃▇█▃▄▄▅▆▁▂▂█▂▅▇▇▃▄▂▄▅▇▄▅▆▁▅▇▅▆▁▁▂▄▅▆▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,105.44958
epoch,99
fold_10_mae,16.66017
fold_10_r2,0.22718
fold_10_status,completed
fold_1_mae,17.93391
fold_1_r2,0.12568
fold_1_status,completed
fold_2_mae,17.81111
fold_2_r2,0.11839


[I 2025-04-28 20:10:13,233] Trial 70 finished with value: 17.5016565000687 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.10334816006445824, 'weight_decay': 0.851505169095089, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 56 with value: 9.96612572216802.



Trial 70 finished with MAE: 17.5016565000687


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,███▇▆▅▁▁███▇██▇▇▆███▆██▇█▇▇▄▃█▅▄███▇█▇▅▄
epoch,▃▃▅▅▆▇▁▃▄▅▅█▁▁▂▃▆▇█▇▁▂▃▆▆▆▃▄▅▆▆▃▄▅▇▂▅▇██
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,96.88186
epoch,99
fold_10_mae,10.55254
fold_10_r2,0.63546
fold_10_status,completed
fold_1_mae,10.92549
fold_1_r2,0.63363
fold_1_status,completed
fold_2_mae,10.22115
fold_2_r2,0.62874


[I 2025-04-28 20:11:46,921] Trial 71 finished with value: 10.306135107133994 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.1154505194376588, 'weight_decay': 0.9258360406798986, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 71 finished with MAE: 10.306135107133994


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▅▆▆▆▇▆▅▄▄▄▄▄▁▅▅▂▃▄▆▅▄▅▃▂▇▇▇▇▅▇███▆▇▇▆▆▄
epoch,▄▅▇█▃█▃▃▇▁▃▅▆▇▄▇▇▂▃▅▂▃▄▄▆█▂▂▂▄▇▁▃▅▅▂▃▄▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.83597
epoch,99
fold_10_mae,10.33629
fold_10_r2,0.63178
fold_10_status,completed
fold_1_mae,10.71561
fold_1_r2,0.63029
fold_1_status,completed
fold_2_mae,10.35338
fold_2_r2,0.63017


[I 2025-04-28 20:13:21,478] Trial 72 finished with value: 10.108859341776297 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.024831907902673735, 'weight_decay': 0.8873555250452763, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 72 finished with MAE: 10.108859341776297


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▅▅▅▅▄▄▄▄▄▃▄▄▃▂▃▃▃▃▃▃▃▃▃▃▄█▃▄▄▄▃▃▃▃▁▃▄▄▃
epoch,▄▆▆█▂▆▃▄▅▂▃▃▄▄▅█▁▂▂▃▁▄▅██▂▂▂▄▅▇▂▃▆▁▄▄▄▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.83716
epoch,99
fold_10_mae,10.77512
fold_10_r2,0.61509
fold_10_status,completed
fold_1_mae,10.36744
fold_1_r2,0.65264
fold_1_status,completed
fold_2_mae,10.76184
fold_2_r2,0.60747


[I 2025-04-28 20:14:56,184] Trial 73 finished with value: 10.140524410307815 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.019245777743210978, 'weight_decay': 0.8881719450464821, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 56 with value: 9.96612572216802.



Trial 73 finished with MAE: 10.140524410307815


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▃▃▂▂▂▂▁▁▁▁▃▃▂▂▂▃▃▁▅▃▃▆▂▂▂▄▃▃▃▃▂▂█▄▄▃▃▂▃▂
epoch,▁▃▄▅▅▂▃▃▄▄▅▅▇▃▄▅▇▇▂▇▂▄▅▆▆█▅▇▇▁▇▁▂▄▄▆█▂▃▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,110.1546
epoch,99
fold_10_mae,10.33838
fold_10_r2,0.64549
fold_10_status,completed
fold_1_mae,10.56192
fold_1_r2,0.65027
fold_1_status,completed
fold_2_mae,10.27821
fold_2_r2,0.63978


[I 2025-04-28 20:16:30,072] Trial 74 finished with value: 9.877532239154082 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0008774801523439884, 'weight_decay': 0.8900815210787304, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 74 finished with MAE: 9.877532239154082


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,██▇▅▅▇█▇▇▆▇▇▇▇▇▇▅▇▇▆▄▁██▇▅██▅█▇▇▇▆▇▇▇▆▆▅
epoch,▂▃▅▆▆██▂▃▅▂▇█▂▇▁▄▆▇▁▄▆▇▁▂▇▇▃▄▅▆▆▇█▅▂▄▅▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,106.87145
epoch,99
fold_10_mae,11.05374
fold_10_r2,0.5844
fold_10_status,completed
fold_1_mae,11.17641
fold_1_r2,0.58475
fold_1_status,completed
fold_2_mae,10.9463
fold_2_r2,0.60054


[I 2025-04-28 20:18:03,681] Trial 75 finished with value: 10.838359711157239 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.07365742897046712, 'weight_decay': 0.7837605388959439, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 75 finished with MAE: 10.838359711157239


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▁▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▆▁▂▂▂▂▂▂▁▁▁▁▁▁
epoch,▂▃▄▃▆▂▅▇█▃▁▁▄▅█▄▆▇▃▃▇▇▇▂▄▅▅▆▆▇▇█▂▅▇▂▃▃▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.95238
epoch,99
fold_10_mae,10.59165
fold_10_r2,0.61883
fold_10_status,completed
fold_1_mae,10.14687
fold_1_r2,0.67596
fold_1_status,completed
fold_2_mae,10.39133
fold_2_r2,0.63869


[I 2025-04-28 20:22:21,059] Trial 76 finished with value: 10.014197260779067 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0006192766350981774, 'weight_decay': 0.8777255035952366, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 76 finished with MAE: 10.014197260779067


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▂▁▁▁▁▂▂▂▁▁▄▂▂▂▃▂▂▂▂█▁▁▁▆▆▂▁▁▁▁▄▂▂▇▄▂▂▁▁▁
epoch,▂▃▃▃▄▇▂▂▃▃▁▃▇▁▁▄▄▅▁▂▄▅▁▄▆▇▄▇▂▂▆██▅█▂▄▅▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,151.70926
epoch,99
fold_10_mae,35.6231
fold_10_r2,-2.27016
fold_10_status,completed
fold_1_mae,36.12944
fold_1_r2,-2.31001
fold_1_status,completed
fold_2_mae,36.79386
fold_2_r2,-2.50528


[I 2025-04-28 20:26:38,828] Trial 77 finished with value: 35.97924566160553 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 3.324815788600943e-05, 'weight_decay': 0.8813147910277779, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 77 finished with MAE: 35.97924566160553


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▆▇▅▅▅▇▇▆▆▆▆▆▅▃▅▅▂▆▆▂▆▆▅▄▆▅▂▁█▇▇▇▆▆▆▄▆▆▅▄
epoch,▁▂▃▅▆▇▁▂▃▄▇█▁▃▄▇▁▃▆▆█▅▇▂▄▆▆▂▅▂▆▇▇█▁▃▃▄▄█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,149.09054
epoch,99
fold_10_mae,10.55351
fold_10_r2,0.62243
fold_10_status,completed
fold_1_mae,10.76265
fold_1_r2,0.64016
fold_1_status,completed
fold_2_mae,10.47471
fold_2_r2,0.62203


[I 2025-04-28 20:30:56,150] Trial 78 finished with value: 10.659975817991413 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.061585958995175355, 'weight_decay': 0.9692853264530963, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 78 finished with MAE: 10.659975817991413


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▄▅▅▄▄▃▃▂▃▅▅▁▅▆▆▆▆▃▂▅▆▅▅▁▄▅▄▄▄▄▄▃▃▂▃▃▂█▅▄
epoch,▃▆▇▁▄▇█▁▂▄▆▁▂▃▆▃▄▁▂▄▅▇▂▃▄▇▁▂▂▄▄▅▅▆█▃▄▄▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,151.14063
epoch,99
fold_10_mae,11.60013
fold_10_r2,0.56596
fold_10_status,completed
fold_1_mae,11.55227
fold_1_r2,0.57603
fold_1_status,completed
fold_2_mae,11.59022
fold_2_r2,0.55433


[I 2025-04-28 20:35:12,921] Trial 79 finished with value: 11.479631495389018 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.012750208836743002, 'weight_decay': 0.8417673829535456, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 79 finished with MAE: 11.479631495389018


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▂▂▃▆▃▅▃▄▆▆▂▃▃▂▃▃▂▄▄▅▄▂▂▃▃▆▁▁▃▅██▇▇▇▃▄▅▆▆
epoch,▁▃▅▆▇▄▄▄▅▆▆█▁▁▂▅▅▆▂▃▃▄▅██▆▇▄▅▆▂▃▃▁▃▆▇▂▅▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.35813
epoch,99
fold_10_mae,20.09452
fold_10_r2,0.0037
fold_10_status,completed
fold_1_mae,19.76623
fold_1_r2,0.01225
fold_1_status,completed
fold_2_mae,19.76642
fold_2_r2,-0.00187


[I 2025-04-28 20:36:46,992] Trial 80 finished with value: 19.481543688719434 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.04807761470565364, 'weight_decay': 0.5458538993776321, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 74 with value: 9.877532239154082.



Trial 80 finished with MAE: 19.481543688719434


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▅▄▂▃▃▄▄▃▄▂█▇▇▇▁▁▃▄▄▅▅▄▃▅▄▄▃▃▂▄▃▃▅▅▅▄▄▃▃
epoch,▂▄▄▅▂▃▄▄▅▆▇▇▁▁▂▄▃▅▆▇▇█▂▄▅▇█▂▃▃▆▇▇▁▅▂▁▂▂▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.98763
epoch,99
fold_10_mae,11.64279
fold_10_r2,0.5734
fold_10_status,completed
fold_1_mae,11.07252
fold_1_r2,0.60887
fold_1_status,completed
fold_2_mae,10.59354
fold_2_r2,0.60585


[I 2025-04-28 20:38:20,897] Trial 81 finished with value: 10.31893988246838 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.019654289902083757, 'weight_decay': 0.9471857210779333, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 81 finished with MAE: 10.31893988246838


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,█▇▆▅▃▅▇▇▆▆▆▁█▇▆▄▄▆▇▇▆▅▆▆▆▅▆▆▄▂▆▇▆▅▃▅▇▇▇▄
epoch,▁▂▄▅▆▇▇▁▂▂▁▂▄▄▆█▁▅▅▇▆▄▅██▂▂▅▅▄▅▃▅▅▅▁▂▅▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.84979
epoch,99
fold_10_mae,12.61564
fold_10_r2,0.47147
fold_10_status,completed
fold_1_mae,10.73655
fold_1_r2,0.62164
fold_1_status,completed
fold_2_mae,11.13434
fold_2_r2,0.60197


[I 2025-04-28 20:39:54,902] Trial 82 finished with value: 11.003934261901529 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.04463845713650357, 'weight_decay': 0.8200824594162366, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 82 finished with MAE: 11.003934261901529


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▇▇▇▇▇▇█▇▇▅▅▂▁▅▇▇▆▃▃▇▇▇▇▇▆▅▇█▇▄▇██▇▄██▆▆
epoch,▂▃▅▆▇█▃▄▅▅▂▃▅▇▇▃▅▆▇▁▅▆▂▃▆▁▅█▁▂▅█▁▃▄▆▇▅▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,147.45958
epoch,99
fold_10_mae,11.05174
fold_10_r2,0.60127
fold_10_status,completed
fold_1_mae,10.89929
fold_1_r2,0.62143
fold_1_status,completed
fold_2_mae,10.5317
fold_2_r2,0.63048


[I 2025-04-28 20:44:12,261] Trial 83 finished with value: 10.749670480702068 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.08381226110767928, 'weight_decay': 0.8875251165052683, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 83 finished with MAE: 10.749670480702068


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▆▆▆▆▆▆▃▇▄▄▃▃▆▆▆▃▇▇▅▄▆▇█▇▆▁▃▅▆▅▄▄▄▂▂▆▅▅▃▃
epoch,▁▅▁▃▄▅▆▇▁▃▄▄▄▅▃▆▃▄▄▆▃▅▆▆▇█▁█▂▆▂▃▄▅▇▃▄▆▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,108.8764
epoch,99
fold_10_mae,10.52002
fold_10_r2,0.64032
fold_10_status,completed
fold_1_mae,10.76484
fold_1_r2,0.62023
fold_1_status,completed
fold_2_mae,10.71263
fold_2_r2,0.58628


[I 2025-04-28 20:45:46,225] Trial 84 finished with value: 10.116192871070034 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.01903108356609185, 'weight_decay': 0.9952527349305832, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 84 finished with MAE: 10.116192871070034


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▆▇▅▅▄▃▄▄▄▄▅▆▆▅▄▄▅▄▄▃███▅▆███▆▁▃▂▅▅▅▁▃▃▃▂
epoch,▇▂▂▄▇▃▄▄▄▇▃▅▆█▂▅▆▇▁▅▆▇█▁▂▅▆█▅▆▁▆▆▆▆▇▂▂▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.81381
epoch,99
fold_10_mae,11.16234
fold_10_r2,0.59212
fold_10_status,completed
fold_1_mae,10.82975
fold_1_r2,0.62826
fold_1_status,completed
fold_2_mae,11.38489
fold_2_r2,0.55647


[I 2025-04-28 20:50:03,446] Trial 85 finished with value: 10.882245908117913 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.01742819842631981, 'weight_decay': 0.9989520832675354, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 85 finished with MAE: 10.882245908117913


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▆▆▆▅▃▃▃█▇▇▄▃▆▇▇▇▇▆▅▅▅▃▁▇▅▅▇██▄▇▄▃▇▅▅▄▇▄
epoch,▁▅▅▆▇▅▆█▁▂▁▄▅▅▇▅▆▇▅▆▁▂▃▅▇▁▂▂▄▆▂▃▄▆▂▆▆▆▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,107.71369
epoch,99
fold_10_mae,11.32638
fold_10_r2,0.57504
fold_10_status,completed
fold_1_mae,11.05683
fold_1_r2,0.60018
fold_1_status,completed
fold_2_mae,11.49471
fold_2_r2,0.56362


[I 2025-04-28 20:51:37,152] Trial 86 finished with value: 10.710399304768604 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.060787420995392194, 'weight_decay': 0.9695005876248358, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 86 finished with MAE: 10.710399304768604


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▇▅▇▇▇▇▇▇▇▆▃▇▇▆▆▃▃█▇▇▆▅▇▇▆█▇▄▂▇▇▅▃▁▁▇▄▇▇▁
epoch,▁▁▂▄▅▇█▂▄▆▁▄▆▆▇▄█▁▂▄▂▃▄▄▇▃▃▃▄▄▂▃▄▅█▁▂▄▆▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,95.06854
epoch,99
fold_10_mae,12.55239
fold_10_r2,0.54697
fold_10_status,completed
fold_1_mae,13.21684
fold_1_r2,0.5111
fold_1_status,completed
fold_2_mae,14.3466
fold_2_r2,0.44471


[I 2025-04-28 20:53:11,278] Trial 87 finished with value: 12.612750016148444 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.4056135180637783, 'weight_decay': 0.7668469580985204, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 87 finished with MAE: 12.612750016148444


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▇▆▆▃▇▅▅▇▇▆▃█▆▅▃▄▁▇█▆▅▅▅▇▆█▇▇▆▃▇▇▇█▇▇▇▇▂
epoch,▃▄▁▂▂▄▇▇▅▅▁▂▃▆▇██▆▇▃▃▆▇█▁▂▂▇▇▃▇▁▂▂▄▆▆▆▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,104.08063
epoch,99
fold_10_mae,10.66652
fold_10_r2,0.61417
fold_10_status,completed
fold_1_mae,20.3175
fold_1_r2,-0.06697
fold_1_status,completed
fold_2_mae,13.20314
fold_2_r2,0.474


[I 2025-04-28 20:54:45,954] Trial 88 finished with value: 12.86526458264704 and parameters: {'hidden_dim': 128, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.09350311478229423, 'weight_decay': 0.93886844189722, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 88 finished with MAE: 12.86526458264704


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▇▂▇▆▁▇▇▇▅▅▄▄▄▅▆▇▇▇▆▃▇▅▃██▄▃▂▆▆▅▂▂▇▇▄▃▇▇▃
epoch,▁▅▁▃▆▃▃▂▂▅▅▇▇▇█▃▃▇█▃▄▄▆▆▇▁▁▃▃▅▁▃▅▆▆▁▃▅██
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,94.28728
epoch,99
fold_10_mae,27.54566
fold_10_r2,-1.10557
fold_10_status,completed
fold_1_mae,30.21902
fold_1_r2,-1.21111
fold_1_status,completed
fold_2_mae,28.88689
fold_2_r2,-1.26062


[I 2025-04-28 20:56:17,783] Trial 89 finished with value: 28.60365106677269 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.040597813095638785, 'weight_decay': 0.7093800398804077, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': False, 'optimizer': 'AdamW'}. Best is trial 74 with value: 9.877532239154082.



Trial 89 finished with MAE: 28.60365106677269


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▂▂▂▃▂▁▅▃▂▂▂▃▂▂▂▂▁▃▂▅▂▂▂▂▁▂█▅▂▂▁▃▃▂▂▄▂▂▁▁
epoch,▄▆▆▆▇▄▂▃▄▄▅▁▁▆█▁▂▂▄▁▂▄▅▅▂▂▃▄▄▅▆▇▃▅▃▆▂▄▄▆
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,151.72983
epoch,99
fold_10_mae,10.20741
fold_10_r2,0.67194
fold_10_status,completed
fold_1_mae,10.26114
fold_1_r2,0.66959
fold_1_status,completed
fold_2_mae,9.79157
fold_2_r2,0.68618


[I 2025-04-28 21:00:35,467] Trial 90 finished with value: 9.709505771320321 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 6.138813704997803e-05, 'weight_decay': 0.6376163670090491, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 90 finished with MAE: 9.709505771320321


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▁▁▁▁▁▂▁▁▁▁▁▂▂▁▁▂▂▂▂█▁▁▁▁▂▃▂▁▁▁▁▁▁▂▂▁▁▁▁▁
epoch,▁▁▃▄▄▆▂▃▄▅▁▂▃▄▅▂▂▃▃▆▄▄▅▆▇▄▄▅▅▅▇▂▆▇▇▂▃▃▄█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.95324
epoch,99
fold_10_mae,10.44509
fold_10_r2,0.61971
fold_10_status,completed
fold_1_mae,10.61525
fold_1_r2,0.63735
fold_1_status,completed
fold_2_mae,10.31298
fold_2_r2,0.6334


[I 2025-04-28 21:04:51,528] Trial 91 finished with value: 10.002669633880856 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0006219306237738656, 'weight_decay': 0.6310047798367444, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 91 finished with MAE: 10.002669633880856


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,█▃▂▂▂▂▂▂▂▃▂▂▁▁▁▂▂▂▁▁▄▄▃▁▂▁▁▁▄▄▂▃▂▂▃▂▂▂▂▂
epoch,▅▆█▂▅▇▂▃▆▆▄▄▆▆▇▁▁▁▂▃▅▆▆▅▆▇▄▄▄▅▂▅▅▆█▃▃▄▅▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,151.12273
epoch,99
fold_10_mae,10.43695
fold_10_r2,0.61777
fold_10_status,completed
fold_1_mae,10.15266
fold_1_r2,0.67663
fold_1_status,completed
fold_2_mae,10.1622
fold_2_r2,0.64697


[I 2025-04-28 21:09:08,789] Trial 92 finished with value: 9.92491127481213 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.00045513615293842416, 'weight_decay': 0.6664200335615593, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 92 finished with MAE: 9.92491127481213


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▁▁▁▁▁▂▁▁▁▁▂▁█▁▁█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁
epoch,▂▃▄▄▅▇█▁▂▅█▃▅▆▇▃▄▅▇▃█▁▂▄▄▇█▁▃▃▇▃▃▅▅█▆▇▇█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.97619
epoch,99
fold_10_mae,10.47657
fold_10_r2,0.62404
fold_10_status,completed
fold_1_mae,10.46084
fold_1_r2,0.64954
fold_1_status,completed
fold_2_mae,10.31083
fold_2_r2,0.64148


[I 2025-04-28 21:13:25,462] Trial 93 finished with value: 9.973340799089076 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0005982307229017278, 'weight_decay': 0.6292851627262841, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 93 finished with MAE: 9.973340799089076


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▃▃▃▃▂▂▂▂▂▂▁▁▁▂▁▁▁▄▁▂▂▂▂▂▂▂▂▂▂▁▂▂▁▂▂▂▂▂█▂
epoch,▁▃▅▅█▃▃▅▇▁▂▄▂▂▂██▃▅▆▇▅▆▁▂▂▃▁▃▅▆▇▇▂▃▄▆▇██
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.94071
epoch,99
fold_10_mae,13.51479
fold_10_r2,0.45403
fold_10_status,completed
fold_1_mae,12.26868
fold_1_r2,0.51534
fold_1_status,completed
fold_2_mae,12.81406
fold_2_r2,0.47178


[I 2025-04-28 21:17:41,316] Trial 94 finished with value: 12.60172988650453 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0022714931407640487, 'weight_decay': 0.6500954573301613, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 94 finished with MAE: 12.60172988650453


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▃▂▂▂█▇▄▂▅▁▆▆▆▆▇▄▇▆▃▁▇▇▂▇▆▆▅▄▂▆▆▆▄▂▇▇▇▇▄▂
epoch,▅▅▆▇█▆▄▄▄▅▅▅▅▇█▆▂▄▅▇▅▅▆▂▃▄▄▄▆▆█▁▁▁▂▃█▃▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.52417
epoch,99
fold_10_mae,12.45424
fold_10_r2,0.52416
fold_10_status,completed
fold_1_mae,10.89631
fold_1_r2,0.61541
fold_1_status,completed
fold_2_mae,12.49055
fold_2_r2,0.51206


[I 2025-04-28 21:21:57,005] Trial 95 finished with value: 10.7317025509956 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.07327517926271737, 'weight_decay': 0.6047489290040589, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 95 finished with MAE: 10.7317025509956


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.916
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,██▆▅▅▂▃▃▃▁▁▁█▇▆▅▅▄▄▅▂▂▂▄▂▁▁▄▄▄▆▆▄▇▇▆▆▅▅▅
epoch,▃▅█▁▃█▃▆▇▃▆██▃▃▄▅▇▇█▅█▂▂▂▅▅▁▄▅▁▂▄▄▆▁▂▃▅█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.9956
epoch,99
fold_10_mae,10.44699
fold_10_r2,0.62384
fold_10_status,completed
fold_1_mae,10.55197
fold_1_r2,0.64163
fold_1_status,completed
fold_2_mae,10.19445
fold_2_r2,0.64368


[I 2025-04-28 21:26:15,324] Trial 96 finished with value: 10.05546330445617 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0006300685765751046, 'weight_decay': 0.6300251520285426, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 96 finished with MAE: 10.05546330445617


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915


  X_filtered = X[mask]
  y_filtered = y[mask]


Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


0,1
absolute_weight_entropy,▂▃▃▃▂▃▃█▄▂▂▂▂▂▂▃▃▃▃▃▃▁▁▁▁▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂
epoch,▃▃▂▆▆▇▇▂▅▆▆▆▄▄▄▆▇▂▂▃▅██▁▁▂▄▅▁▂▄▄▆▇█▆▆▇▆█
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,150.97446
epoch,99
fold_10_mae,14.9012
fold_10_r2,0.37515
fold_10_status,completed
fold_1_mae,15.13504
fold_1_r2,0.3673
fold_1_status,completed
fold_2_mae,11.59854
fold_2_r2,0.53707


[I 2025-04-28 21:30:31,880] Trial 97 finished with value: 13.297643736833013 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.0019222486780591755, 'weight_decay': 0.5621322078493703, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 97 finished with MAE: 13.297643736833013


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▅▅▅▅▄▃▅▅▄▄██▅▅▇▇▁▅▄▆▆▆▆▆▅▆▆▆▆▆▆▂▅▅▆▅▅▄▇▆
epoch,▃▅█▁▂▂▅▇▁▄▆▆██▂▆▇▃▃▄█▂▃▁▂▂▃▅▇▇▃▃▄▆▇▂▅▆▆▇
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,151.02567
epoch,99
fold_10_mae,11.12178
fold_10_r2,0.57587
fold_10_status,completed
fold_1_mae,11.09933
fold_1_r2,0.58959
fold_1_status,completed
fold_2_mae,10.67835
fold_2_r2,0.59125


[I 2025-04-28 21:34:53,157] Trial 98 finished with value: 10.592990422438607 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.038859535726657346, 'weight_decay': 0.6235748134774715, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': False, 'normalize_y': True, 'optimizer': 'AdamW'}. Best is trial 90 with value: 9.709505771320321.



Trial 98 finished with MAE: 10.592990422438607


  age_metadata = pd.read_csv('sampleMetadata.csv', index_col='sample_id', dtype={'age': float})


Original dimensions: (6128, 1265)
Reduced dimensions: (6128, 256)
Number of components: 256
Explained variance ratio: 0.915
Original samples: 6128
Samples after filtering strata with <10 occurrences: 5416
Removed 712 samples


  X_filtered = X[mask]
  y_filtered = y[mask]


0,1
absolute_weight_entropy,▁▂▆▆▆▂▄▆▆▆▆▂▂▆██▁▁▅▇▁▂▄██▇▇▁▁▂▁▁▂▂▂▇▇▂▆█
epoch,▁▁▂▃▃▄█▂▂█▄▄▆▇█▃▆▁▂▃▄▆▇▇█▅▆▆█▃▆▂▄▃▄▆▇▃▄▅
fold_10_mae,▁
fold_10_r2,▁
fold_1_mae,▁
fold_1_r2,▁
fold_2_mae,▁
fold_2_r2,▁
fold_3_mae,▁
fold_3_r2,▁

0,1
absolute_weight_entropy,149.92891
epoch,99
fold_10_mae,20.11318
fold_10_r2,0.00116
fold_10_status,completed
fold_1_mae,19.94275
fold_1_r2,-0.00564
fold_1_status,completed
fold_2_mae,19.91889
fold_2_r2,-0.02253


[I 2025-04-28 21:39:06,633] Trial 99 finished with value: 19.511890862871432 and parameters: {'hidden_dim': 1024, 'num_layers': 1, 'batch_size': 8192, 'learning_rate': 0.054609617975211466, 'weight_decay': 0.5070747945267132, 'test_split': 0.2, 'num_epochs': 100, 'normalize_X': True, 'normalize_y': True, 'optimizer': 'SGD'}. Best is trial 90 with value: 9.709505771320321.



Trial 99 finished with MAE: 19.511890862871432

Study statistics: 
  Number of finished trials: 100
  Number of pruned trials: 0
  Number of complete trials: 100

Best trial:
  Best MAE: 9.709505771320321

Best parameters:
    hidden_dim: 1024
    num_layers: 1
    batch_size: 8192
    learning_rate: 6.138813704997803e-05
    weight_decay: 0.6376163670090491
    test_split: 0.2
    num_epochs: 100
    normalize_X: False
    normalize_y: True
    optimizer: AdamW

Parameter importances:
    learning_rate: 0.458
    normalize_X: 0.144
    weight_decay: 0.116
    hidden_dim: 0.108
    normalize_y: 0.087
    optimizer: 0.071
    test_split: 0.016
    num_layers: 0.000
    num_epochs: 0.000
    batch_size: 0.000


[W 2025-04-28 21:39:08,793] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,794] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,795] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,795] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,796] Param num_epochs unique value length is less than 2.
[W 2025-04-28 21:39:08,796] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,797] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,798] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,798] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,799] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,800] Param num_epochs unique value length is less than 2.
[W 2025-04-28 21:39:08,802] Param batch_size unique value length is less than 2.
[W 2025-04-28 21:39:08,804] 

Error creating some plots: module 'optuna.visualization' has no attribute 'plot_param_relationships'
Some plots may not be available due to insufficient data or other requirements
