In [2]:
# === Core Data Science Libraries ===
import numpy as np
import pandas as pd

# === Visualization ===
import matplotlib.pyplot as plt
import seaborn as sns

# === PyTorch: Deep Learning ===
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from tqdm import tqdm  # Progress bar

# PyTorch Distributions
from torch.distributions.normal import Normal

# PyTorch Metrics
from torchmetrics.regression import MeanSquaredError, MeanAbsolutePercentageError

# === Machine Learning: Scikit-learn ===
from sklearn.model_selection import train_test_split, cross_validate, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    make_scorer, precision_score, recall_score, roc_auc_score,
    mean_squared_error, mean_absolute_error, r2_score
)
from sklearn.decomposition import PCA

# === Statistical & Distance-Based Methods ===
from scipy.spatial.distance import braycurtis, pdist, squareform
from scipy.spatial import procrustes
from scipy import stats
from scipy.stats import pearsonr, spearmanr, f_oneway, ttest_ind

# Multiple Hypothesis Testing
from statsmodels.stats.multitest import multipletests

# Ordination Methods
from skbio.stats.ordination import pcoa

# Distance-Based Statistical Tests
from skbio.stats.distance import permanova
from skbio import DistanceMatrix

# === Utility Libraries ===
import warnings
from collections import Counter

# === Hyperparameter Tuning & Distributed Training ===
import optuna

In [3]:
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and pandas chained-assignment warnings;
# consider narrowing the filter to the specific categories that are known to be noisy.
warnings.filterwarnings("ignore")

In [4]:
import os

# Cap the thread pools of the numeric back-ends at two threads.
# NOTE: numpy and torch were already imported in an earlier cell, so a back-end that
# reads these variables only when it is loaded may not pick them up here. The limit is
# therefore also applied to PyTorch explicitly below; to cover the other libraries,
# these assignments would have to run before the import cell.
os.environ.update({
    "OMP_NUM_THREADS": "2",
    "OPENBLAS_NUM_THREADS": "2",
    "MKL_NUM_THREADS": "2",
    "VECLIB_MAXIMUM_THREADS": "2",
    "NUMEXPR_NUM_THREADS": "2",
    # Debugging aid: asks CUDA to launch kernels synchronously.
    "CUDA_LAUNCH_BLOCKING": "1",
})

# Enforce the same limit on PyTorch's intra-op CPU thread pool at runtime.
torch.set_num_threads(2)

In [5]:
# Pick the compute device first so that everything below can depend on it.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if DEVICE == "cuda":
    # Make newly created float tensors live on the GPU by default. This is only valid when
    # CUDA is present, so it is guarded; previously it ran unconditionally, before the CPU
    # fallback of DEVICE was ever evaluated.
    # NOTE(review): recent PyTorch releases deprecate set_default_tensor_type in favour of
    # torch.set_default_dtype / torch.set_default_device — consider migrating.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Release cached GPU memory left over from previous runs in this kernel.
    torch.cuda.empty_cache()

In [6]:
# Switch the working directory so that the relative 'inputs/...' paths read below resolve.
# NOTE(review): hard-coded absolute path — adjust (or make configurable) when running elsewhere.
%cd /home/zuzannak/MicroFormer/

/home/zuzannak/MicroFormer


In [93]:
# Load the three input tables, indexed by their first CSV column and sorted by that index.
# Missing abundance values are replaced by zero; metadata is left untouched.
taxonomy = pd.read_csv('inputs/taxonomy_relab.csv', index_col = [0], low_memory=False).fillna(0).sort_index()
pathways = pd.read_csv('inputs/pathways_relab.csv', index_col = [0], low_memory=False).fillna(0).sort_index()
metadata = pd.read_csv('inputs/metadata.csv', index_col= [0], low_memory=False).sort_index()

# Per-sample richness = number of non-zero features in the row.
# Keep only samples whose richness is strictly above the 10th percentile.
pathways_sum           = pathways.astype(bool).astype(int).sum(axis=1).sort_values()
pathways_keep_subjects = pathways_sum[pathways_sum > pathways_sum.quantile(.1)].index
pathways               = pathways.loc[pathways_keep_subjects]

taxonomy_sum           = taxonomy.astype(bool).astype(int).sum(axis=1).sort_values()
taxonomy_keep_subjects = taxonomy_sum[taxonomy_sum > taxonomy_sum.quantile(.1)].index
taxonomy               = taxonomy.loc[taxonomy_keep_subjects]

# Samples that survived both richness filters. The ids are sorted so the row order of the
# tables is reproducible between runs (iterating a plain set gives no stable order).
keep_idx   = sorted(set(pathways.index).intersection(taxonomy.index))

# .copy() makes metadata an independent frame, so later column assignments do not operate
# on a slice of the frame that was read from disk.
metadata   = metadata[metadata['sample_id'].isin(keep_idx)].copy()
taxonomy   = taxonomy.loc[keep_idx]
pathways   = pathways.loc[keep_idx]

# Both tables now contain exactly the ids in keep_idx, so intersecting their indices again
# (and re-filtering metadata with the result) would change nothing. The name is kept
# because later cells reassign and use it.
common_idx = list(keep_idx)

In [94]:
# Binary health label: 0 for 'control', 1 for every other study_condition.
# assign() returns a new frame instead of writing a column into a filtered view.
metadata = metadata.assign(SICK=np.where(metadata.study_condition == 'control', 0, 1))

# Draw 1000 unique sample ids per class. Both draws are seeded so the selected cohort is
# identical on every run (the sick draw was previously unseeded).
keep_samples_H   = (
    metadata[(metadata['SICK'] == 0)]
    .drop_duplicates('sample_id', keep='first')
    .sample(1000, random_state=32)
    .sample_id.tolist()
)
keep_samples_S   = (
    metadata[(metadata['SICK'] == 1)]
    .drop_duplicates('sample_id', keep='first')
    .sample(1000, random_state=32)
    .sample_id.tolist()
)

keep_samples     = keep_samples_H + keep_samples_S

taxonomy         = taxonomy.loc[keep_samples]
# The first two pathway columns are dropped here.
# NOTE(review): presumably these are non-pathway summary columns — confirm against the input file.
pathways         = pathways.loc[keep_samples].iloc[:, 2:].dropna()

In [88]:
def filter_prevalence(df, treshold = 0.1):
    """Keep only the features (columns) that are present in enough samples (rows).

    A feature counts as present in a sample when its value is strictly greater than zero.
    Prevalence is counted directly from that test, so zero, negative and missing entries
    never contribute to (or subtract from) the count, and the input frame is not copied.

    Args:
        df: DataFrame with samples as rows and features as columns.
        treshold: Minimum fraction of samples a feature must be present in; a feature is
            kept when its presence count is strictly greater than ``len(df) * treshold``.
            (The parameter name keeps its original spelling so keyword callers still work.)

    Returns:
        ``df`` restricted to the retained columns, values unchanged.
    """
    prevalence = (df > 0).sum(axis=0)
    keep_features = prevalence[prevalence > df.shape[0] * treshold].index

    return df[keep_features]


def _dedupe_and_renormalise(table):
    """Drop repeated sample ids (first wins), rescale each row to sum to 1, drop NaN rows."""
    unique_rows = table.loc[~table.index.duplicated(keep='first')]
    row_totals = unique_rows.sum(axis=1)
    # Rows whose total is zero become NaN after the division and are removed by dropna().
    return unique_rows.div(row_totals, axis=0).dropna()


# Prevalence filter (features present in more than 10% of samples), then per-sample renormalisation.
filtered_taxonomy = _dedupe_and_renormalise(filter_prevalence(taxonomy))
filtered_pathways = _dedupe_and_renormalise(filter_prevalence(pathways, .1))

# Restrict both tables to the samples they still share and align their row order.
common_idx        = list(set(filtered_pathways.index).intersection(filtered_taxonomy.index))

filtered_taxonomy = filtered_taxonomy.loc[common_idx].sort_index()
# Pathway proportions are additionally scaled by a constant factor of 10.
filtered_pathways = filtered_pathways.loc[common_idx].sort_index()*10

In [89]:
# (rows, columns) of the two filtered tables; the row counts should be equal because both
# tables were restricted to the same common_idx.
filtered_pathways.shape, filtered_taxonomy.shape

((6000, 409), (6000, 196))

In [90]:
# Total number of NaN cells in each table; both are expected to be 0 because rows
# containing NaN were dropped when the tables were built.
filtered_pathways.isna().sum().sum(), filtered_taxonomy.isna().sum().sum()

(np.int64(0), np.int64(0))

In [12]:
class BacteriaModel(nn.Module):
    """Transformer autoencoder over taxon and pathway abundance vectors.

    Every feature (taxon or pathway) is treated as one token: its scalar abundance is
    projected to ``embedding_dim`` and added to a learned per-feature embedding. The two
    token sequences are concatenated, passed through two transformer encoder stacks,
    mean-pooled over the sequence and projected to a ``latent_size`` vector. For decoding,
    the latent vector is projected back to ``embedding_dim``, broadcast over the features,
    combined with the same per-feature embeddings and mapped back to one scalar per feature.
    """

    def __init__(self, num_pathways, num_bacteria, embedding_dim, latent_size):
        """Build the layers.

        Args:
            num_pathways: Number of pathway features (size of the pathway embedding table).
            num_bacteria: Number of taxon features (size of the taxonomy embedding table).
            embedding_dim: Token width used by all transformer layers.
            latent_size: Width of the latent vector returned by ``forward``.
        """
        super(BacteriaModel, self).__init__()
        
        self.embedding_dim = embedding_dim
        self.latent_size = latent_size
        
        # Learned per-feature identity embeddings, one table per modality
        self.pathways_embedding = nn.Embedding(num_pathways, embedding_dim) 
        self.taxonomy_embedding = nn.Embedding(num_bacteria, embedding_dim) # open question: do the embedding vectors correlate with phylogeny, i.e. does the model capture phylogeny without being given that information?
        
        # Linear layer to transform (B, #Bac, 1) to (B, #Bac, D); forward() applies this same layer to both modalities
        self.linear_layer = nn.Linear(1, embedding_dim)
        
        # Transformer encoder
        # NOTE(review): nn.TransformerEncoder clones the layer it is given, so the *_layer
        # attributes here appear to be separate registered copies that forward() never calls — confirm.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=2, batch_first=True, dropout=0.05)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)

        # Second encoder stage with nhead == embedding_dim, i.e. one attention head per embedding dimension
        self.final_encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=embedding_dim, batch_first=True)
        self.final_transformer_encoder = nn.TransformerEncoder(self.final_encoder_layer, num_layers=1)
        
        # Single deterministic projection to the latent space (no separate mean / log-variance heads)
        self.latent_layer = nn.Linear(embedding_dim, latent_size)
        
        # Linear transformation for latent vector
        self.linear_transform = nn.Linear(latent_size, embedding_dim)
        
        # "Decoder": built from encoder layers (self-attention only, no cross-attention)
        self.decoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=2, batch_first=True) 
        self.transformer_decoder = nn.TransformerEncoder(self.decoder_layer, num_layers=1) 
        
        # LayerNorm over the latent vector; defined here but not applied anywhere in forward()
        self.latent_norm = nn.LayerNorm(latent_size)

        # Final linear layer to transform (B, #Bac, D) to (B, #Bac, 1)
        self.output_transform = nn.Sequential(
            nn.Linear(embedding_dim, 1))
        
        # Xavier-initialise the weights and zero the biases of every nn.Linear submodule;
        # self.modules() is recursive, so Linear layers nested in the transformer layers are included.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
                
    
    def forward(self, pathways_tensor, bacteria_tensor):
        """Encode both modalities into one latent vector and reconstruct each of them.

        Note that the arguments are ordered (pathways, bacteria) while the reconstructions
        are returned in the order (bacteria, pathways).

        Args:
            pathways_tensor: Pathway abundances with a trailing singleton dimension,
                i.e. (batch, num_pathways, 1), as required by the Linear(1, D) projection.
            bacteria_tensor: Taxon abundances, (batch, num_bacteria, 1).

        Returns:
            Tuple ``(bacteria_reconstruction, pathways_reconstruction, latent)`` with shapes
            (batch, num_bacteria, 1), (batch, num_pathways, 1) and (batch, latent_size).
        """
        
        batch_size = pathways_tensor.size(0)
        num_pathways = pathways_tensor.size(1)
        num_bacteria = bacteria_tensor.size(1)

        # Step 1: Transform (B, #Features, 1) to (B, #Features, D) with the shared value projection
        pathways_transformed = self.linear_layer(pathways_tensor)
        bacteria_transformed = self.linear_layer(bacteria_tensor)
        
        # Step 2: Look up the per-feature embeddings (index i -> embedding of feature i)
        pathways_indices = torch.arange(num_pathways).to(pathways_tensor.device)
        pathways_encoded = self.pathways_embedding(pathways_indices)
        bacteria_indices = torch.arange(num_bacteria).to(bacteria_tensor.device)
        bacteria_encoded = self.taxonomy_embedding(bacteria_indices)
        
        # Expand the embeddings to the batch size and add them to the projected values
        pathways_encoded_expanded = pathways_encoded.unsqueeze(0).expand(batch_size, -1, -1)
        pathways_result = pathways_transformed + pathways_encoded_expanded
        bacteria_encoded_expanded = bacteria_encoded.unsqueeze(0).expand(batch_size, -1, -1)
        bacteria_result = bacteria_transformed + bacteria_encoded_expanded
        
        # One joint token sequence: taxa first, then pathways
        res_concat = torch.cat([bacteria_result, pathways_result], dim=1)

        # Step 3: Pass through both Transformer encoder stages
        output = self.transformer_encoder(res_concat)
        output = self.final_transformer_encoder(output)

        # Step 4: Average over sequence (mean along the sequence dimension)
        mean_vector = output.mean(dim=1)

        # Step 5: Project the pooled vector to the latent space
        latent = self.latent_layer(mean_vector)

        # Step 6: Transform the latent vector back to width D, giving (B, D)
        D_vector = self.linear_transform(latent)
        
        # Step 7: Repeat D_vector for each feature and add that feature's embedding
        D_vector_expanded = D_vector.unsqueeze(1).repeat(1, num_pathways, 1)
        pathways_result_with_encoding = D_vector_expanded + pathways_encoded_expanded

        D_vector_expanded_taxonomy = D_vector.unsqueeze(1).repeat(1, num_bacteria, 1)
        bacteria_result_with_encoding = D_vector_expanded_taxonomy + bacteria_encoded_expanded
        
        # Step 8: Pass each modality separately through the shared Transformer decoder
        pathways_output = self.transformer_decoder(pathways_result_with_encoding) # TODO: replace with the encoder and remove
        bacteria_output = self.transformer_decoder(bacteria_result_with_encoding) # TODO: replace with the encoder and remove
        
        # Step 9: Transform output to (B, #Features, 1)
        pathways_output_transformed = self.output_transform(pathways_output)
        bacteria_output_transformed = self.output_transform(bacteria_output)

        return bacteria_output_transformed, pathways_output_transformed, latent

#Loss function

def loss_function(taxonomy_x, taxonomy_x_hat, pathways_x, pathways_x_hat, distance_weight=.1, variance_weight=.1):
    """Combined reconstruction loss for the taxonomy and pathways outputs.

    For each modality three terms are computed and the two modalities are summed:

    * reconstruction: mean squared error between reconstruction and input;
    * distance preservation: MSE between the pairwise Euclidean distance matrix of the
      reconstructed batch and that of the original batch (samples vs. samples);
    * variance preservation: absolute difference between the overall variance of the
      input and of the reconstruction.

    Args:
        taxonomy_x: Original taxonomy data.
        taxonomy_x_hat: Reconstructed taxonomy data (same shape as ``taxonomy_x``).
        pathways_x: Original pathways data.
        pathways_x_hat: Reconstructed pathways data (same shape as ``pathways_x``).
        distance_weight: Weight of the distance-preservation term.
        variance_weight: Weight of the variance-preservation term. Defaults to 0.1,
            the value that used to be hard-coded.

    Returns:
        Scalar tensor:
        ``reconstruction + distance_weight * distance + variance_weight * variance``.
    """
    def pairwise_distances(x):
        # Drop the trailing singleton feature axis so each sample is one row vector.
        flat = x.squeeze(-1)
        return torch.cdist(flat, flat)

    def modality_terms(x, x_hat):
        reconstruction = F.mse_loss(x_hat, x, reduction='mean')
        distance = F.mse_loss(pairwise_distances(x_hat), pairwise_distances(x))
        variance = torch.abs(x.var() - x_hat.var())
        return reconstruction, distance, variance

    taxonomy_rec, taxonomy_dist, taxonomy_var = modality_terms(taxonomy_x, taxonomy_x_hat)
    pathways_rec, pathways_dist, pathways_var = modality_terms(pathways_x, pathways_x_hat)

    reconstruction_loss = taxonomy_rec + pathways_rec
    distance_loss = taxonomy_dist + pathways_dist
    variance_loss = taxonomy_var + pathways_var

    total_loss = (reconstruction_loss +
                  distance_weight * distance_loss +
                  variance_weight * variance_loss)

    # Only the weighted total is returned; the individual components are not exposed.
    return total_loss

### Param tuning

In [96]:
def calculate_disparity(ypred_tensor, ytrue_tensor):
    """Procrustes disparity between the PCoA ordinations of predictions and targets.

    Each tensor is moved to the CPU, its last axis is reduced to its first element, and the
    resulting (samples x features) matrix is turned into a pairwise distance matrix (pdist's
    default metric) and ordinated with PCoA. The two ordinations are compared with
    Procrustes analysis.

    Params
    ----------------------

    ypred_tensor: reconstructed batch, indexed as [sample, feature, 0]
    ytrue_tensor: original batch, same layout

    Returns the disparity value reported by ``procrustes``.
    """

    def ordinate(batch):
        matrix = batch.cpu().detach().numpy()[:, :, 0]
        distances = squareform(pdist(matrix))
        return pcoa(distances).samples

    predicted_coords = ordinate(ypred_tensor)
    target_coords = ordinate(ytrue_tensor)

    _, _, disparity = procrustes(predicted_coords, target_coords)
    return disparity
    

def run_anova(z, y):
    '''
    One-way ANOVA per latent dimension, comparing the groups defined by y.

    Params
    ----------------------

    z: latent layer tensor, one row per sample and one column per latent dimension
    y: group label per sample (here: study condition)

    Returns a DataFrame with one row per latent dimension and the columns
    "latent", "stat", "pvalue" and "qvalue" (Benjamini-Hochberg adjusted p-value).
    '''

    latent_df = pd.DataFrame(z.cpu().detach().numpy())
    latent_df['y'] = y

    # The grouping does not depend on the latent dimension, so it is built once.
    grouped_frames = [frame for _, frame in latent_df.groupby('y')]

    records = []
    for dim in range(z.shape[1]):
        samples_per_group = [frame.iloc[:, dim].tolist() for frame in grouped_frames]
        stat, pval = f_oneway(*samples_per_group)
        records.append({"latent": dim, "stat": stat, "pvalue": pval})

    anova_results_df = pd.DataFrame(records)
    adjusted = multipletests(anova_results_df["pvalue"], method="fdr_bh")
    anova_results_df['qvalue'] = adjusted[1]

    return anova_results_df

def run_ttest(z, y):
    '''
    Welch's t-test per latent dimension between sick and healthy samples.

    Params
    ----------------------

    z: latent layer tensor, one row per sample and one column per latent dimension
    y: health status per sample, 1: sick, 0: healthy

    Returns a DataFrame with one row per latent dimension and the columns
    "latent", "stat", "pvalue" and "qvalue" (Benjamini-Hochberg adjusted p-value).
    '''

    latent_df = pd.DataFrame(z.cpu().detach().numpy())
    latent_df['y'] = y

    is_sick = latent_df['y'] == 1
    is_healthy = latent_df['y'] == 0

    def test_dimension(dim):
        sick_values = latent_df.loc[is_sick, dim].to_numpy()
        healthy_values = latent_df.loc[is_healthy, dim].to_numpy()
        # equal_var=False selects Welch's variant (no equal-variance assumption)
        stat, pval = ttest_ind(sick_values, healthy_values, equal_var=False)
        return {"latent": dim, "stat": stat, "pvalue": pval}

    ttest_results_df = pd.DataFrame([test_dimension(dim) for dim in range(z.shape[1])])
    adjusted = multipletests(ttest_results_df["pvalue"], method="fdr_bh")
    ttest_results_df['qvalue'] = adjusted[1]

    return ttest_results_df

def run_permanova(z, y):
    '''
    PERMANOVA on the latent vectors with y as the grouping variable.

    Pairwise cosine distances between the samples' latent vectors are computed and
    passed to scikit-bio's permanova together with the grouping.

    Params
    ----------------------

    z: latent layer tensor, one row per sample
    y: grouping per sample (health status 1: sick, 0: healthy, or study condition)

    Returns the PERMANOVA p-value as a float.
    '''

    latent_df = pd.DataFrame(z.cpu().detach().numpy())
    cosine_distances = squareform(pdist(latent_df, 'cosine'))
    result = permanova(DistanceMatrix(cosine_distances), y)

    return float(result['p-value'])

def run_latent_analysis(model, taxonomy_val, pathways_val):

    '''
    Embed the validation samples and run the statistical tests on the latent layer.

    The model is moved to the CPU (in place) and put into eval mode. If it is wrapped in
    nn.DataParallel, the wrapped module is used directly: a DataParallel wrapper cannot
    be called once its parameters have left the GPU. Inference runs under no_grad because
    only the detached latent values are needed.

    Relies on the notebook-level ``metadata`` frame (columns sample_id, SICK,
    study_condition) to look up the labels of each sample.

    Params
    ----------------------
    model: pretrained model (BacteriaModel or a DataParallel wrapper around one)
    taxonomy_val: taxonomy table of the validation samples, indexed by sample id
    pathways_val: pathways table of the validation samples, indexed by sample id

    Returns
    ----------------------
    anova_df: per-dimension ANOVA across study conditions
    ttest_df: per-dimension t-test between sick and healthy
    permanova_disease_value: PERMANOVA p-value grouped by study condition
    permanova_status_value: PERMANOVA p-value grouped by health status
    '''

    inference_model = model.module if isinstance(model, nn.DataParallel) else model
    inference_model = inference_model.cpu()
    inference_model.eval()  # disable dropout so the latent vectors are deterministic

    # Concatenating on the index aligns the rows of the two tables sample by sample.
    X = pd.concat([taxonomy_val, pathways_val], axis=1)

    health_status_dict = dict(zip(metadata.sample_id, metadata.SICK))
    disease_dict       = dict(zip(metadata.sample_id, metadata.study_condition))

    y_health_status    = X.index.map(health_status_dict).values.reshape(len(X))
    y_study_condition  = X.index.map(disease_dict).values.reshape(len(X))

    # Split features back into the two modalities (taxonomy columns come first)
    X_taxonomy = X.iloc[:, :taxonomy_val.shape[1]]
    X_pathways = X.iloc[:, taxonomy_val.shape[1]:]

    # Convert to (samples, features, 1) float tensors on the CPU
    taxonomy_tensor = torch.tensor(X_taxonomy.values).float().unsqueeze(-1).cpu()
    pathways_tensor = torch.tensor(X_pathways.values).float().unsqueeze(-1).cpu()

    with torch.no_grad():
        # The reconstructions are not needed here, only the latent vectors.
        _, _, val_z = inference_model(pathways_tensor, taxonomy_tensor)

    anova_df = run_anova(val_z, y_study_condition)
    ttest_df = run_ttest(val_z, y_health_status)

    permanova_disease_value = run_permanova(val_z, y_study_condition)
    permanova_status_value = run_permanova(val_z, y_health_status)

    return anova_df, ttest_df, permanova_disease_value, permanova_status_value

In [99]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def run_model(params, train_taxonomy, train_pathways):
    """Train a BacteriaModel on paired taxonomy/pathways tables and record per-epoch metrics.

    Both tables are multiplied by 100 and split 80/20 (fixed seed) into a training and a
    held-out part. They are split in a single call and batched through one paired
    dataset, so a taxonomy row and its pathways row can never drift apart.

    Args:
        params: Mapping with the keys ``batch_size``, ``lr``, ``embedding_dim`` and
            ``latent_dim``. An optional ``num_epochs`` key overrides the default of 30.
        train_taxonomy: DataFrame of taxonomy features, one row per sample.
        train_pathways: DataFrame of pathway features with the same rows, in the same order.

    Returns:
        ``(history, model)``: ``history`` is a list with one dict of averaged metrics per
        epoch; ``model`` is the trained model (wrapped in ``nn.DataParallel`` when more
        than one GPU is visible), left in eval mode.
    """
    batch_size    = params['batch_size']
    lr            = params['lr']
    embedding_dim = params['embedding_dim']
    latent_dim    = params['latent_dim']
    num_epochs    = params.get('num_epochs', 30)

    # One call splits both tables with the same row permutation.
    (X_train_taxonomy, X_test_taxonomy,
     X_train_pathways, X_test_pathways) = train_test_split(
        train_taxonomy*100, train_pathways*100, test_size=0.2, random_state=0)

    # Model and Data Parallelization
    num_bacteria = train_taxonomy.shape[1]
    num_pathways = train_pathways.shape[1]

    model = BacteriaModel(num_pathways, num_bacteria, embedding_dim, latent_dim).to(DEVICE)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    def paired_loader(taxonomy_df, pathways_df):
        # (samples, features) -> (samples, features, 1), the layout the model expects.
        taxonomy_tensor = torch.tensor(taxonomy_df.values).float().unsqueeze(-1)
        pathways_tensor = torch.tensor(pathways_df.values).float().unsqueeze(-1)
        # Batches are served in row order (no shuffling), as before.
        return DataLoader(TensorDataset(taxonomy_tensor, pathways_tensor),
                          batch_size=batch_size,
                          shuffle=False)

    train_dataloader = paired_loader(X_train_taxonomy, X_train_pathways)
    test_dataloader  = paired_loader(X_test_taxonomy, X_test_pathways)

    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training Loop
    history = []

    for epoch in range(num_epochs):

        model.train()

        training_loss      = 0.0
        train_steps        = 0.0
        train_taxonomy_mse = 0.0
        train_pathways_mse = 0.0

        for xT, xP in train_dataloader:

            xT = xT.to(DEVICE)
            xP = xP.to(DEVICE)

            optimizer.zero_grad()

            xT_pred, xP_pred, z = model(xP, xT)
            loss = loss_function(xT, xT_pred, xP, xP_pred)

            # Plain MSE per modality, tracked for monitoring only.
            taxonomy_mse = nn.functional.mse_loss(xT_pred, xT, reduction='mean').item()
            pathways_mse = nn.functional.mse_loss(xP_pred, xP, reduction='mean').item()

            # Each batch builds a fresh graph and is back-propagated exactly once,
            # so the graph does not need to be retained.
            loss.backward()
            optimizer.step()

            training_loss      += loss.item()
            train_taxonomy_mse += taxonomy_mse
            train_pathways_mse += pathways_mse
            train_steps        += 1

        model.eval()

        test_loss               = 0.0
        test_steps              = 0.0
        test_taxonomy_mse       = 0.0
        test_pathways_mse       = 0.0
        test_taxonomy_disparity = 0.0
        test_pathways_disparity = 0.0

        with torch.no_grad():

            for xT, xP in test_dataloader:

                xT = xT.to(DEVICE)
                xP = xP.to(DEVICE)

                xT_pred, xP_pred, z = model(xP, xT)
                loss = loss_function(xT, xT_pred, xP, xP_pred)

                # NOTE(review): disparity is computed per batch via PCoA + Procrustes; a final
                # batch with very few samples may not be valid input for them — confirm.
                test_taxonomy_disparity += calculate_disparity(xT_pred, xT)
                test_pathways_disparity += calculate_disparity(xP_pred, xP)

                taxonomy_mse = nn.functional.mse_loss(xT_pred, xT, reduction='mean')
                pathways_mse = nn.functional.mse_loss(xP_pred, xP, reduction='mean')

                test_loss         += loss.item()
                test_taxonomy_mse += taxonomy_mse.item()
                test_pathways_mse += pathways_mse.item()
                test_steps        += 1

        # All metrics are averages over the number of batches in the epoch.
        history.append({"epoch":epoch,
                     "train_loss":training_loss/train_steps,
                     "train_taxonomy_mse":train_taxonomy_mse/train_steps,
                     "train_pathways_mse":train_pathways_mse/train_steps,
                     "test_loss":test_loss/test_steps,
                     "test_taxonomy_mse":test_taxonomy_mse/test_steps,
                     "test_pathways_mse":test_pathways_mse/test_steps,
                     "test_taxonomy_disparity":test_taxonomy_disparity/test_steps,
                     "test_pathways_disparity":test_pathways_disparity/test_steps})

    return history, model

In [None]:
import optuna
import torch
import pandas as pd

# Separate data into validation and train set
# Validation, sick part: the same number of samples (200 // 6) from each listed condition.
val_idx_sick = (
        metadata[metadata['study_condition'].isin(['IBD', 'hypertension', 'CRC', 'T2D', 'T1D', 'cirrhosis'])]
        .groupby('study_condition')
        .sample(n=200 // 6, random_state=42).sample_id
)

# Validation, healthy part: 100 control samples.
val_idx_healthy = (
        metadata[metadata['study_condition'] == 'control']
        .sample(n=100, random_state=42).sample_id
)

# Concatenate the two id lists. Adding the two Series with `+` would instead align them on
# their index labels and combine element-wise, which does not yield the list of validation
# ids — and the training set would then still contain the validation samples.
val_idx   = val_idx_sick.tolist() + val_idx_healthy.tolist()
train_idx = metadata[~metadata.sample_id.isin(val_idx)].sample_id

filtered_pathways_train = filtered_pathways[filtered_pathways.index.isin(train_idx)]
filtered_taxonomy_train = filtered_taxonomy[filtered_taxonomy.index.isin(train_idx)]

# The validation tables are scaled by 100 here; run_model applies the same factor to the
# training tables internally.
filtered_pathways_val = filtered_pathways[filtered_pathways.index.isin(val_idx)] *100
filtered_taxonomy_val = filtered_taxonomy[filtered_taxonomy.index.isin(val_idx)] *100


# Storage for results (filled by objective(), one entry per trial)
trial_results     = []
trained_models    = {}
combined_anova_df = []
combined_ttest_df = []
permanova_df      = []

def objective(trial):
    """Objective function for Optuna hyperparameter tuning.

    Samples one hyperparameter configuration, trains a model with it, runs the latent-space
    tests on the validation tables and stores everything in the notebook-level containers
    (trial_results, combined_anova_df, combined_ttest_df, permanova_df, trained_models).

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The lowest per-epoch test loss of the run, which the study minimizes.
    """

    params = {
        "batch_size": trial.suggest_categorical("batch_size", [16, 32, 64]),
        # Log-uniform search over the learning rate. suggest_float(..., log=True) is the
        # supported replacement for the deprecated suggest_loguniform.
        "lr": trial.suggest_float("lr", 1e-5, 1e-2, log=True),
        "embedding_dim": trial.suggest_categorical("embedding_dim", [32, 64, 128]),
        "latent_dim": trial.suggest_categorical("latent_dim", [4, 8, 16, 32, 64]),
    }

    history, model = run_model(params, filtered_taxonomy_train, filtered_pathways_train)

    best_test_loss = min(epoch["test_loss"] for epoch in history)

    anova_df, ttest_df, permanova_disease_value, permanova_status_value = run_latent_analysis(model, filtered_taxonomy_val, filtered_pathways_val)

    trial_results.append({
        "trial": trial.number,
        "params": params,
        "history": history,
        "best_test_loss": best_test_loss
    })

    combined_anova_df.append(anova_df)
    combined_ttest_df.append(ttest_df)

    permanova_df.append({"trial":trial.number,
                         "disease_pvalue": permanova_disease_value,
                         "status_pvalue":permanova_status_value})
    # Save the trained model's weights, keyed by trial number
    trained_models[trial.number] = model.state_dict()

    return best_test_loss  # We aim to minimize test loss

# Run Optuna optimization

n_trials = 20
study = optuna.create_study(direction="minimize")  # Minimize the best test loss returned by objective()
study.optimize(objective, n_trials=n_trials)  # Run n_trials trials

# Convert results to DataFrame (one row per trial appended by objective())
results_df = pd.DataFrame(trial_results)

[I 2025-02-14 14:01:44,324] A new study created in memory with name: no-name-196fd655-d319-45bd-b8f5-2c69b00e082a


In [None]:
# Long-format training history: one row per (trial, epoch), with the trial number and the
# trial's hyperparameters repeated on every row.
# Iterating over trial_results itself avoids depending on a separate trial counter (the
# previous loop used `num_trials`, which is not defined anywhere in the notebook).
trial_frames = [
    pd.DataFrame(result['history']).assign(trial=result['trial'], **result['params'])
    for result in trial_results
]

# Concatenate once instead of growing the frame inside the loop; fall back to an empty
# frame when no trial has been recorded.
TRIAL_DF = pd.concat(trial_frames) if trial_frames else pd.DataFrame()