# Installs & Imports

In [1]:
!pip install -q optuna cmaes

In [2]:
import optuna
from optuna.samplers import TPESampler, RandomSampler, CmaEsSampler
import tensorflow as tf
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import StratifiedKFold

# Load the Data

In [3]:
# Function to load and preprocess the data
def load_and_preprocess_data():
    # Load the dataset
    data = load_iris()
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df['target'] = data.target

    # Split the data into features and target
    X = df.drop('target', axis=1)
    y = df['target']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train.values, y_test.values

In [4]:
# Load and preprocess the data
X_train, X_test, y_train, y_test = load_and_preprocess_data()

# Print shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (120, 4)
X_test shape: (30, 4)
y_train shape: (120,)
y_test shape: (30,)


# Model Optimization

In [5]:
# Function to define and compile the model based on the optimizer and its parameters
def compile_model(trial):
    model = tf.keras.Sequential([
        layers.Dense(trial.suggest_int('units1', 16, 128), activation='relu'),
        layers.Dense(trial.suggest_int('units2', 16, 128), activation='relu'),
        layers.Dense(3, activation='softmax')
    ])

    # Select and configure the optimizer
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    if optimizer_name == 'Adam':
        beta_1 = trial.suggest_float('beta_1', 0.8, 0.999)
        beta_2 = trial.suggest_float('beta_2', 0.8, 0.999)
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=beta_1, beta_2=beta_2)
    elif optimizer_name == 'RMSprop':
        rho = trial.suggest_float('rho', 0.8, 0.999)
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=rho)
    else:
        momentum = trial.suggest_float('momentum', 0.0, 0.9)
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)

    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [6]:
# Function to perform K-Fold Cross-Validation and return the average validation accuracy
def model_optimization(trial, X_train, y_train):
    model = compile_model(trial)

    skf = StratifiedKFold(n_splits=5)
    accuracies = []

    for train_index, val_index in skf.split(X_train, y_train):
        X_tr, X_val = X_train[train_index], X_train[val_index]
        y_tr, y_val = y_train[train_index], y_train[val_index]

        history = model.fit(X_tr, y_tr, validation_data=(X_val, y_val),
                            epochs=5, batch_size=trial.suggest_int('batch_size', 32, 128), verbose=0)

        val_acc = history.history['val_accuracy'][-1]
        accuracies.append(val_acc)

    return sum(accuracies) / len(accuracies)

# Study Optimization

In [7]:
# Function to run the study optimization with the appropriate sampler strategy
def study_optimization(trial, X_train, y_train):
    # Select the pruner
    pruner_name = trial.suggest_categorical('pruner', ['MedianPruner', 'HyperbandPruner', 'NopPruner'])
    pruner = getattr(optuna.pruners, pruner_name)()

    # Select the sampler
    sampler_name = trial.suggest_categorical('sampler', ['TPESampler', 'RandomSampler', 'CmaEsSampler'])

    if sampler_name == 'CmaEsSampler':
        # If CmaEsSampler is selected, use TPESampler for categorical parameters
        base_sampler = TPESampler()

        def inner_study_optimization(inner_trial):
            optimizer_name = inner_trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
            return model_optimization(inner_trial, X_train, y_train)

        # Create a nested study with CmaEsSampler for continuous parameters
        continuous_sampler = CmaEsSampler(warn_independent_sampling=False)
        nested_study = optuna.create_study(direction='maximize', sampler=continuous_sampler, pruner=pruner)
        nested_study.optimize(inner_study_optimization, n_trials=20)

        return nested_study.best_value

    else:
        # If TPESampler or RandomSampler is selected, use it for the whole optimization
        sampler = getattr(optuna.samplers, sampler_name)()
        study = optuna.create_study(direction='maximize', sampler=sampler, pruner=pruner)
        study.optimize(lambda t: model_optimization(t, X_train, y_train), n_trials=20)

        return study.best_value

# Meta-Optimization

In [8]:
# Function to run the meta-optimization to find the best study parameters
def run_meta_optimization(X_train, y_train):
    meta_study = optuna.create_study(direction='maximize')
    meta_study.optimize(lambda t: study_optimization(t, X_train, y_train), n_trials=10)
    return meta_study

In [9]:
# Meta-optimization to optimize the Optuna study parameters
meta_study = run_meta_optimization(X_train, y_train)

[I 2024-08-26 00:59:33,069] A new study created in memory with name: no-name-67eb8551-82ef-4f52-abc8-c41efcf70bff
[I 2024-08-26 00:59:33,073] A new study created in memory with name: no-name-5897a4e0-9698-47f7-bcb2-0b61fef9122e
[I 2024-08-26 00:59:39,509] Trial 0 finished with value: 0.875 and parameters: {'units1': 70, 'units2': 56, 'optimizer': 'RMSprop', 'learning_rate': 0.001223789846088445, 'rho': 0.9486317224041942, 'batch_size': 48}. Best is trial 0 with value: 0.875.
[I 2024-08-26 00:59:51,012] Trial 1 finished with value: 0.5666666746139526 and parameters: {'units1': 37, 'units2': 97, 'optimizer': 'SGD', 'learning_rate': 0.007729566253774043, 'momentum': 0.3970202272784246, 'batch_size': 96}. Best is trial 0 with value: 0.875.
[I 2024-08-26 01:00:05,002] Trial 2 finished with value: 0.7666666746139527 and parameters: {'units1': 62, 'units2': 55, 'optimizer': 'Adam', 'learning_rate': 0.00034898055449182196, 'beta_1': 0.800914380337444, 'beta_2': 0.8855966600473282, 'batch_size'

In [10]:
# Function to get the best study and model parameters
def get_best_study_and_params(meta_study):
    best_trial = meta_study.best_trial
    print('Best meta-optimized study:')
    print(f'  Value: {best_trial.value}')
    print(f'  Parameters: {best_trial.params}')

    # Determine the best sampler and pruner
    sampler_name = best_trial.params['sampler']
    pruner = getattr(optuna.pruners, best_trial.params['pruner'])()

    if sampler_name == 'CmaEsSampler':
        # For CmaEsSampler, use TPESampler for categorical parameters
        sampler = CmaEsSampler(warn_independent_sampling=False)
    else:
        sampler = getattr(optuna.samplers, sampler_name)()

    return sampler, pruner, best_trial.params

In [11]:
# Get the best study parameters
sampler, pruner, params = get_best_study_and_params(meta_study)

Best meta-optimized study:
  Value: 0.9749999880790711
  Parameters: {'pruner': 'MedianPruner', 'sampler': 'TPESampler'}


In [12]:
# Function to get the optimizer based on the best parameters
def get_optimizer(params):
    optimizer_name = params['optimizer']
    learning_rate = params['learning_rate']

    if optimizer_name == 'Adam':
        return tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=params['beta_1'], beta_2=params['beta_2'])
    elif optimizer_name == 'RMSprop':
        return tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=params['rho'])
    else:
        return tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=params['momentum'])

In [13]:
# Function to build and compile the model using the best parameters
def build_and_compile_model(params):
    model = tf.keras.Sequential([
        layers.Dense(params['units1'], activation='relu'),
        layers.Dense(params['units2'], activation='relu'),
        layers.Dense(3, activation='softmax')
    ])

    optimizer = get_optimizer(params)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

In [14]:
# Function to train the model with the best parameters
def train_model_with_best_params(sampler, pruner, params, X_train, y_train, X_test, y_test):
    def inner_study_optimization(inner_trial):
        # Use TPESampler for categorical parameters within CmaEsSampler
        optimizer_name = inner_trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
        return model_optimization(inner_trial, X_train, y_train)

    if isinstance(sampler, CmaEsSampler):
        # When using CmaEsSampler, handle categorical parameters separately
        nested_study = optuna.create_study(direction='maximize', sampler=sampler, pruner=pruner)
        nested_study.optimize(inner_study_optimization, n_trials=20)
        model_params = nested_study.best_trial.params
    else:
        # Use the selected sampler directly
        best_study = optuna.create_study(direction='maximize', sampler=sampler, pruner=pruner)
        best_study.optimize(lambda t: model_optimization(t, X_train, y_train), n_trials=20)
        model_params = best_study.best_trial.params

    model = build_and_compile_model(model_params)
    model.fit(X_train, y_train, epochs=5, batch_size=model_params['batch_size'], verbose=0)

    return model

In [15]:
# Train the model using the best parameters
model = train_model_with_best_params(sampler, pruner, params, X_train, y_train, X_test, y_test)
print('\n')
model.summary()

[I 2024-08-26 01:11:09,334] A new study created in memory with name: no-name-d0555a2c-f3aa-471c-807a-33a6d086d5cb
[I 2024-08-26 01:11:12,703] Trial 0 finished with value: 0.7 and parameters: {'units1': 124, 'units2': 116, 'optimizer': 'Adam', 'learning_rate': 0.00018116293820070892, 'beta_1': 0.9049324003155907, 'beta_2': 0.8727720605939158, 'batch_size': 97}. Best is trial 0 with value: 0.7.
[I 2024-08-26 01:11:16,617] Trial 1 finished with value: 0.5000000059604645 and parameters: {'units1': 83, 'units2': 113, 'optimizer': 'SGD', 'learning_rate': 2.1423091006483323e-05, 'momentum': 0.6987591904596353, 'batch_size': 83}. Best is trial 0 with value: 0.7.
[I 2024-08-26 01:11:19,982] Trial 2 finished with value: 0.9666666507720947 and parameters: {'units1': 31, 'units2': 128, 'optimizer': 'Adam', 'learning_rate': 0.04679033792788358, 'beta_1': 0.8239077766956813, 'beta_2': 0.9356178712262642, 'batch_size': 34}. Best is trial 2 with value: 0.9666666507720947.
[I 2024-08-26 01:11:23,036] T





# Evaluate the Model

In [16]:
# Function to evaluate the model on the test set
def evaluate_model_on_test_set(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = y_pred.argmax(axis=1)
    test_accuracy = accuracy_score(y_test, y_pred_classes)

    print(f'Test accuracy of the best model: {test_accuracy:.4f}')
    return test_accuracy

In [17]:
# Function to evaluate the best model
def evaluate_best_model(meta_study, X_train, X_test, y_train, y_test):
    sampler, pruner, params = get_best_study_and_params(meta_study)
    model = train_model_with_best_params(sampler, pruner, params, X_train, y_train, X_test, y_test)
    evaluate_model_on_test_set(model, X_test, y_test)

In [18]:
# Evaluate the best model
evaluate_best_model(meta_study, X_train, X_test, y_train, y_test)

[I 2024-08-26 01:12:18,575] A new study created in memory with name: no-name-03e9882f-0ab9-4afe-b758-e491c26bdf8f


Best meta-optimized study:
  Value: 0.9749999880790711
  Parameters: {'pruner': 'MedianPruner', 'sampler': 'TPESampler'}


[I 2024-08-26 01:12:22,331] Trial 0 finished with value: 0.9333333253860474 and parameters: {'units1': 23, 'units2': 67, 'optimizer': 'RMSprop', 'learning_rate': 0.01586500078754188, 'rho': 0.9435331774759447, 'batch_size': 118}. Best is trial 0 with value: 0.9333333253860474.
[I 2024-08-26 01:12:25,171] Trial 1 finished with value: 0.7083333373069763 and parameters: {'units1': 63, 'units2': 124, 'optimizer': 'SGD', 'learning_rate': 0.0018369402708760555, 'momentum': 0.5973867178756546, 'batch_size': 34}. Best is trial 0 with value: 0.9333333253860474.
[I 2024-08-26 01:12:28,373] Trial 2 finished with value: 0.3 and parameters: {'units1': 59, 'units2': 43, 'optimizer': 'Adam', 'learning_rate': 7.905890034049579e-05, 'beta_1': 0.9771404143270667, 'beta_2': 0.9965213417885412, 'batch_size': 93}. Best is trial 0 with value: 0.9333333253860474.
[I 2024-08-26 01:12:32,270] Trial 3 finished with value: 0.924999988079071 and parameters: {'units1': 78, 'units2': 49, 'optimizer': 'Adam', 'learn

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Test accuracy of the best model: 0.9667
