# Demonstration -- Using `MEMENTO` to tune hyperparameters

This demo showcases hyperparameter tuning using `MEMENTO`.
This is similar to `sklearn.model_selection.GridSearchCV`.
The advantages of using `MEMENTO` are:

1. You have better control over the entire pipeline. This is useful when more than one step requires hyperparameter tuning.
2. `MEMENTO` caches and checkpoints your results, so if the experiment takes a while to run, you don't have to worry about losing your results.


In [1]:
import functools
import logging

import numpy as np
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from memento import Config, ConsoleNotificationProvider, Context, Memento


In [2]:
# Configure the root logger so that records at INFO level and above are shown.
logging.basicConfig(level=logging.INFO)


In [3]:
# Pre-bind `return_X_y=True` so the loader can later be called with no arguments.
load_breast_cancer = functools.partial(datasets.load_breast_cancer, return_X_y=True)

# Configuration matrix: each key under "parameters" maps to a list of candidate
# values; "settings" holds values shared by every run.
matrix = {
    "parameters": {
        "dataset": [load_breast_cancer],
        "preprocessing": [StandardScaler()],
        "classifier": [SVC],
        # The two hyperparameters we want to tune:
        "svm_C": [1, 10, 100, 1000],
        "svm_gamma": [1e-3, 1e-4],
    },
    # Global values:
    "settings": {"n_fold": 5},  # number of folds for cross-validation
}


In [4]:
def experiment(context: Context, config: Config):
    """Evaluate one parameter combination with cross-validation.

    Args:
        context: Object used for checkpointing; this function calls its
            ``checkpoint_exist()``, ``restore()`` and ``checkpoint()`` methods.
        config: The parameter set for this run; this function reads its
            ``dataset``, ``svm_C``, ``svm_gamma``, ``classifier``,
            ``preprocessing`` and ``settings`` attributes.

    Returns:
        The mean of the per-fold scores multiplied by 100.
    """
    if not context.checkpoint_exist():
        # No saved result yet: read the parameters for this run.
        features, labels = config.dataset()
        svm_kwargs = {"C": config.svm_C, "gamma": config.svm_gamma}
        classifier = config.classifier(**svm_kwargs)

        # Global constant shared across all runs.
        n_fold = config.settings["n_fold"]

        # Preprocessing followed by the classifier, scored fold by fold.
        model = make_pipeline(config.preprocessing, classifier)
        fold_scores = cross_val_score(model, features, labels, cv=n_fold)

        # Persist the scores so a later call can restore them instead.
        context.checkpoint(fold_scores)
    else:
        fold_scores = context.restore()
    return fold_scores.mean() * 100


In [5]:
# Build the runner first, then execute it over the configuration matrix.
# `results` is iterated below, one entry per executed configuration.
notification_provider = ConsoleNotificationProvider()
runner = Memento(experiment, notification_provider)
results = runner.run(matrix)


INFO:memento.memento:Running configurations:
INFO:memento.memento:  {'dataset': functools.partial(<function load_breast_cancer at 0x00000295FB2D33A0>, return_X_y=True), 'preprocessing': StandardScaler(), 'classifier': <class 'sklearn.svm._classes.SVC'>, 'svm_C': 1, 'svm_gamma': 0.001}
INFO:memento.memento:  {'dataset': functools.partial(<function load_breast_cancer at 0x00000295FB2D33A0>, return_X_y=True), 'preprocessing': StandardScaler(), 'classifier': <class 'sklearn.svm._classes.SVC'>, 'svm_C': 1, 'svm_gamma': 0.0001}
INFO:memento.memento:  {'dataset': functools.partial(<function load_breast_cancer at 0x00000295FB2D33A0>, return_X_y=True), 'preprocessing': StandardScaler(), 'classifier': <class 'sklearn.svm._classes.SVC'>, 'svm_C': 10, 'svm_gamma': 0.001}
INFO:memento.memento:  {'dataset': functools.partial(<function load_breast_cancer at 0x00000295FB2D33A0>, return_X_y=True), 'preprocessing': StandardScaler(), 'classifier': <class 'sklearn.svm._classes.SVC'>, 'svm_C': 10, 'svm_gam

All tasks completed


In [6]:
# Collect the value returned by `experiment` for each run (`.inner`),
# rounded to two decimals, and print one value per line.
avg_accs = np.array([run.inner for run in results]).round(2)
print("\n".join(str(acc) for acc in avg_accs))


94.73
79.62
97.01
94.73
97.01
97.19
97.37
97.19


In [7]:
# Report the highest rounded score and its position in `avg_accs`.
print("         Best result:", avg_accs.max())
print("Index of best result:", avg_accs.argmax())


         Best result: 97.37
Index of best result: 6
