# Parameter Optimization with Optuna

In this example we train a random forest model and optimize its `max_depth` parameter using [Optuna](https://optuna.readthedocs.io/en/stable/).
The example is adapted from the Optuna [Basic Concept example](https://optuna.readthedocs.io/en/stable/#basic-concepts).



In [1]:
from zntrack import config
# File name of this notebook. The cell outputs further down show nbconvert
# turning exactly this file into `parameter_optimization.py` whenever ZnTrack
# writes a stage, so the name has to match the notebook on disk.
config.nb_name = 'parameter_optimization.ipynb'

In [2]:
from zntrack.utils import cwd_temp_dir
# Run the example inside a throw-away directory (the `git init` output below
# reports a path under /tmp). The handle is kept so that the final cell can
# call `temp_dir.cleanup()`.
temp_dir = cwd_temp_dir()

In [3]:
# Initialise an empty Git repository and a DVC repository in the current
# working directory.
!git init
!dvc init

Initialized empty Git repository in /tmp/tmpvoiw83fu/.git/
Initialized DVC repository.

You can now commit the changes to git.

+---------------------------------------------------------------------+
|                                                                     |
|        DVC has enabled anonymous aggregate usage analytics.         |
|     Read the analytics documentation (and how to opt-out) here:     |
|             <https://dvc.org/doc/user-guide/analytics>              |
|                                                                     |
+---------------------------------------------------------------------+

What's next?
------------
- Check out the documentation: <https://dvc.org/doc>
- Get help and share ideas: <https://dvc.org/chat>
- Star us on GitHub: <https://github.com/iterative/dvc>


[![](https://mermaid.ink/img/pako:eNp1jz0PgjAQhv8KuVkG1InBCY2zsFmHCz2wSXslpdUYwn_3YpSwuF2e572vCVqvCUrorH-2dwwxayrFKrLg4qrg-ECbMJKC2w9vBZ99Gg33FUasKa7kTuQFWXt38oHGtdqLagIaboTXgzVruc3zwye0DPqCYmn_B3aKYQOOgkOj5ZVJcZYpiHdycnYppaYOk5V1imeJYoq-fnELZQyJNpAGLR9WBvuADsoO7UjzG6bTY5I?type=png)](https://mermaid.live/edit#pako:eNp1jz0PgjAQhv8KuVkG1InBCY2zsFmHCz2wSXslpdUYwn_3YpSwuF2e572vCVqvCUrorH-2dwwxayrFKrLg4qrg-ECbMJKC2w9vBZ99Gg33FUasKa7kTuQFWXt38oHGtdqLagIaboTXgzVruc3zwye0DPqCYmn_B3aKYQOOgkOj5ZVJcZYpiHdycnYppaYOk5V1imeJYoq-fnELZQyJNpAGLR9WBvuADsoO7UjzG6bTY5I)

In [4]:
import optuna, sklearn, zntrack
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection

class HousingDataSet(zntrack.Node):
    """Provide the California housing dataset to downstream nodes.

    ``run`` fetches the dataset into the directory stored in ``data``, which
    is declared as a DVC output. ``features`` and ``labels`` load the dataset
    again from that same directory each time they are accessed.
    """

    # Directory passed to scikit-learn as ``data_home``; declared as a DVC output.
    data = zntrack.dvc.outs("scikit_learn_data")

    def _fetch(self) -> tuple:
        """Load the dataset from ``self.data`` as a ``(features, labels)`` pair.

        This is the single place where ``fetch_california_housing`` is called,
        so ``run`` and both properties always use identical arguments.
        """
        return sklearn.datasets.fetch_california_housing(
            data_home=self.data, return_X_y=True
        )

    def run(self) -> None:
        """Fetch the dataset so that ``self.data`` is populated.

        The returned arrays are intentionally discarded; only the files
        written to ``self.data`` matter here.
        """
        self._fetch()

    @property
    def labels(self):
        """Regression targets of the dataset.

        Returns:
            The second element of the ``(data, target)`` pair that
            scikit-learn returns for ``return_X_y=True`` (an array, not a
            dict).
        """
        return self._fetch()[1]

    @property
    def features(self):
        """Feature matrix of the dataset.

        Returns:
            The first element of the ``(data, target)`` pair that
            scikit-learn returns for ``return_X_y=True`` (an array, not a
            dict).
        """
        return self._fetch()[0]

class TrainTestSplit(zntrack.Node):
    """Split features and labels into a training and a test partition.

    A fraction of 0.2 of the samples is held out for testing. ``seed`` is
    forwarded to scikit-learn as ``random_state``.
    """

    # Inputs supplied by an upstream node.
    labels = zntrack.zn.deps()
    features = zntrack.zn.deps()
    # Forwarded to ``train_test_split`` as ``random_state``.
    seed = zntrack.zn.params(1234)

    # Results of the split, one output per partition.
    train_features = zntrack.zn.outs()
    test_features = zntrack.zn.outs()
    train_labels = zntrack.zn.outs()
    test_labels = zntrack.zn.outs()

    def run(self) -> None:
        """Run ``train_test_split`` and store the four resulting partitions."""
        partitions = sklearn.model_selection.train_test_split(
            self.features,
            self.labels,
            test_size=0.2,
            random_state=self.seed,
        )
        # The result is ordered as (train, test) for each input, in the order
        # the inputs were passed: features first, then labels.
        (
            self.train_features,
            self.test_features,
            self.train_labels,
            self.test_labels,
        ) = partitions

class RandomForest(zntrack.Node):
    """Fit a ``RandomForestRegressor`` on the training partition.

    ``seed`` and ``max_depth`` are parameters of the node and are passed to
    the regressor as ``random_state`` and ``max_depth``.
    """

    # Training data supplied by an upstream node.
    train_features = zntrack.zn.deps()
    train_labels = zntrack.zn.deps()
    # Hyperparameters of the regressor.
    seed = zntrack.zn.params(1234)
    max_depth = zntrack.zn.params(5)

    # The fitted regressor.
    model = zntrack.zn.outs()

    def run(self) -> None:
        """Build the regressor, fit it, and store it in ``self.model``."""
        regressor = sklearn.ensemble.RandomForestRegressor(
            max_depth=self.max_depth,
            random_state=self.seed,
        )
        regressor.fit(self.train_features, self.train_labels)
        self.model = regressor

class Evaluate(zntrack.Node):
    """Score a fitted model on the test partition and record it as a metric."""

    # Fitted model and held-out data supplied by upstream nodes.
    model = zntrack.zn.deps()
    test_features = zntrack.zn.deps()
    test_labels = zntrack.zn.deps()

    # Value returned by the model's own ``score`` method.
    score = zntrack.zn.metrics()

    def run(self) -> None:
        """Call ``model.score`` on the test data and store the result."""
        estimator = self.model
        self.score = estimator.score(self.test_features, self.test_labels)

In [5]:
# Wire the four nodes together. Attributes of one node are passed as the
# inputs of the next, which defines the order of the pipeline.
with zntrack.Project() as project:
    data = HousingDataSet()
    split = TrainTestSplit(labels=data.labels, features=data.features)
    model = RandomForest(
        train_features=split.train_features,
        train_labels=split.train_labels,
    )
    evaluate = Evaluate(
        model=model.model,
        test_features=split.test_features,
        test_labels=split.test_labels,
    )

# As the output below shows, this adds one DVC stage per node and then runs
# `dvc repro`.
project.run()

Running DVC command: 'stage add --name HousingDataSet --force ...'
Jupyter support is an experimental feature! Please save your notebook before running this command!
Submit issues to https://github.com/zincware/ZnTrack.
 

 

[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
Running DVC command: 'stage add --name TrainTestSplit --force ...'
 

 

[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
Running DVC command: 'stage add --name RandomForest --force ...'
 

 

[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
Running DVC command: 'stage add --name Evaluate --force ...'
 

 

[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
Running DVC command: 'repro'


In [6]:
# Commit everything generated so far (dvc.yaml, params.yaml, the node sources
# under src/, ...).
# NOTE(review): the commit output below shows that `git add .` also picks up
# src/__pycache__/*.pyc -- consider ignoring `__pycache__/` before committing.
!git add .
!git commit -m "initial commit"

 [main (root-commit) e97b1e8] initial commit
 20 files changed, 956 insertions(+)
 create mode 100644 .dvc/.gitignore
 create mode 100644 .dvc/config
 create mode 100644 .dvcignore
 create mode 100644 .gitignore
 create mode 100644 dvc.lock
 create mode 100644 dvc.yaml
 create mode 100644 nodes/Evaluate/score.json
 create mode 100644 nodes/RandomForest/.gitignore
 create mode 100644 nodes/TrainTestSplit/.gitignore
 create mode 100644 parameter_optimization.ipynb
 create mode 100644 params.yaml
 create mode 100644 src/Evaluate.py
 create mode 100644 src/HousingDataSet.py
 create mode 100644 src/RandomForest.py
 create mode 100644 src/TrainTestSplit.py
 create mode 100644 src/__pycache__/Evaluate.cpython-310.pyc
 create mode 100644 src/__pycache__/HousingDataSet.cpython-310.pyc
 create mode 100644 src/__pycache__/RandomForest.cpython-310.pyc
 create mode 100644 src/__pycache__/TrainTestSplit.cpython-310.pyc
 create mode 100644 zntrack.json


 

In [7]:
def objective(trial):
    """Optuna objective: run one ZnTrack experiment and return its score.

    Args:
        trial: The Optuna trial. It supplies the ``max_depth`` value to try
            (an integer between 2 and 32) and the trial number used in the
            experiment name ``exp-<number>``.

    Returns:
        The ``score`` attribute of the ``Evaluate`` node as found in the
        experiment created for this trial.
    """
    experiment_name = f"exp-{trial.number}"
    with project.create_experiment(queue=False, name=experiment_name) as exp:
        # Changing the parameter inside the context makes it part of this
        # experiment.
        suggested_depth = trial.suggest_int("max_depth", 2, 32)
        model.max_depth = suggested_depth

    evaluation = exp[evaluate.name]
    return evaluation.score

# Seed the sampler so that the sequence of suggested `max_depth` values, and
# with it the best trial reported in the following cells, is reproducible
# across runs. TPESampler is also what Optuna uses when no sampler is given,
# so only the seeding changes.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=1234),
)
# Each trial runs the full pipeline once as its own experiment.
study.optimize(objective, n_trials=10)

[I 2023-05-25 15:57:08,485] A new study created in memory with name: no-name-66fb6eaf-a176-4474-8a53-56fddb4b2538
[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
[I 2023-05-25 15:57:25,428] Trial 0 finished with value: 0.8051355813484385 and parameters: {'max_depth': 17}. Best is trial 0 with value: 0.8051355813484385.
[NbConvertApp] Converting notebook parameter_optimization.ipynb to script
[NbConvertApp] Writing 3959 bytes to parameter_optimization.py
[NbConvertApp] Converting notebook paramet

In [8]:
# Parameter values of the trial with the highest score.
study.best_params

{'max_depth': 25}

In [9]:
# Load the RandomForest node from the experiment that belongs to the best
# trial; the revision name follows the `exp-<trial number>` scheme used in
# `objective`.
best_model = model.from_rev(rev=f"exp-{study.best_trial.number}")
# Should match `study.best_params["max_depth"]`.
best_model.max_depth

25

In [10]:
# Remove the temporary working directory created at the top of the notebook.
temp_dir.cleanup()