https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/005_visualization.html#sphx-glr-download-tutorial-10-key-features-005-visualization-py

In [1]:
# Re-import edited modules automatically before each cell runs, so changes to
# project code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pandas as pd
from pathlib import Path

import optuna

# You can use Matplotlib instead of Plotly for visualization by simply replacing `optuna.visualization` with
# `optuna.visualization.matplotlib` in the following examples.
from optuna.visualization.matplotlib import plot_contour
from optuna.visualization.matplotlib import plot_edf
from optuna.visualization.matplotlib import plot_intermediate_values
from optuna.visualization.matplotlib import plot_optimization_history
from optuna.visualization.matplotlib import plot_parallel_coordinate
from optuna.visualization.matplotlib import plot_param_importances
from optuna.visualization.matplotlib import plot_rank
from optuna.visualization.matplotlib import plot_slice
from optuna.visualization.matplotlib import plot_timeline



In [3]:
from reprpo.training import train
import tyro
from reprpo.experiments import experiment_configs
from reprpo.hp.space import search_spaces

from reprpo.interventions import Interventions, DPOConfig, ReprPOConfig, ProjGradConfig
from reprpo.interventions.losses import Losses
from reprpo.interventions.transforms import Transforms

## Objective

In [4]:
# Reproducibility and evaluation settings used by the cells below.
SEED = 42
key_metric = "acc_gain_vs_ref/oos"  # entry of the training results used as the Optuna objective

# Seed torch's global RNG and pick the GPU when one is visible, otherwise the CPU.
torch.manual_seed(SEED)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Silence noisy output: keep only WARNING-and-above loguru messages and set the
# environment switches for wandb, HF datasets and tqdm.
import os
import sys

from loguru import logger

# Drop every existing loguru sink, then install a single stderr sink at WARNING level.
# `sys.stderr` is used directly; `os.sys` only works because `os` happens to import `sys`.
logger.remove()
logger.add(sys.stderr, level="WARNING")

# NOTE(review): these are set after the `reprpo` imports in the earlier cell have run;
# any library that reads its environment at import time may not see them — TODO confirm.
os.environ["WANDB_MODE"] = "disabled"
os.environ["HF_DATASETS_OFFLINE"] = "1"
os.environ["TQDM_DISABLE"] = "true"

In [6]:
# Optuna storage URL, plus the matching filesystem path so its parent folder exists
# before the study is created.
# NOTE(review): the saved output of this cell shows '../outputs/optuna/optuna.db', which
# does not match the URL below — confirm which location is intended before re-running.
f_db = "sqlite:///optuna.db"
f = f_db.replace('sqlite:///', './')
print(f)
Path(f).parent.mkdir(parents=True, exist_ok=True)
f_db

./../outputs/optuna/optuna.db


'sqlite:///../outputs/optuna/optuna.db'

In [7]:
from reprpo.hp.target import override, default_tuner_kwargs
from reprpo.experiments import experiment_configs
import copy
import wandb

import optuna.pruners
from optuna_integration.wandb import WeightsAndBiasesCallback

Note on pruning: it is only really useful when there are validation metrics to report and the jobs are long, running over many epochs. This is a small proxy job, so there is no need for it and the study uses `NopPruner`.

In [8]:
MAX_TRIALS = 150


def list2tuples(d):
    """Replace every list value of ``d`` with an equivalent tuple, in place.

    Args:
        d: Dictionary whose values may include lists.

    Returns:
        The same dictionary object, with list values converted to tuples and
        all other values left untouched.
    """
    list_keys = [key for key, value in d.items() if isinstance(value, list)]
    for key in list_keys:
        d[key] = tuple(d[key])
    return d

for starter_experiment_name, trial2args in search_spaces.items():
    # One Optuna study per search space, named after the starting experiment.
    study_name = f"{starter_experiment_name}"

    def objective_func(kwargs, trial):
        """Train one sampled configuration for the current experiment.

        Args:
            kwargs: Hyperparameter overrides produced by ``trial2args``.
            trial: The Optuna trial, forwarded to ``train``.

        Returns:
            Whatever ``train`` returns; ``objective`` indexes it with ``key_metric``.
        """
        # Work on a deep copy so overrides do not modify the entry held in
        # `experiment_configs`. Element [1] of the entry is used as the config
        # (presumably the entries are (description, config) pairs — verify).
        cfg = copy.deepcopy(experiment_configs[starter_experiment_name][1])
        override(cfg, default_tuner_kwargs)

        # The list -> tuple conversion has to happen before the overrides are
        # applied, otherwise it never reaches `cfg`.
        override(cfg, list2tuples(kwargs))
        return train(cfg, trial=trial)

    def objective(trial: optuna.Trial) -> float:
        """Optuna objective: sample parameters, train, and return ``key_metric``."""
        kwargs = trial2args(trial)
        results = objective_func(kwargs, trial)
        return results[key_metric]

    # os.environ["WANDB_NOTEBOOK_NAME"] = f"{study_name}.ipynb"
    # wandb_kwargs = {"project": "reprpo-optuna", "name": study_name}
    # wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)

    # `load_if_exists=True` resumes a study of the same name from the storage.
    study = optuna.create_study(
        study_name=study_name,
        direction="maximize",
        load_if_exists=True,
        storage=f_db,
        sampler=optuna.samplers.TPESampler(seed=SEED),
        pruner=optuna.pruners.NopPruner(),
    )

    # Only COMPLETE trials count towards the MAX_TRIALS budget.
    n = 0
    if len(study.trials) > 0:
        df = study.trials_dataframe().query('state == "COMPLETE"').sort_values('value', ascending=False)
        n = len(df)

        print(f"loaded {n} {study_name} trials")

    # Top the study up to MAX_TRIALS instead of running MAX_TRIALS more trials
    # every time the notebook is resumed.
    n_remaining = MAX_TRIALS - n
    if n_remaining > 0:
        study.optimize(
            objective,
            n_trials=n_remaining,
            # callbacks=[wandbc],
            gc_after_trial=True,
            # Exceptions raised by a trial mark it as failed and the sweep continues.
            # NOTE(review): KeyboardInterrupt is a BaseException, so this tuple does
            # not match it; the earlier note here said interrupting required a kernel
            # restart — TODO confirm where the interrupt gets swallowed.
            catch=(Exception,),
        )

    # `study.best_trial` raises ValueError when no trial has completed
    # (e.g. every trial failed and was caught above).
    try:
        print('study.best_trial', study.best_trial)
    except ValueError:
        print(f"{study_name}: no completed trials yet")

    wandb.finish(quiet=True)

[I 2024-09-30 02:58:36,753] Using an existing study with name 'projgrad' instead of creating a new one.
[I 2024-09-30 02:58:36,851] Using an existing study with name 'side-ether-prefvec' instead of creating a new one.


loaded 175 projgrad trials
study.best_trial FrozenTrial(number=70, state=1, values=[1.057915057915058], datetime_start=datetime.datetime(2024, 9, 28, 17, 25, 12, 131962), datetime_complete=datetime.datetime(2024, 9, 28, 17, 28, 13, 416392), params={'learning-rate': 0.00012426382563887213, 'β': 0.7386239719822631, 'reverse_pref': True, 'scale_orth': True, 'weight_dim': 0, 'neg_slope': 0.5}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'learning-rate': FloatDistribution(high=0.001, log=True, low=1e-06, step=None), 'β': FloatDistribution(high=1.0, log=False, low=0.0, step=None), 'reverse_pref': CategoricalDistribution(choices=(True, False)), 'scale_orth': CategoricalDistribution(choices=(True, False)), 'weight_dim': IntDistribution(high=2, log=False, low=0, step=1), 'neg_slope': CategoricalDistribution(choices=(0, 0.1, 0.5, 1))}, trial_id=71, value=None)
loaded 31 side-ether-prefvec trials


[I 2024-09-30 03:01:20,332] Trial 44 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.00021464035477145843, 'collect_input': True, 'collect_hs': False, 'nb': 5, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 152, 'loss.β': 0.6226035469402692, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.62 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     6 |


[I 2024-09-30 03:04:01,924] Trial 45 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.00020757436511793347, 'collect_input': True, 'collect_hs': False, 'nb': 1, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 145, 'loss.β': 0.6373707455879735, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.64 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     4 |


[I 2024-09-30 03:06:47,168] Trial 46 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 0.0006888160729576332, 'collect_input': True, 'collect_hs': False, 'nb': 5, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 159, 'loss.β': 0.22321319116166402, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.22 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     2 |


[I 2024-09-30 03:09:29,643] Trial 47 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 0.0009929797083787107, 'collect_input': True, 'collect_hs': False, 'nb': 3, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 166, 'loss.β': 0.11561495742911734, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.12 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     2 |


[I 2024-09-30 03:12:13,166] Trial 48 finished with value: 1.009727626459144 and parameters: {'learning-rate': 0.0009215538922070097, 'collect_input': True, 'collect_hs': False, 'nb': 3, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 169, 'loss.β': 0.14295801587444845, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.14 lr=0.00 save=False |       0 |   -0.8 | 0.973 |     4 |


[I 2024-09-30 03:14:55,839] Trial 49 finished with value: 1.0077821011673151 and parameters: {'learning-rate': 0.0007951216826524699, 'collect_input': True, 'collect_hs': False, 'nb': 7, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 135, 'loss.β': 0.03973193280847746, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.04 lr=0.00 save=False |       0 |   -0.8 | 0.778 |     2 |


[I 2024-09-30 03:17:37,541] Trial 50 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 0.0004984072165931108, 'collect_input': True, 'collect_hs': False, 'nb': 1, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 181, 'loss.β': 0.13432290565296606, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.13 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     2 |


[I 2024-09-30 03:20:20,307] Trial 51 finished with value: 1.0058365758754864 and parameters: {'learning-rate': 5.359532529391008e-05, 'collect_input': True, 'collect_hs': False, 'nb': 3, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 166, 'loss.β': 0.015886966491091902, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.02 lr=0.00 save=False |       0 |   -0.8 | 0.584 |     4 |


[I 2024-09-30 03:23:03,222] Trial 52 finished with value: 1.0019455252918288 and parameters: {'learning-rate': 0.0003626239564023434, 'collect_input': True, 'collect_hs': False, 'nb': 6, 'Htype': 'ether', 'flip_side': True, 'reduction': 130, 'loss.β': 0.0026026708390502946, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.00 lr=0.00 save=False |       0 |   -0.8 | 0.195 |     4 |


[I 2024-09-30 03:25:55,481] Trial 53 finished with value: 1.0038910505836576 and parameters: {'learning-rate': 0.00013414601104622023, 'collect_input': False, 'collect_hs': False, 'nb': 3, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 106, 'loss.β': 0.05231635407106709, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                  |   train |   test |   oos |   rnd |
|:--------------------------------------|--------:|-------:|------:|------:|
| ReprPO loss.β=0.05 lr=0.00 save=False |       0 |   -0.8 | 0.389 |     4 |


[I 2024-09-30 03:28:38,235] Trial 54 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.0005634720082975826, 'collect_input': True, 'collect_hs': False, 'nb': 1, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 185, 'loss.β': 0.11686563092919451, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 38 with value: 1.0136186770428015.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.12 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     2 |


[I 2024-09-30 03:31:20,673] Trial 55 finished with value: 1.017509727626459 and parameters: {'learning-rate': 0.0005060344034244899, 'collect_input': True, 'collect_hs': False, 'nb': 4, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 177, 'loss.β': 0.26337702593055473, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.26 lr=0.00 save=False |       0 |   -0.8 | 1.751 |     4 |


[I 2024-09-30 03:34:02,089] Trial 56 finished with value: 1.0058365758754864 and parameters: {'learning-rate': 0.0003151031895233303, 'collect_input': True, 'collect_hs': False, 'nb': 7, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 173, 'loss.β': 0.2500118618419787, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.25 lr=0.00 save=False |       0 |   -0.8 | 0.584 |     2 |


[I 2024-09-30 03:36:44,453] Trial 57 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 0.0006359686990827014, 'collect_input': True, 'collect_hs': False, 'nb': 9, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 161, 'loss.β': 0.9381941402199833, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.94 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     2 |


[I 2024-09-30 03:39:27,217] Trial 58 finished with value: 1.0058365758754864 and parameters: {'learning-rate': 0.0004333911663221619, 'collect_input': True, 'collect_hs': False, 'nb': 3, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 143, 'loss.β': 0.01971198865399563, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.02 lr=0.00 save=False |       0 |   -0.8 | 0.584 |     0 |


[I 2024-09-30 03:42:23,210] Trial 59 finished with value: 1.009727626459144 and parameters: {'learning-rate': 0.0009955312750333967, 'collect_input': False, 'collect_hs': False, 'nb': 4, 'Htype': 'ether', 'flip_side': False, 'reduction': 188, 'loss.β': 0.28260627530718135, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                  |   train |   test |   oos |   rnd |
|:--------------------------------------|--------:|-------:|------:|------:|
| ReprPO loss.β=0.28 lr=0.00 save=False |       0 |   -0.8 | 0.973 |     4 |


[I 2024-09-30 03:44:54,454] Trial 60 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 7.353239375249114e-05, 'collect_input': True, 'collect_hs': True, 'nb': 7, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 174, 'loss.β': 0.6905154935027195, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                                     |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True collect_input=True loss.β=0.69 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     4 |


[I 2024-09-30 03:47:35,687] Trial 61 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.0006698153319761879, 'collect_input': True, 'collect_hs': False, 'nb': 10, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 157, 'loss.β': 0.07201129806973712, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.07 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     2 |


[I 2024-09-30 03:50:27,825] Trial 62 finished with value: 1.009727626459144 and parameters: {'learning-rate': 0.00014632398322944414, 'collect_input': False, 'collect_hs': False, 'nb': 6, 'Htype': 'oft', 'flip_side': False, 'reduction': 191, 'loss.β': 0.18317806545687215, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                  |   train |   test |   oos |   rnd |
|:--------------------------------------|--------:|-------:|------:|------:|
| ReprPO loss.β=0.18 lr=0.00 save=False |       0 |   -0.8 | 0.973 |     2 |


[I 2024-09-30 03:53:00,244] Trial 63 finished with value: 1.0 and parameters: {'learning-rate': 0.0002911689596167032, 'collect_input': True, 'collect_hs': True, 'nb': 8, 'Htype': 'ether', 'flip_side': True, 'reduction': 175, 'loss.β': 0.007031666667109069, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                                     |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True collect_input=True loss.β=0.01 lr=0.00 save=False |       0 |   -0.8 |     0 |     4 |


[I 2024-09-30 03:55:42,859] Trial 64 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.0004513584258982789, 'collect_input': True, 'collect_hs': False, 'nb': 1, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 182, 'loss.β': 0.14328636718255186, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.14 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     2 |


[I 2024-09-30 03:58:25,521] Trial 65 finished with value: 1.0058365758754864 and parameters: {'learning-rate': 3.2388801541882836e-05, 'collect_input': True, 'collect_hs': False, 'nb': 2, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 189, 'loss.β': 0.030912579624553727, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.03 lr=0.00 save=False |       0 |   -0.8 | 0.584 |    -2 |


[I 2024-09-30 04:01:07,796] Trial 66 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 0.0006958382897954093, 'collect_input': True, 'collect_hs': False, 'nb': 4, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 137, 'loss.β': 0.08091123978019595, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.08 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     2 |


[I 2024-09-30 04:03:49,629] Trial 67 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 0.0004103085893496651, 'collect_input': True, 'collect_hs': False, 'nb': 2, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 158, 'loss.β': 0.5262471526608133, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.53 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     2 |


[I 2024-09-30 04:06:30,633] Trial 68 finished with value: 1.0155642023346303 and parameters: {'learning-rate': 6.710010495429388e-05, 'collect_input': True, 'collect_hs': False, 'nb': 6, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 78, 'loss.β': 0.9828509821737622, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.98 lr=0.00 save=False |       0 |   -0.8 | 1.556 |     2 |


[I 2024-09-30 04:09:16,132] Trial 69 finished with value: 1.0155642023346303 and parameters: {'learning-rate': 5.736517283522495e-05, 'collect_input': True, 'collect_hs': False, 'nb': 4, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 82, 'loss.β': 1.0570354918666647, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=1.06 lr=0.00 save=False |       0 |   -0.8 | 1.556 |     4 |


[I 2024-09-30 04:11:48,384] Trial 70 finished with value: 1.009727626459144 and parameters: {'learning-rate': 6.159870066301065e-05, 'collect_input': False, 'collect_hs': True, 'nb': 6, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 83, 'loss.β': 1.1029199739336155, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                  |   train |   test |   oos |   rnd |
|:------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True loss.β=1.10 lr=0.00 save=False |       0 |   -0.8 | 0.973 |     6 |


[I 2024-09-30 04:14:30,466] Trial 71 finished with value: 1.0136186770428015 and parameters: {'learning-rate': 3.6348034484966665e-05, 'collect_input': True, 'collect_hs': False, 'nb': 8, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 63, 'loss.β': 0.39092591115211256, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.39 lr=0.00 save=False |       0 |   -0.8 | 1.362 |     0 |


[I 2024-09-30 04:17:12,954] Trial 72 finished with value: 1.0116731517509727 and parameters: {'learning-rate': 4.268040284829087e-05, 'collect_input': True, 'collect_hs': False, 'nb': 10, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 80, 'loss.β': 1.9772902880471146, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=1.98 lr=0.00 save=False |       0 |   -0.8 | 1.167 |     4 |


[I 2024-09-30 04:19:55,752] Trial 73 finished with value: 1.0077821011673151 and parameters: {'learning-rate': 9.647254312981809e-05, 'collect_input': True, 'collect_hs': False, 'nb': 4, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 72, 'loss.β': 0.0002687424554286852, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.00 lr=0.00 save=False |       0 |   -0.8 | 0.778 |     6 |


[I 2024-09-30 04:22:37,235] Trial 74 finished with value: 1.009727626459144 and parameters: {'learning-rate': 2.4853233469094322e-05, 'collect_input': True, 'collect_hs': False, 'nb': 5, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 42, 'loss.β': 1.1126606157105394, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False}. Best is trial 55 with value: 1.017509727626459.



| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=1.11 lr=0.00 save=False |       0 |   -0.8 | 0.973 |     0 |

| acc_inc/eval_ds [pp]                                     |   train |   test |   oos |   rnd |
|:---------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True loss.β=0.29 lr=0.00 save=False |       0 |   -0.8 | 0.778 |     4 |


AssertionError: Should not reach.

In [None]:
# Opens IPython's interactive post-mortem debugger on the most recent traceback.
%debug

In [22]:
# plot_timeline(study)

In [None]:
# `study` is whichever study the sweep loop assigned last, so only that one is plotted.
plot_optimization_history(study)

In [16]:
# plot_intermediate_values(study)

In [21]:
# plot_contour(study)


In [None]:
# `study` is whichever study the sweep loop assigned last, so only that one is plotted.
plot_slice(study)


In [None]:
# `study` is whichever study the sweep loop assigned last, so only that one is plotted.
plot_param_importances(study)

### Appendix 1: dataclass to Optuna

In [20]:
# import inspect
# import typing
# from typing import Literal

# def optuna_suggest_from_dataclass(t):
#     n = t.__name__
#     print(f'## {n}')
#     sig = inspect.signature(t)
#     for name, param in sig.parameters.items():
#         if param.annotation== bool:
#             print(f'"{name}": trial.suggest_categorical("{name}", [True, False]),')
#         elif param.annotation==int:
#             print(f'"{name}": trial.suggest_int("{name}", 1, 10),')
#         elif param.annotation ==float:
#             print(f'"{name}": trial.suggest_float("{name}", 0.1, 10.0),')
#         elif param.annotation == str:
#             print(f'"{name}": trial.suggest_categorical("{name}", ["a", "b", "c"]),')
#         elif param.annotation == tuple:
#             print(f'"{name}": trial.suggest_categorical("{name}", [(1, 2), (3, 4), (5, 6)]),')
#         elif typing.get_origin(param.annotation) == Literal:
#             print(f'"{name}": trial.suggest_categorical("{name}", {param.annotation.__args__}),')
#         else:
#             print(f"!!Unknown type {param}")
#             # print(name, param.default, param.annotation)

# optuna_suggest_from_dataclass(ReprPOConfig)
# for t in Transforms:
#     print(f'## {t}')
#     optuna_suggest_from_dataclass(t.value)
# for l in Losses:
#     print(f'## {l}')
#     optuna_suggest_from_dataclass(l.value)


# optuna_suggest_from_dataclass(DPOProjGradConfig)