https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/005_visualization.html#sphx-glr-download-tutorial-10-key-features-005-visualization-py

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules (e.g. the local `reprpo`
# package) are re-imported before each cell runs and edits are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
import os

# Must be set before torch is imported in the next cell.
# Expandable segments seem to stop GPU memory from filling up across runs, even though it is cleared.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

In [3]:
import torch
import pandas as pd
from pathlib import Path
import optuna
from reprpo.hp.helpers import optuna_df

In [4]:
from reprpo.training import train
from reprpo.experiments import experiment_configs
from reprpo.hp.space import search_spaces

[2024-10-06 00:53:29,043] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


## Objective

In [5]:
# Run-wide settings: RNG seed, the metric returned to Optuna as the objective value, and the device.
SEED = 42
key_metric = "acc_gain_vs_ref/oos"
torch.manual_seed(SEED)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# silence please
import os
import sys

from loguru import logger

# Remove loguru's existing handlers (one call without an id removes them all), then re-attach
# stderr at WARNING so routine INFO/DEBUG messages are hidden.
logger.remove()
logger.add(sys.stderr, level="WARNING")

# Keep the sweep offline and quiet: no W&B syncing, no HF dataset downloads, no progress bars.
# NOTE(review): these are set after `reprpo.training` was imported in an earlier cell; libraries
# that read such variables at import time may not see them — confirm, or move this above the imports.
os.environ["WANDB_MODE"] = "disabled"
os.environ["HF_DATASETS_OFFLINE"] = "1"
os.environ["TQDM_DISABLE"] = "true"

In [7]:
# SQLite URL of the Optuna storage used by every study in this notebook.
f_db = "sqlite:///optuna.db"

# Same database as a filesystem path relative to the working directory.
f = f_db.replace("sqlite:///", "./")
print(f)

# Make sure the directory holding the database exists before Optuna opens it.
Path(f).parent.mkdir(exist_ok=True, parents=True)
f_db

./optuna.db


'sqlite:///optuna.db'

In [8]:
# print(f'to visualise run in cli\ncd nbs\noptuna-dashboard {f_db}')

In [9]:
from reprpo.hp.target import override, default_tuner_kwargs
from reprpo.experiments import experiment_configs
import copy
import wandb

import optuna.pruners
from optuna_integration.wandb import WeightsAndBiasesCallback

In [10]:


import functools

def list2tuples(d):
    """Replace every list value of ``d`` with an equivalent tuple, in place.

    Only top-level values are inspected; other value types are left alone.
    The same dict object is returned for convenience.
    """
    list_keys = [key for key, value in d.items() if isinstance(value, list)]
    for key in list_keys:
        d[key] = tuple(d[key])
    return d

def objective_func(kwargs, trial, starter_experiment_name):
    """Train one sampled configuration and return the result of ``train``.

    Args:
        kwargs: overrides sampled for this trial, keyed by config field name. List values
            are converted to tuples (in place) before being applied.
        trial: the Optuna trial, forwarded to ``train``.
        starter_experiment_name: key into ``experiment_configs`` selecting the base config.

    Returns:
        Whatever ``train(cfg, trial=trial)`` returns; ``objective`` indexes it by metric name.
    """
    # Deep-copy so the shared base config is never modified between trials.
    cfg = copy.deepcopy(experiment_configs[starter_experiment_name][1])
    # Convert lists to tuples *before* applying the overrides; this used to run after
    # `override`, where its result was never used and so never reached cfg.
    kwargs = list2tuples(kwargs)
    override(cfg, default_tuner_kwargs)
    override(cfg, kwargs)
    return train(cfg, trial=trial)

def objective(trial: optuna.Trial, starter_experiment_name, trial2args, key_metric=key_metric) -> float:
    """Optuna objective: sample arguments for ``trial``, train, and return one metric.

    ``trial2args`` maps the trial to the override dict passed to ``objective_func``;
    ``key_metric`` names the entry of the training result that is returned.
    """
    sampled = trial2args(trial)
    results = objective_func(sampled, trial, starter_experiment_name)
    score = results[key_metric]
    return score



## Opt

Note on pruning: it is only really useful with validation metrics and for long jobs that run over many epochs. I'm using a small proxy job here, so there is no need for it.

In [11]:
# from reprpo.experiments import experiment_configs
# NOTE(review): this rebinds `experiment_configs`, which earlier cells imported from
# `reprpo.experiments`. Code that looks the name up at call time (e.g. `objective_func`) will use
# this object from here on — confirm both modules expose the same mapping.
from reprpo.hp.space import experiment_configs
# Show which experiment names are available.
experiment_configs.keys()

dict_keys(['hs-svd-mse', 'hs-hra-rank', 'hs-ortho-prefvec', 'ether-prefvec', 'dpo', 'projbp', 'projgrad2'])

In [12]:
import warnings

# Hide Optuna's ExperimentalWarning so it does not clutter the cell outputs below.
warnings.filterwarnings(action="ignore", category=optuna.exceptions.ExperimentalWarning)

In [13]:
from optuna.study.study import storages, get_all_study_names

# Show a summary table for every study already stored in the database.
study_names = get_all_study_names(storage=f_db)

for study_name in study_names:
    print(study_name)
    study = optuna.load_study(study_name=study_name, storage=f_db)
    try:
        df_res = optuna_df(study, key_metric)
        display(df_res)
        print()
    except ValueError as e:
        # Best-effort report: keep going, but say why this study has no summary
        # instead of printing a bare '-'.
        print(f"- ({e})")

projgrad2


[W 2024-10-06 00:54:26,331] Study instance does not contain completed trials.


Unnamed: 0_level_0,importance,best
"projgrad2 N=✓0/✖307, best=nan",Unnamed: 1_level_1,Unnamed: 2_level_1



side-ether-prefvec


Unnamed: 0_level_0,importance,best
"side-ether-prefvec N=✓208/✖209, best=1.169",Unnamed: 1_level_1,Unnamed: 2_level_1
lr,0.368,0.000615
nb,0.219,30
β,0.218,0.403787
reduction,0.146,25
flip_side,0.014,True
use_dpo_loss,0.013,False
collect_hs,0.011,False
Htype,0.01,oft
use_nll_loss,0.0,False
weight_tokens,0.0,False


[W 2024-10-06 00:54:26,974] Study instance does not contain completed trials.



projgrad


Unnamed: 0_level_0,importance,best
"projgrad N=✓0/✖4, best=nan",Unnamed: 1_level_1,Unnamed: 2_level_1



side-svd-mse


Unnamed: 0_level_0,importance,best
"side-svd-mse N=✓28/✖316, best=1.010",Unnamed: 1_level_1,Unnamed: 2_level_1
α,0.844,0.635584
lr,0.126,0.001195
quantile,0.016,float
collect_hs,0.005,True
collect_input,0.005,False
dual_svd,0.005,True
quantile_value,,0.3



side-hra-rank


Unnamed: 0_level_0,importance,best
"side-hra-rank N=✓182/✖183, best=1.229",Unnamed: 1_level_1,Unnamed: 2_level_1
β,0.441,0.110393
lr,0.417,0.000188
α,0.095,5.920778
r,0.03,2
apply_GS,0.017,False
collect_hs,0.0,False
collect_input,0.0,False



hs-ortho-prefvec


Unnamed: 0_level_0,importance,best
"hs-ortho-prefvec N=✓20/✖20, best=1.118",Unnamed: 1_level_1,Unnamed: 2_level_1
lr,0.782,0.000125
β,0.161,0.341233
use_nll_loss,0.019,False
use_proj_rel,0.019,True
use_angle_loss,0.005,True
use_dpo_loss,0.005,True
weight_tokens,0.005,True
orthogonal_map,0.004,matrix_exp
use_orth_loss,0.001,False



projbp


Unnamed: 0_level_0,importance,best
"projbp N=✓10/✖24, best=1.033",Unnamed: 1_level_1,Unnamed: 2_level_1
β,0.512,0.366362
lr,0.311,0.000003
scale_orth,0.128,False
mag_clip,0.021,
neg_slope,0.014,0
reverse_pref,0.014,False



dpo


Unnamed: 0_level_0,importance,best
"dpo N=✓8/✖10, best=1.087",Unnamed: 1_level_1,Unnamed: 2_level_1
lr,1.0,9.8e-05



hs-svd-mse


Unnamed: 0_level_0,importance,best
"hs-svd-mse N=✓14/✖332, best=1.017",Unnamed: 1_level_1,Unnamed: 2_level_1
lr,0.752,0.001195
α,0.189,0.635584
collect_input,0.056,False
collect_hs,0.003,True
dual_svd,0.0,True
quantile,0.0,float
quantile_value,,0.3



hs-hra-rank


Unnamed: 0_level_0,importance,best
"hs-hra-rank N=✓43/✖45, best=1.087",Unnamed: 1_level_1,Unnamed: 2_level_1
lr,0.907,0.000158
r,0.048,96
β,0.039,18.156422
α,0.005,0.123722
apply_GS,0.002,False



ether-prefvec


Unnamed: 0_level_0,importance,best
ether-prefvec N=3,Unnamed: 1_level_1,Unnamed: 2_level_1





In [15]:
# unit test
# Smoke-test every search space without training: sample a few trials from a throwaway
# in-memory study and check that the sampled kwargs can be applied to the experiment's config.
for exp_name, (_max_trials, trial2args) in search_spaces.items():
    study = optuna.create_study(direction="maximize")
    print('exp_name', exp_name)
    for _ in range(10):
        trial = study.ask()
        # Convert lists to tuples before applying, so the config receives the converted values.
        kwargs = list2tuples(trial2args(trial))
        # Fresh config per sampled trial (as objective_func does), so overrides from one
        # sample cannot leak into the next.
        cfg = copy.deepcopy(experiment_configs[exp_name][1])
        override(cfg, default_tuner_kwargs)
        override(cfg, kwargs)
        # print()
    # Only the last sampled kwargs are shown, as an example of what the space produces.
    print('kwargs', kwargs)
    print('='*100)
    print()

    # TODO get float * 50 + categories * 25

    # try:
    #     df_res = optuna_df(study, key_metric)
    #     print(df_res.to_markdown())
    # except Exception as e:
    #     print(e)

[I 2024-10-06 00:54:53,760] A new study created in memory with name: no-name-34dae79d-d0e6-4f47-b94d-61fbacd30677
[I 2024-10-06 00:54:53,774] A new study created in memory with name: no-name-03940010-97ac-4136-b4e1-17ddfa657803
[I 2024-10-06 00:54:53,784] A new study created in memory with name: no-name-737d0f6c-4539-461f-99c2-1b11bbd028ad
[I 2024-10-06 00:54:53,796] A new study created in memory with name: no-name-6dcda2eb-c085-43bf-bca4-4c2a297c6bc9
[I 2024-10-06 00:54:53,816] A new study created in memory with name: no-name-966d6188-a1b6-4075-a088-bd42eb6177cd
[I 2024-10-06 00:54:53,824] A new study created in memory with name: no-name-811f712a-bc11-4397-9287-334814c2b657
[I 2024-10-06 00:54:53,831] A new study created in memory with name: no-name-578668dd-46e8-43c1-bf2d-6ae8761b5a19


exp_name hs-svd-mse
kwargs {'lr': 2.4008970673824455e-05, 'collect_input': True, 'collect_hs': True, 'transform.quantile': 1, 'transform.dual_svd': True, 'loss.α': 4.659412764403834}

exp_name hs-hra-rank
kwargs {'lr': 1.0639442660721646e-06, 'transform.r': 37, 'transform.apply_GS': False, 'loss.α': 2606.3940587280713, 'loss.β': 49.21176557089369}

exp_name hs-ortho-prefvec
kwargs {'lr': 0.006940820649722095, 'transform.orthogonal_map': 'cayley', 'loss.β': 0.00036668618914569695, 'loss.use_orth_loss': False, 'loss.use_angle_loss': False, 'loss.use_dpo_loss': False, 'loss.use_nll_loss': True, 'loss.weight_tokens': False, 'loss.use_proj_rel': True}

exp_name ether-prefvec
kwargs {'lr': 0.005718612175996021, 'collect_input': False, 'collect_hs': True, 'transform.nb': 4, 'transform.Htype': 'oft', 'transform.flip_side': False, 'transform.reduction': 10, 'loss.β': 0.015625783471675713, 'loss.use_orth_loss': False, 'loss.use_angle_loss': True, 'loss.use_dpo_loss': True, 'loss.use_nll_loss': F

In [16]:
# from optuna import trial
# t = trial.create_trial(value=1)
# t.suggest_categorical("a", [1, 2, 3])

In [17]:
import numpy as np

MAX_TRIALS = 250  # NOTE(review): not read below; each study's budget is the `max_trials` from search_spaces
TRIALS_PER_ROUND = 20  # trials run on one study before moving on to the next (round robin)

# Round-robin sweep over all search spaces. Each pass visits the studies in random order and
# gives every study that is still under budget up to TRIALS_PER_ROUND more trials. The sweep
# ends when a pass finds no study under budget, or on Ctrl-C / kernel interrupt.
spaces = list(search_spaces.items())
try:
    while True:
        np.random.shuffle(spaces)
        work_left = False
        for exp_name, (max_trials, trial2args) in spaces:
            try:
                study_name = exp_name
                study = optuna.create_study(
                    study_name=study_name,
                    direction="maximize",
                    load_if_exists=True,
                    storage=f_db,
                    sampler=optuna.samplers.TPESampler(seed=SEED),
                    # pruner=optuna.pruners.NopPruner(),
                )

                # Every stored trial counts towards the budget, whatever its state.
                n = len(study.trials)
                if n > 0:
                    print(f"loaded {n} {study_name} trials")
                    try:
                        df_res = optuna_df(study, key_metric)
                        print(df_res.to_markdown())
                    except ValueError as e:
                        # The summary is informational only; it must not block optimisation.
                        print(f"- ({e})")

                if n < max_trials:
                    work_left = True
                    _objective = functools.partial(
                        objective,
                        key_metric=key_metric,
                        starter_experiment_name=exp_name,
                        trial2args=trial2args,
                    )
                    study.optimize(
                        _objective,
                        # do a small batch at a time, round robin, until done; never exceed the budget
                        n_trials=min(TRIALS_PER_ROUND, max_trials - n),
                        gc_after_trial=True,
                        catch=(AssertionError, OSError, RuntimeError, KeyError, torch.OutOfMemoryError),
                    )

                print('='*80)
            except Exception as e:
                # One broken study must not stop the others; log it and move on.
                logger.exception(e)
        if not work_left:
            # Every study has reached its budget (or could not be opened): nothing left to do.
            break
except KeyboardInterrupt:
    # Caught outside both loops so that a single interrupt really stops the sweep; the old
    # inner `break` only left the `for` loop and `while True` started over.
    print("interrupted, stopping sweep")

[I 2024-10-06 00:55:01,659] Using an existing study with name 'ether-prefvec' instead of creating a new one.


loaded 3 ether-prefvec trials
| ether-prefvec N=3   | importance   | best   |
|---------------------|--------------|--------|


[I 2024-10-06 00:58:49,858] Trial 3 finished with value: 1.0096339113680155 and parameters: {'lr': 7.45934328572655e-06, 'collect_input': True, 'collect_hs': True, 'nb': 1, 'Htype': 'etherplus', 'flip_side': False, 'reduction': 160, 'β': 2.177484667394932e-05, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 0 with value: 1.0096339113680155.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                  |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.β=2.2e-05 lr=7.5e-06 ether.Htype=etherplus ether.nb=1 ether.reduction=160 |   0.826 |      0 | 0.963 | 3.509 |


[I 2024-10-06 01:02:30,339] Trial 4 finished with value: 1.0 and parameters: {'lr': 9.96251322205511e-07, 'collect_input': False, 'collect_hs': False, 'nb': 1, 'Htype': 'oft', 'flip_side': True, 'reduction': 57, 'β': 0.000593490901937937, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 0 with value: 1.0096339113680155.



| acc_inc/eval_ds [pp]                                                                                                                                                                      |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO prefvec.use_angle_prefvec=False prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.00059 lr=1e-06 ether.Htype=oft ether.flip_side=True ether.nb=1 ether.reduction=57 |       0 |      0 |     0 | -3.509 |


[I 2024-10-06 01:06:08,768] Trial 5 finished with value: 0.5202312138728324 and parameters: {'lr': 0.0049830438374949175, 'collect_input': True, 'collect_hs': True, 'nb': 1, 'Htype': 'oft', 'flip_side': True, 'reduction': 4, 'β': 0.0026275095216295235, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 0 with value: 1.0096339113680155.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                         |   train |    test |     oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.0026 lr=0.005 ether.Htype=oft ether.flip_side=True ether.nb=1 ether.reduction=4 | -21.488 | -19.685 | -47.977 | 12.281 |


[I 2024-10-06 01:09:43,949] Trial 6 finished with value: 1.0134874759152217 and parameters: {'lr': 6.199100007802271e-06, 'collect_input': False, 'collect_hs': True, 'nb': 1, 'Htype': 'oft', 'flip_side': True, 'reduction': 1, 'β': 0.03120159522837056, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 6 with value: 1.0134874759152217.



| acc_inc/eval_ds [pp]                                                                                                                                                           |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.β=0.031 lr=6.2e-06 ether.Htype=oft ether.flip_side=True ether.nb=1 ether.reduction=1 |   0.826 |      0 | 1.349 | -3.509 |


[I 2024-10-06 01:13:12,164] Trial 7 finished with value: 0.39884393063583823 and parameters: {'lr': 0.003450105453613023, 'collect_input': False, 'collect_hs': True, 'nb': 1, 'Htype': 'oft', 'flip_side': False, 'reduction': 131, 'β': 1.4982474159458828e-05, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 6 with value: 1.0134874759152217.



| acc_inc/eval_ds [pp]                                                                                                                                                                         |   train |    test |     oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.β=1.5e-05 lr=0.0035 ether.Htype=oft ether.nb=1 ether.reduction=131 | -57.851 | -51.969 | -60.116 | 3.509 |


[I 2024-10-06 01:16:37,312] Trial 8 finished with value: 1.0 and parameters: {'lr': 1.2219544456335435e-05, 'collect_input': True, 'collect_hs': False, 'nb': 2, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 16, 'β': 7.867617817691428e-05, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 6 with value: 1.0134874759152217.



| acc_inc/eval_ds [pp]                                                                                                                                                                           |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.β=7.9e-05 lr=1.2e-05 ether.flip_side=True ether.nb=2 ether.reduction=16 |       0 |      0 |     0 | 3.509 |


[I 2024-10-06 01:20:04,785] Trial 9 finished with value: 1.0616570327552988 and parameters: {'lr': 0.00022944454507602003, 'collect_input': True, 'collect_hs': True, 'nb': 7, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 1, 'β': 0.0052873025127761295, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 9 with value: 1.0616570327552988.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                                                 |   train |   test |   oos |    rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.0053 lr=0.00023 ether.flip_side=True ether.nb=7 ether.reduction=1 |   5.785 |      0 | 6.166 | 15.789 |


[I 2024-10-06 01:23:31,950] Trial 10 finished with value: 0.41233140655105976 and parameters: {'lr': 0.004202116526335314, 'collect_input': True, 'collect_hs': False, 'nb': 5, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 5, 'β': 0.00015862805689756922, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 9 with value: 1.0616570327552988.



| acc_inc/eval_ds [pp]                                                                                                                                            |   train |    test |     oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.00016 lr=0.0042 ether.flip_side=True ether.nb=5 ether.reduction=5 | -46.281 | -33.071 | -58.767 | 14.035 |


[I 2024-10-06 01:26:50,979] Trial 11 finished with value: 0.998073217726397 and parameters: {'lr': 1.0600050132100246e-07, 'collect_input': False, 'collect_hs': True, 'nb': 1, 'Htype': 'etherplusHH', 'flip_side': False, 'reduction': 48, 'β': 0.003810040186310128, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 9 with value: 1.0616570327552988.



| acc_inc/eval_ds [pp]                                                                                                                                                                            |   train |   test |    oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.0038 lr=1.1e-07 ether.nb=1 ether.reduction=48 |       0 |      0 | -0.193 |     0 |


[I 2024-10-06 01:30:14,764] Trial 12 finished with value: 1.1156069364161851 and parameters: {'lr': 0.0002406836245455099, 'collect_input': True, 'collect_hs': False, 'nb': 19, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 0.92244950497797, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 12 with value: 1.1156069364161851.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |    oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.92 lr=0.00024 ether.Htype=ether ether.flip_side=True ether.nb=19 ether.reduction=1 |   0.826 |  0.787 | 11.561 | -3.509 |


[I 2024-10-06 01:33:37,761] Trial 13 finished with value: 1.1483622350674374 and parameters: {'lr': 0.0002901472921452969, 'collect_input': True, 'collect_hs': False, 'nb': 20, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.9166022109203233, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.9 lr=0.00029 ether.Htype=ether ether.flip_side=True ether.nb=20 ether.reduction=1 |   2.479 |  0.787 | 14.836 | -7.018 |


[I 2024-10-06 01:36:57,412] Trial 14 finished with value: 1.071290944123314 and parameters: {'lr': 0.0001799198232803581, 'collect_input': True, 'collect_hs': False, 'nb': 25, 'Htype': 'ether', 'flip_side': True, 'reduction': 3, 'β': 1.9699156994635347, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                  |   train |   test |   oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=2 lr=0.00018 ether.Htype=ether ether.flip_side=True ether.nb=25 ether.reduction=3 |   1.653 |  0.787 | 7.129 | 3.509 |


[I 2024-10-06 01:40:16,605] Trial 15 finished with value: 1.1290944123314066 and parameters: {'lr': 0.0002642694411963029, 'collect_input': True, 'collect_hs': False, 'nb': 31, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.3636261492399193, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.4 lr=0.00026 ether.Htype=ether ether.flip_side=True ether.nb=31 ether.reduction=1 |   0.826 |  0.787 | 12.909 | -7.018 |


[I 2024-10-06 01:43:33,812] Trial 16 finished with value: 1.1059730250481696 and parameters: {'lr': 0.0007282211424206603, 'collect_input': True, 'collect_hs': False, 'nb': 12, 'Htype': 'ether', 'flip_side': True, 'reduction': 10, 'β': 0.1523030038701377, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                      |   train |   test |    oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.15 lr=0.00073 ether.Htype=ether ether.flip_side=True ether.nb=12 ether.reduction=10 |   1.653 | -0.787 | 10.597 | -8.772 |


[I 2024-10-06 01:46:50,631] Trial 17 finished with value: 1.0327552986512525 and parameters: {'lr': 5.8804631302704396e-05, 'collect_input': True, 'collect_hs': False, 'nb': 14, 'Htype': 'ether', 'flip_side': True, 'reduction': 2, 'β': 0.16222423275670422, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.16 lr=5.9e-05 ether.Htype=ether ether.flip_side=True ether.nb=14 ether.reduction=2 |       0 |      0 | 3.276 | -1.754 |


[I 2024-10-06 01:50:05,875] Trial 18 finished with value: 0.7495183044315994 and parameters: {'lr': 0.0010741233719675444, 'collect_input': True, 'collect_hs': False, 'nb': 30, 'Htype': 'ether', 'flip_side': True, 'reduction': 8, 'β': 0.36563366980566214, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                        |   train |    test |     oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.37 lr=0.0011 ether.Htype=ether ether.flip_side=True ether.reduction=8 | -19.835 | -11.024 | -25.048 | 12.281 |


[I 2024-10-06 01:53:28,517] Trial 19 finished with value: 0.9942196531791907 and parameters: {'lr': 7.936586160598832e-05, 'collect_input': True, 'collect_hs': False, 'nb': 11, 'Htype': 'ether', 'flip_side': True, 'reduction': 2, 'β': 2.151448230726969e-06, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                        |   train |   test |    oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=2.2e-06 lr=7.9e-05 ether.Htype=ether ether.flip_side=True ether.nb=11 ether.reduction=2 |       0 |      0 | -0.578 |     0 |


[I 2024-10-06 01:56:56,395] Trial 20 finished with value: 0.44123314065510605 and parameters: {'lr': 0.0008739464876170618, 'collect_input': False, 'collect_hs': False, 'nb': 3, 'Htype': 'ether', 'flip_side': False, 'reduction': 455, 'β': 0.028499794565144055, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                               |   train |    test |     oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.028 lr=0.00087 ether.Htype=ether ether.nb=3 ether.reduction=455 | -36.364 | -33.071 | -55.877 | 1.754 |


[I 2024-10-06 02:00:31,734] Trial 21 finished with value: 1.0211946050096339 and parameters: {'lr': 2.880497861507296e-05, 'collect_input': True, 'collect_hs': False, 'nb': 32, 'Htype': 'ether', 'flip_side': True, 'reduction': 29, 'β': 0.03531855721629355, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                       |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.035 lr=2.9e-05 ether.Htype=ether ether.flip_side=True ether.nb=32 ether.reduction=29 |       0 |      0 | 2.119 |     0 |


[I 2024-10-06 02:04:09,140] Trial 22 finished with value: 1.0077071290944124 and parameters: {'lr': 1.5875044375329296e-06, 'collect_input': True, 'collect_hs': False, 'nb': 7, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 2, 'β': 0.4801430080499144, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                        |   train |   test |   oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.48 lr=1.6e-06 ether.Htype=etherplus ether.flip_side=True ether.nb=7 ether.reduction=2 |       0 |      0 | 0.771 | 1.754 |


[I 2024-10-06 02:04:09,976] Using an existing study with name 'hs-ortho-prefvec' instead of creating a new one.


loaded 20 hs-ortho-prefvec trials
| hs-ortho-prefvec N=✓20/✖20, best=1.118   |   importance | best                   |
|:-----------------------------------------|-------------:|:-----------------------|
| lr                                       |        0.782 | 0.00012461222072738544 |
| β                                        |        0.161 | 0.3412333287913769     |
| use_nll_loss                             |        0.019 | False                  |
| use_proj_rel                             |        0.019 | True                   |
| use_angle_loss                           |        0.005 | True                   |
| use_dpo_loss                             |        0.005 | True                   |
| weight_tokens                            |        0.005 | True                   |
| orthogonal_map                           |        0.004 | matrix_exp             |
| use_orth_loss                            |        0.001 | False                  |


[I 2024-10-06 02:07:42,742] Trial 20 finished with value: 1.0635838150289019 and parameters: {'lr': 0.00011599334568238253, 'orthogonal_map': 'cayley', 'β': 0.3180114413719155, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                           |   train |   test |   oos |    rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.32 lr=0.00012 ortho.orthogonal_map=cayley |   4.132 |      0 | 6.358 | 10.526 |


[I 2024-10-06 02:11:14,636] Trial 21 finished with value: 1.0809248554913296 and parameters: {'lr': 0.00012491186582150942, 'orthogonal_map': 'matrix_exp', 'β': 0.2045081228059815, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.2 lr=0.00012 ortho.orthogonal_map=matrix_exp |   4.959 |      0 | 8.092 | 5.263 |


[I 2024-10-06 02:14:51,150] Trial 22 finished with value: 0.5125240847784202 and parameters: {'lr': 0.0016208869013058956, 'orthogonal_map': 'matrix_exp', 'β': 0.016854047517786026, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |    test |     oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.017 lr=0.0016 ortho.orthogonal_map=matrix_exp | -16.529 | -20.472 | -48.748 | 17.544 |


[I 2024-10-06 02:18:27,484] Trial 23 finished with value: 1.0134874759152217 and parameters: {'lr': 1.2386301899925658e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.0051659814925599104, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                 |   train |   test |   oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.0052 lr=1.2e-05 ortho.orthogonal_map=matrix_exp |   0.826 |      0 | 1.349 | 1.754 |


[I 2024-10-06 02:22:01,311] Trial 24 finished with value: 1.0732177263969174 and parameters: {'lr': 0.00011862842031212204, 'orthogonal_map': 'matrix_exp', 'β': 0.2757169873344899, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.28 lr=0.00012 ortho.orthogonal_map=matrix_exp |   4.959 |      0 | 7.322 |     0 |


[I 2024-10-06 02:25:37,062] Trial 25 finished with value: 0.393063583815029 and parameters: {'lr': 0.002294708301736453, 'orthogonal_map': 'matrix_exp', 'β': 0.04689000102665802, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |    test |     oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.047 lr=0.0023 ortho.orthogonal_map=matrix_exp | -53.719 | -44.094 | -60.694 | 3.509 |


[I 2024-10-06 02:29:11,973] Trial 26 finished with value: 1.001926782273603 and parameters: {'lr': 0.0003386055028879383, 'orthogonal_map': 'matrix_exp', 'β': 8.940060037547945e-05, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                                |   train |   test |   oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=8.9e-05 lr=0.00034 ortho.orthogonal_map=matrix_exp |   4.959 | -0.787 | 0.193 | 26.316 |


[I 2024-10-06 02:32:52,439] Trial 27 finished with value: 1.0077071290944124 and parameters: {'lr': 1.1652434708622014e-06, 'orthogonal_map': 'matrix_exp', 'β': 0.33940306448738855, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.34 lr=1.2e-06 ortho.orthogonal_map=matrix_exp |   0.826 |      0 | 0.771 | 5.263 |


[I 2024-10-06 02:36:22,926] Trial 28 finished with value: 1.0134874759152217 and parameters: {'lr': 1.4988101582053636e-05, 'orthogonal_map': 'cayley', 'β': 0.026993305327625432, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                            |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.027 lr=1.5e-05 ortho.orthogonal_map=cayley |   1.653 |      0 | 1.349 |     0 |


[I 2024-10-06 02:39:40,870] Trial 29 finished with value: 1.0732177263969174 and parameters: {'lr': 9.946755107077159e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.00828625130150565, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                               |   train |   test |   oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.0083 lr=9.9e-05 ortho.orthogonal_map=matrix_exp |   4.132 |      0 | 7.322 | 3.509 |


[I 2024-10-06 02:42:58,750] Trial 30 finished with value: 0.5183044315992293 and parameters: {'lr': 0.0012321401965320643, 'orthogonal_map': 'householder', 'β': 1.893415942662686, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |    test |    oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|-------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.9 lr=0.0012 ortho.orthogonal_map=householder | -18.182 | -18.898 | -48.17 | 15.789 |


[I 2024-10-06 02:46:29,469] Trial 31 finished with value: 1.0578034682080926 and parameters: {'lr': 0.00015813203819291314, 'orthogonal_map': 'matrix_exp', 'β': 0.20091910986632155, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.2 lr=0.00016 ortho.orthogonal_map=matrix_exp |   4.132 | -0.787 |  5.78 | 15.789 |


[I 2024-10-06 02:49:46,436] Trial 32 finished with value: 1.0597302504816957 and parameters: {'lr': 5.8494588149002014e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.10351004717522656, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.1 lr=5.8e-05 ortho.orthogonal_map=matrix_exp |   3.306 |      0 | 5.973 | 3.509 |


[I 2024-10-06 02:53:04,199] Trial 33 finished with value: 1.0289017341040463 and parameters: {'lr': 0.0003751225461855568, 'orthogonal_map': 'matrix_exp', 'β': 0.5460991480428049, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.55 lr=0.00038 ortho.orthogonal_map=matrix_exp |   2.479 |      0 |  2.89 | 24.561 |


[I 2024-10-06 02:56:21,701] Trial 34 finished with value: 1.019267822736031 and parameters: {'lr': 2.0474531032046485e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.1667388434597149, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                             |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.17 lr=2e-05 ortho.orthogonal_map=matrix_exp |   1.653 |      0 | 1.927 |     0 |


[I 2024-10-06 02:59:25,446] Trial 35 finished with value: 1.0327552986512525 and parameters: {'lr': 8.669379205870907e-05, 'orthogonal_map': 'householder', 'β': 0.001659114844538711, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.β=0.0017 lr=8.7e-05 ortho.orthogonal_map=householder |   1.653 |      0 | 3.276 | 1.754 |


[I 2024-10-06 03:02:43,687] Trial 36 finished with value: 0.8439306358381503 and parameters: {'lr': 0.0006066997957123247, 'orthogonal_map': 'matrix_exp', 'β': 0.035489104484769905, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                           |   train |   test |     oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.035 lr=0.00061 ortho.orthogonal_map=matrix_exp |  -0.826 |  -3.15 | -15.607 | 8.772 |


[I 2024-10-06 03:06:01,244] Trial 37 finished with value: 1.0809248554913296 and parameters: {'lr': 0.0002311191704253201, 'orthogonal_map': 'matrix_exp', 'β': 0.7511549894560026, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.75 lr=0.00023 ortho.orthogonal_map=matrix_exp |   5.785 |      0 | 8.092 | 17.544 |


[I 2024-10-06 03:09:09,874] Trial 38 finished with value: 1.0077071290944124 and parameters: {'lr': 6.2958567147763926e-06, 'orthogonal_map': 'householder', 'β': 0.012811515967641761, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                      |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.013 lr=6.3e-06 ortho.orthogonal_map=householder |   0.826 |      0 | 0.771 | 1.754 |


[I 2024-10-06 03:12:26,747] Trial 39 finished with value: 0.5144508670520231 and parameters: {'lr': 0.0037199683775272696, 'orthogonal_map': 'matrix_exp', 'β': 0.14258481840592632, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                                  |   train |    test |     oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.weight_tokens=True prefvec.β=0.14 lr=0.0037 ortho.orthogonal_map=matrix_exp | -43.802 | -26.772 | -48.555 | 7.018 |


[I 2024-10-06 03:12:27,234] Using an existing study with name 'hs-svd-mse' instead of creating a new one.


loaded 332 hs-svd-mse trials


[I 2024-10-06 03:12:27,865] Using an existing study with name 'dpo' instead of creating a new one.


| hs-svd-mse N=✓14/✖332, best=1.017   |   importance | best                  |
|:------------------------------------|-------------:|:----------------------|
| lr                                  |        0.752 | 0.0011948328168545441 |
| α                                   |        0.189 | 0.6355835028602363    |
| collect_input                       |        0.056 | False                 |
| collect_hs                          |        0.003 | True                  |
| dual_svd                            |        0     | True                  |
| quantile                            |        0     | float                 |
| quantile_value                      |      nan     | 0.30000000000000004   |
loaded 10 dpo trials
| dpo N=✓8/✖10, best=1.087   |   importance |        best |
|:---------------------------|-------------:|------------:|
| lr                         |            1 | 9.84674e-05 |


[I 2024-10-06 03:15:41,117] Trial 10 finished with value: 1.0346820809248556 and parameters: {'lr': 7.45934328572655e-06}. Best is trial 4 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=7.5e-06         |   4.132 | -0.787 | 3.468 | 7.018 |


[I 2024-10-06 03:18:54,059] Trial 11 finished with value: 0.5279383429672447 and parameters: {'lr': 0.005669849511478858}. Best is trial 4 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |    rnd |
|:-----------------------|--------:|--------:|--------:|-------:|
| DPO lr=0.0057          | -13.223 | -19.685 | -47.206 | 12.281 |


[I 2024-10-06 03:22:07,445] Trial 12 finished with value: 1.1464354527938343 and parameters: {'lr': 0.0001440481381559795}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |   rnd |
|:-----------------------|--------:|-------:|-------:|------:|
| DPO lr=0.00014         |   4.959 |  0.787 | 14.644 | 8.772 |


[I 2024-10-06 03:25:23,522] Trial 13 finished with value: 1.1021194605009634 and parameters: {'lr': 9.467489489054227e-05}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=9.5e-05         |   5.785 | -0.787 | 10.212 | 15.789 |


[I 2024-10-06 03:28:40,539] Trial 14 finished with value: 0.9922928709055877 and parameters: {'lr': 0.0001225706694819564}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00012         |   5.785 |      0 | -0.771 | 19.298 |


[I 2024-10-06 03:31:54,270] Trial 15 finished with value: 1.0211946050096339 and parameters: {'lr': 1.4861973114366403e-05}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=1.5e-05         |   2.479 | -0.787 | 2.119 | 8.772 |


[I 2024-10-06 03:35:07,065] Trial 16 finished with value: 0.7610789980732177 and parameters: {'lr': 0.0004629090336090747}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |     oos |    rnd |
|:-----------------------|--------:|-------:|--------:|-------:|
| DPO lr=0.00046         | -10.744 | -9.449 | -23.892 | 31.579 |


[I 2024-10-06 03:38:32,657] Trial 17 finished with value: 1.001926782273603 and parameters: {'lr': 4.3222833710008596e-05}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=4.3e-05         |   5.785 | -1.575 | 0.193 | 17.544 |


[I 2024-10-06 03:41:54,741] Trial 18 finished with value: 0.9267822736030829 and parameters: {'lr': 0.0004948947088376027}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00049         |   4.132 | -1.575 | -7.322 | 24.561 |


[I 2024-10-06 03:45:15,399] Trial 19 finished with value: 1.044315992292871 and parameters: {'lr': 2.494712621055077e-06}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=2.5e-06         |   1.653 |      0 | 4.432 |     0 |


[I 2024-10-06 03:48:34,700] Trial 20 finished with value: 1.0867052023121389 and parameters: {'lr': 0.00013627563818737476}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=0.00014         |   5.785 | -1.575 | 8.671 | 10.526 |


[I 2024-10-06 03:51:53,156] Trial 21 finished with value: 0.4913294797687862 and parameters: {'lr': 0.001378275834219443}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |    rnd |
|:-----------------------|--------:|--------:|--------:|-------:|
| DPO lr=0.0014          | -24.793 | -24.409 | -50.867 | 10.526 |


[I 2024-10-06 03:55:12,191] Trial 22 finished with value: 0.9884393063583816 and parameters: {'lr': 4.8851293531686584e-05}. Best is trial 12 with value: 1.1464354527938343.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=4.9e-05         |   5.785 | -1.575 | -1.156 | 17.544 |


[I 2024-10-06 03:58:33,399] Trial 23 finished with value: 1.1522157996146436 and parameters: {'lr': 0.00012420777462285258}. Best is trial 23 with value: 1.1522157996146436.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00012         |   4.959 | -0.787 | 15.222 | 21.053 |


[I 2024-10-06 04:01:51,422] Trial 24 finished with value: 1.165703275529865 and parameters: {'lr': 0.0002701586889090417}. Best is trial 24 with value: 1.165703275529865.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=0.00027         |   5.785 | -0.787 | 16.57 | 14.035 |


[I 2024-10-06 04:05:12,567] Trial 25 finished with value: 1.1984585741811176 and parameters: {'lr': 0.0002794857261209373}. Best is trial 25 with value: 1.1984585741811176.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00028         |   5.785 | -0.787 | 19.846 | 33.333 |


[I 2024-10-06 04:08:33,084] Trial 26 finished with value: 0.4720616570327553 and parameters: {'lr': 0.001030797243720144}. Best is trial 25 with value: 1.1984585741811176.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |   rnd |
|:-----------------------|--------:|--------:|--------:|------:|
| DPO lr=0.001           | -28.926 | -23.622 | -52.794 | 8.772 |


[I 2024-10-06 04:11:47,644] Trial 27 finished with value: 1.1213872832369944 and parameters: {'lr': 0.00029780077058852265}. Best is trial 25 with value: 1.1984585741811176.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.0003          |   4.959 | -1.575 | 12.139 | 10.526 |


[I 2024-10-06 04:15:06,581] Trial 28 finished with value: 1.0539499036608864 and parameters: {'lr': 2.4104922989942412e-05}. Best is trial 25 with value: 1.1984585741811176.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=2.4e-05         |   5.785 |      0 | 5.395 | 15.789 |


[I 2024-10-06 04:18:25,232] Trial 29 finished with value: 0.5086705202312138 and parameters: {'lr': 0.0028153918881662753}. Best is trial 25 with value: 1.1984585741811176.



| acc_inc/eval_ds [pp]   |   train |   test |     oos |   rnd |
|:-----------------------|--------:|-------:|--------:|------:|
| DPO lr=0.0028          | -24.793 | -21.26 | -49.133 | 1.754 |


[I 2024-10-06 04:18:26,050] Using an existing study with name 'hs-hra-rank' instead of creating a new one.


loaded 45 hs-hra-rank trials
| hs-hra-rank N=✓43/✖45, best=1.087   |   importance |         best |
|:------------------------------------|-------------:|-------------:|
| lr                                  |        0.907 |  0.000157522 |
| r                                   |        0.048 | 96           |
| β                                   |        0.039 | 18.1564      |
| α                                   |        0.005 |  0.123722    |
| apply_GS                            |        0.002 |  0           |


[I 2024-10-06 04:21:47,259] Trial 45 finished with value: 1.023121387283237 and parameters: {'lr': 0.0001650018019567111, 'r': 20, 'apply_GS': True, 'α': 1374.4173453251522, 'β': 17.347052994706306}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                                  |   train |   test |   oos |   rnd |
|:--------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=1.4e+03 rank.β=17 lr=0.00017 hra.apply_GS=True hra.r=20 |   3.306 |      0 | 2.312 | 1.754 |


[I 2024-10-06 04:25:09,250] Trial 46 finished with value: 1.0115606936416186 and parameters: {'lr': 0.00038690644171223586, 'r': 36, 'apply_GS': True, 'α': 55.839080090728984, 'β': 24.876699752567294}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                             |   train |   test |   oos |   rnd |
|:---------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=56 rank.β=25 lr=0.00039 hra.apply_GS=True hra.r=36 |   4.132 |  0.787 | 1.156 | 8.772 |


[I 2024-10-06 04:28:31,679] Trial 47 finished with value: 1.0057803468208093 and parameters: {'lr': 3.53918958281575e-05, 'r': 12, 'apply_GS': True, 'α': 0.049977200081163936, 'β': 12.71894108589158}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                               |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=0.05 rank.β=13 lr=3.5e-05 hra.apply_GS=True hra.r=12 |   1.653 |      0 | 0.578 | 8.772 |


[I 2024-10-06 04:31:52,252] Trial 48 finished with value: 0.6300578034682082 and parameters: {'lr': 0.0006988401492280374, 'r': 406, 'apply_GS': True, 'α': 0.22774762601145612, 'β': 8.828627552617311}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                                |   train |   test |     oos |   rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|--------:|------:|
| ReprPO collect_hs=True rank.α=0.23 rank.β=8.8 lr=0.0007 hra.apply_GS=True hra.r=406 |  -2.479 | -7.874 | -36.994 | 1.754 |


[I 2024-10-06 04:35:11,699] Trial 49 finished with value: 0.5240847784200386 and parameters: {'lr': 0.0014132273758850318, 'r': 6, 'apply_GS': True, 'α': 0.018541603442753043, 'β': 1.10382516653629}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                               |   train |    test |     oos |    rnd |
|:-----------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=0.019 rank.β=1.1 lr=0.0014 hra.apply_GS=True hra.r=6 | -18.182 | -18.898 | -47.592 | 10.526 |


[I 2024-10-06 04:38:34,116] Trial 50 finished with value: 0.9922928709055877 and parameters: {'lr': 6.343143630486608e-05, 'r': 25, 'apply_GS': False, 'α': 0.003126572876981708, 'β': 6.369388941969746}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                                   |   train |   test |    oos |    rnd |
|:---------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=0.0031 rank.β=6.4 lr=6.3e-05 hra.apply_GS=False hra.r=25 |   2.479 | -0.787 | -0.771 | 17.544 |


[I 2024-10-06 04:41:55,592] Trial 51 finished with value: 1.0500963391136802 and parameters: {'lr': 0.0003156627088202522, 'r': 80, 'apply_GS': True, 'α': 9.04039147140358, 'β': 0.34179176205063594}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                              |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=9 rank.β=0.34 lr=0.00032 hra.apply_GS=True hra.r=80 |   4.959 |  0.787 |  5.01 | 28.07 |


[I 2024-10-06 04:45:18,975] Trial 52 finished with value: 1.0038535645472062 and parameters: {'lr': 3.629782353829205e-07, 'r': 77, 'apply_GS': False, 'α': 7.024120398344573, 'β': 0.28463529783946456}. Best is trial 21 with value: 1.0867052023121389.



| acc_inc/eval_ds [pp]                                                               |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=7 rank.β=0.28 lr=3.6e-07 hra.apply_GS=False hra.r=77 |       0 |      0 | 0.385 | 5.263 |


[I 2024-10-06 04:48:40,784] Trial 53 finished with value: 1.117533718689788 and parameters: {'lr': 0.00029506781353040817, 'r': 87, 'apply_GS': True, 'α': 2.4742183958572515, 'β': 0.44127239648806993}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |   test |    oos |   rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_hs=True rank.α=2.5 rank.β=0.44 lr=0.0003 hra.apply_GS=True hra.r=87 |   2.479 |      0 | 11.753 | 3.509 |


[I 2024-10-06 04:52:02,552] Trial 54 finished with value: 1.088631984585742 and parameters: {'lr': 0.0003948719794516659, 'r': 102, 'apply_GS': True, 'α': 2.5577812877325714, 'β': 0.41359973300906705}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=2.6 rank.β=0.41 lr=0.00039 hra.apply_GS=True hra.r=102 |   2.479 |  0.787 | 8.863 | 17.544 |


[I 2024-10-06 04:55:28,613] Trial 55 finished with value: 0.4816955684007707 and parameters: {'lr': 0.0033460446873014157, 'r': 101, 'apply_GS': True, 'α': 2.571463595470156, 'β': 0.7895297443206247}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |    test |    oos |   rnd |
|:------------------------------------------------------------------------------------|--------:|--------:|-------:|------:|
| ReprPO collect_hs=True rank.α=2.6 rank.β=0.79 lr=0.0033 hra.apply_GS=True hra.r=101 | -17.355 | -19.685 | -51.83 | 7.018 |


[I 2024-10-06 04:58:53,833] Trial 56 finished with value: 0.9210019267822737 and parameters: {'lr': 0.0005077233455473574, 'r': 46, 'apply_GS': True, 'α': 0.7033698666887437, 'β': 0.41171288871287326}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=0.7 rank.β=0.41 lr=0.00051 hra.apply_GS=True hra.r=46 |   2.479 |  -3.15 |  -7.9 | -5.263 |


[I 2024-10-06 05:02:20,038] Trial 57 finished with value: 0.5279383429672447 and parameters: {'lr': 0.0012516453018802708, 'r': 136, 'apply_GS': True, 'α': 0.23593495449900395, 'β': 0.19739584183449618}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |    test |     oos |   rnd |
|:------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True rank.α=0.24 rank.β=0.2 lr=0.0013 hra.apply_GS=True hra.r=136 | -16.529 | -19.685 | -47.206 | 3.509 |


[I 2024-10-06 05:05:40,370] Trial 58 finished with value: 1.0269749518304432 and parameters: {'lr': 0.00013322153067244852, 'r': 192, 'apply_GS': True, 'α': 15.307673696115064, 'β': 0.1991769444845182}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=15 rank.β=0.2 lr=0.00013 hra.apply_GS=True hra.r=192 |   3.306 |      0 | 2.697 | 5.263 |


[I 2024-10-06 05:09:02,396] Trial 59 finished with value: 1.0173410404624277 and parameters: {'lr': 0.00035057010920508684, 'r': 278, 'apply_GS': False, 'α': 0.92777953010552, 'β': 0.8667848203214406}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                   |   train |   test |   oos |    rnd |
|:---------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=0.93 rank.β=0.87 lr=0.00035 hra.apply_GS=False hra.r=278 |   4.959 | -1.575 | 1.734 | -1.754 |


[I 2024-10-06 05:12:23,645] Trial 60 finished with value: 1.0134874759152217 and parameters: {'lr': 2.6943626187168916e-05, 'r': 62, 'apply_GS': True, 'α': 3.2400195090183015, 'β': 0.48693025372511045}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=3.2 rank.β=0.49 lr=2.7e-05 hra.apply_GS=True hra.r=62 |   0.826 |      0 | 1.349 |     0 |


[I 2024-10-06 05:15:45,524] Trial 61 finished with value: 1.023121387283237 and parameters: {'lr': 7.346940709116254e-05, 'r': 138, 'apply_GS': False, 'α': 126.29771560392675, 'β': 0.23092723790281325}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                      |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=1.3e+02 rank.β=0.23 lr=7.3e-05 hra.apply_GS=False hra.r=138 |   1.653 |      0 | 2.312 | -1.754 |


[I 2024-10-06 05:19:09,896] Trial 62 finished with value: 0.7919075144508672 and parameters: {'lr': 1.4152472933346213e-05, 'r': 91, 'apply_GS': False, 'α': 0.0002709613488314965, 'β': 0.13491843278385426}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                     |   train |   test |     oos |    rnd |
|:-----------------------------------------------------------------------------------------|--------:|-------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=0.00027 rank.β=0.13 lr=1.4e-05 hra.apply_GS=False hra.r=91 |  -0.826 | -2.362 | -20.809 | 15.789 |


[I 2024-10-06 05:22:33,309] Trial 63 finished with value: 1.0404624277456649 and parameters: {'lr': 0.0002670484007598556, 'r': 74, 'apply_GS': True, 'α': 7.031974420028055, 'β': 0.35395508322985203}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                              |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=7 rank.β=0.35 lr=0.00027 hra.apply_GS=True hra.r=74 |   4.132 |  0.787 | 4.046 |     0 |


[I 2024-10-06 05:26:00,618] Trial 64 finished with value: 1.0211946050096339 and parameters: {'lr': 0.00028840515000813866, 'r': 53, 'apply_GS': True, 'α': 8.64113588333861, 'β': 0.3556643353729115}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=8.6 rank.β=0.36 lr=0.00029 hra.apply_GS=True hra.r=53 |   3.306 |  0.787 | 2.119 | 12.281 |


[I 2024-10-06 05:26:01,497] Using an existing study with name 'projgrad2' instead of creating a new one.
[W 2024-10-06 05:26:01,717] Study instance does not contain completed trials.


loaded 307 projgrad2 trials
| projgrad2 N=✓0/✖307, best=nan   | importance   | best   |
|---------------------------------|--------------|--------|


[W 2024-10-06 05:26:02,123] Trial 307 failed with parameters: {'lr': 7.45934328572655e-06, 'β': 0.9507143064099162, 'reverse_pref': True, 'scale_orth': True, 'weight_dim': 0} because of the following error: ValueError('CategoricalDistribution does not support dynamic value space.').
Traceback (most recent call last):
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 18, in objective
    kwargs = trial2args(trial)
             ^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/reprpo/hp/space.py", line 78, in projgrad
    "neg_slope": trial.suggest_categorical("neg_slope",[0, 'float']), # error?
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py", lin

loaded 24 projbp trials
| projbp N=✓10/✖24, best=1.033   |   importance |        best |
|:-------------------------------|-------------:|------------:|
| β                              |        0.512 | 0.366362    |
| lr                             |        0.311 | 2.88884e-06 |
| scale_orth                     |        0.128 | 0           |
| mag_clip                       |        0.021 |             |
| neg_slope                      |        0.014 | 0           |
| reverse_pref                   |        0.014 | 0           |


[W 2024-10-06 05:26:13,043] Trial 24 failed with parameters: {'lr': 0.000510881708506212, 'β': 0.40691929271586114, 'reverse_pref': False, 'scale_orth': False, 'neg_slope': 0, 'mag_clip': 'float', 'mag_clip_value': 0.014599142035203292} because of the following error: RuntimeError('The size of tensor a (512) must match the size of tensor b (16) at non-singleton dimension 2').
Traceback (most recent call last):
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 19, in objective
    r = objective_func(kwargs, trial, starter_experiment_name)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 14, in objective_func
    r = train(cfg, trial=trial)
        ^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/reprpo

loaded 23 ether-prefvec trials
| ether-prefvec N=✓21/✖23, best=1.148   |   importance | best                  |
|:--------------------------------------|-------------:|:----------------------|
| lr                                    |        0.485 | 0.0002901472921452969 |
| β                                     |        0.255 | 1.9166022109203233    |
| reduction                             |        0.192 | 1                     |
| nb                                    |        0.057 | 20                    |
| use_dpo_loss                          |        0.004 | False                 |
| collect_hs                            |        0.002 | False                 |
| Htype                                 |        0.001 | ether                 |
| use_orth_loss                         |        0.001 | True                  |
| weight_tokens                         |        0.001 | True                  |
| collect_input                         |        0     | True                 

[I 2024-10-06 05:32:45,864] Trial 23 finished with value: 1.1290944123314066 and parameters: {'lr': 0.00027057280051179895, 'collect_input': True, 'collect_hs': False, 'nb': 20, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.6123145596810586, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 13 with value: 1.1483622350674374.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.6 lr=0.00027 ether.Htype=ether ether.flip_side=True ether.nb=20 ether.reduction=1 |       0 | -0.787 | 12.909 | -1.754 |


[I 2024-10-06 05:36:13,958] Trial 24 finished with value: 1.183044315992293 and parameters: {'lr': 0.00037772770210724844, 'collect_input': True, 'collect_hs': False, 'nb': 20, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.9848539330526844, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                  |   train |   test |    oos |     rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|--------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=2 lr=0.00038 ether.Htype=ether ether.flip_side=True ether.nb=20 ether.reduction=1 |   3.306 |      0 | 18.304 | -12.281 |


[I 2024-10-06 05:39:38,173] Trial 25 finished with value: 0.7398843930635839 and parameters: {'lr': 0.0013843924204659044, 'collect_input': True, 'collect_hs': False, 'nb': 18, 'Htype': 'ether', 'flip_side': True, 'reduction': 2, 'β': 0.0642733051555277, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |    test |     oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.064 lr=0.0014 ether.Htype=ether ether.flip_side=True ether.nb=18 ether.reduction=2 | -12.397 | -11.024 | -26.012 | 8.772 |


[I 2024-10-06 05:42:58,854] Trial 26 finished with value: 1.0308285163776494 and parameters: {'lr': 6.222901334703244e-05, 'collect_input': True, 'collect_hs': False, 'nb': 8, 'Htype': 'ether', 'flip_side': True, 'reduction': 7, 'β': 0.21886655961073495, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.22 lr=6.2e-05 ether.Htype=ether ether.flip_side=True ether.nb=8 ether.reduction=7 |       0 |      0 | 3.083 | 1.754 |


[I 2024-10-06 05:46:26,916] Trial 27 finished with value: 1.088631984585742 and parameters: {'lr': 0.000540241927460827, 'collect_input': True, 'collect_hs': False, 'nb': 15, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 0.011498665887108626, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                      |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.011 lr=0.00054 ether.Htype=ether ether.flip_side=True ether.nb=15 ether.reduction=1 |       0 | -0.787 | 8.863 | -5.263 |


[I 2024-10-06 05:49:54,684] Trial 28 finished with value: 0.4084778420038536 and parameters: {'lr': 0.009807755438248326, 'collect_input': False, 'collect_hs': False, 'nb': 23, 'Htype': 'ether', 'flip_side': False, 'reduction': 3, 'β': 0.7347190386431403, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |    test |     oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO prefvec.use_angle_prefvec=False prefvec.use_orth_prefvec=True prefvec.weight_tokens=True prefvec.β=0.73 lr=0.0098 ether.Htype=ether ether.nb=23 ether.reduction=3 | -47.934 | -37.795 | -59.152 | 3.509 |


[I 2024-10-06 05:53:19,039] Trial 29 finished with value: 0.5414258188824663 and parameters: {'lr': 0.001959142662897505, 'collect_input': True, 'collect_hs': False, 'nb': 9, 'Htype': 'ether', 'flip_side': True, 'reduction': 2, 'β': 1.9242415385419622, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                 |   train |    test |     oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.9 lr=0.002 ether.Htype=ether ether.flip_side=True ether.nb=9 ether.reduction=2 | -34.711 | -26.772 | -45.857 | 12.281 |


[I 2024-10-06 05:56:45,693] Trial 30 finished with value: 1.0500963391136802 and parameters: {'lr': 0.00013320293018846333, 'collect_input': True, 'collect_hs': False, 'nb': 5, 'Htype': 'etherplus', 'flip_side': True, 'reduction': 13, 'β': 0.12242203835214704, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                         |   train |   test |   oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.12 lr=0.00013 ether.Htype=etherplus ether.flip_side=True ether.nb=5 ether.reduction=13 |       0 |      0 |  5.01 |     0 |


[I 2024-10-06 06:00:09,564] Trial 31 finished with value: 0.9961464354527939 and parameters: {'lr': 3.5423338515200635e-05, 'collect_input': True, 'collect_hs': False, 'nb': 32, 'Htype': 'etherplus', 'flip_side': False, 'reduction': 6, 'β': 0.5002968574839632, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                |   train |   test |    oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_input=True prefvec.use_angle_prefvec=False prefvec.use_nll_prefvec=True prefvec.β=0.5 lr=3.5e-05 ether.Htype=etherplus ether.nb=32 ether.reduction=6 |       0 |      0 | -0.385 | 1.754 |


[I 2024-10-06 06:03:33,557] Trial 32 finished with value: 1.0770712909441233 and parameters: {'lr': 0.00034275767251676153, 'collect_input': True, 'collect_hs': False, 'nb': 2, 'Htype': 'ether', 'flip_side': True, 'reduction': 29, 'β': 0.014314831997847988, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                      |   train |   test |   oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.014 lr=0.00034 ether.Htype=ether ether.flip_side=True ether.nb=2 ether.reduction=29 |   1.653 | -0.787 | 7.707 | 3.509 |


[I 2024-10-06 06:07:00,437] Trial 33 finished with value: 1.1753371868978806 and parameters: {'lr': 0.0003513706413025965, 'collect_input': True, 'collect_hs': False, 'nb': 20, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.7504573795760632, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |     rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|--------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.8 lr=0.00035 ether.Htype=ether ether.flip_side=True ether.nb=20 ether.reduction=1 |   1.653 |      0 | 17.534 | -12.281 |


[I 2024-10-06 06:10:23,395] Trial 34 finished with value: 1.1502890173410405 and parameters: {'lr': 0.0005495447376543452, 'collect_input': True, 'collect_hs': False, 'nb': 24, 'Htype': 'ether', 'flip_side': True, 'reduction': 3, 'β': 0.8364594214066026, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |    oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.84 lr=0.00055 ether.Htype=ether ether.flip_side=True ether.nb=24 ether.reduction=3 |       0 | -0.787 | 15.029 | -3.509 |


[I 2024-10-06 06:13:47,706] Trial 35 finished with value: 1.159922928709056 and parameters: {'lr': 0.0005024514091269327, 'collect_input': True, 'collect_hs': False, 'nb': 16, 'Htype': 'ether', 'flip_side': True, 'reduction': 3, 'β': 0.3210368338780154, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.32 lr=0.0005 ether.Htype=ether ether.flip_side=True ether.nb=16 ether.reduction=3 |   4.132 | -0.787 | 15.992 | -8.772 |


[I 2024-10-06 06:17:00,840] Trial 36 finished with value: 0.4315992292870906 and parameters: {'lr': 0.0020246691409722484, 'collect_input': True, 'collect_hs': True, 'nb': 15, 'Htype': 'ether', 'flip_side': True, 'reduction': 3, 'β': 0.3325102211190215, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                          |   train |    test |    oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|-------:|-------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_angle_prefvec=False prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.33 lr=0.002 ether.Htype=ether ether.flip_side=True ether.nb=15 ether.reduction=3 | -47.107 | -35.433 | -56.84 | 12.281 |


[I 2024-10-06 06:20:19,870] Trial 37 finished with value: 1.0500963391136802 and parameters: {'lr': 0.00010045460667598762, 'collect_input': True, 'collect_hs': False, 'nb': 11, 'Htype': 'oft', 'flip_side': True, 'reduction': 4, 'β': 0.07531537523426912, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                            |   train |   test |   oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_orth_prefvec=True prefvec.weight_tokens=True prefvec.β=0.075 lr=0.0001 ether.Htype=oft ether.flip_side=True ether.nb=11 ether.reduction=4 |       0 |  0.787 |  5.01 | 1.754 |


[I 2024-10-06 06:23:44,872] Trial 38 finished with value: 0.6281310211946051 and parameters: {'lr': 0.0004955151888597254, 'collect_input': False, 'collect_hs': True, 'nb': 23, 'Htype': 'etherplus', 'flip_side': False, 'reduction': 3, 'β': 0.7049847512470013, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                   |   train |    test |     oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.7 lr=0.0005 ether.Htype=etherplus ether.nb=23 ether.reduction=3 | -16.529 | -11.024 | -37.187 | 28.07 |


[I 2024-10-06 06:27:04,906] Trial 39 finished with value: 0.4470134874759153 and parameters: {'lr': 0.0022100376622240974, 'collect_input': True, 'collect_hs': False, 'nb': 15, 'Htype': 'oft', 'flip_side': True, 'reduction': 5, 'β': 0.28954685653247836, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                     |   train |    test |     oos |    rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.29 lr=0.0022 ether.Htype=oft ether.flip_side=True ether.nb=15 ether.reduction=5 | -43.802 | -33.858 | -55.299 | -5.263 |


[I 2024-10-06 06:30:38,867] Trial 40 finished with value: 1.0038535645472062 and parameters: {'lr': 1.4893745136120191e-05, 'collect_input': False, 'collect_hs': False, 'nb': 25, 'Htype': 'ether', 'flip_side': True, 'reduction': 21, 'β': 0.07617990393397168, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                               |   train |   test |   oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO prefvec.use_angle_prefvec=False prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.β=0.076 lr=1.5e-05 ether.Htype=ether ether.flip_side=True ether.nb=25 ether.reduction=21 |   0.826 |      0 | 0.385 | 1.754 |


[I 2024-10-06 06:34:03,575] Trial 41 finished with value: 0.535645472061657 and parameters: {'lr': 0.00855261240463271, 'collect_input': True, 'collect_hs': True, 'nb': 9, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 106, 'β': 0.0015255924591023127, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                       |   train |    test |     oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.0015 lr=0.0086 ether.flip_side=True ether.nb=9 ether.reduction=106 | -24.793 | -22.835 | -46.435 | 28.07 |


[I 2024-10-06 06:37:27,715] Trial 42 finished with value: 0.535645472061657 and parameters: {'lr': 0.00044538247629857857, 'collect_input': True, 'collect_hs': False, 'nb': 17, 'Htype': 'oft', 'flip_side': False, 'reduction': 2, 'β': 0.0002725993287154416, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                 |   train |    test |     oos |   rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.00027 lr=0.00045 ether.Htype=oft ether.nb=17 ether.reduction=2 |  -25.62 | -19.685 | -46.435 | 8.772 |


[I 2024-10-06 06:37:28,572] Using an existing study with name 'hs-ortho-prefvec' instead of creating a new one.


loaded 40 hs-ortho-prefvec trials
| hs-ortho-prefvec N=✓40/✖40, best=1.118   |   importance | best                   |
|:-----------------------------------------|-------------:|:-----------------------|
| lr                                       |        0.767 | 0.00012461222072738544 |
| β                                        |        0.207 | 0.3412333287913769     |
| use_nll_loss                             |        0.009 | False                  |
| use_orth_loss                            |        0.006 | False                  |
| orthogonal_map                           |        0.004 | matrix_exp             |
| use_dpo_loss                             |        0.003 | True                   |
| weight_tokens                            |        0.003 | True                   |
| use_proj_rel                             |        0.001 | True                   |
| use_angle_loss                           |        0     | True                   |


[I 2024-10-06 06:40:39,527] Trial 40 finished with value: 0.9961464354527939 and parameters: {'lr': 3.9867935658727725e-05, 'orthogonal_map': 'cayley', 'β': 0.001837247699906843, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                |   train |   test |    oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_hs=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.0018 lr=4e-05 ortho.orthogonal_map=cayley |       0 |      0 | -0.385 | 7.018 |


[I 2024-10-06 06:44:01,288] Trial 41 finished with value: 1.094412331406551 and parameters: {'lr': 0.00023024988630700053, 'orthogonal_map': 'matrix_exp', 'β': 0.8033261383517015, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                    |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.8 lr=0.00023 ortho.orthogonal_map=matrix_exp |   4.132 |      0 | 9.441 | 14.035 |


[I 2024-10-06 06:47:25,864] Trial 42 finished with value: 1.0346820809248556 and parameters: {'lr': 5.960061887970884e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.591900446280974, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                   |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.59 lr=6e-05 ortho.orthogonal_map=matrix_exp |   1.653 |      0 | 3.468 | 3.509 |


[I 2024-10-06 06:50:54,519] Trial 43 finished with value: 0.5221579961464355 and parameters: {'lr': 0.0009982365511321367, 'orthogonal_map': 'matrix_exp', 'β': 0.9946435751501622, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                   |   train |    test |     oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.99 lr=0.001 ortho.orthogonal_map=matrix_exp | -12.397 | -20.472 | -47.784 | 10.526 |


[I 2024-10-06 06:54:18,932] Trial 44 finished with value: 0.9672447013487476 and parameters: {'lr': 0.0004545300790566714, 'orthogonal_map': 'matrix_exp', 'β': 1.8271581791162788, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                    |   train |   test |    oos |   rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=1.8 lr=0.00045 ortho.orthogonal_map=matrix_exp |   2.479 |  0.787 | -3.276 | 8.772 |


[I 2024-10-06 06:57:44,195] Trial 45 finished with value: 1.0578034682080926 and parameters: {'lr': 0.00019730724112700825, 'orthogonal_map': 'matrix_exp', 'β': 0.05234674375767749, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.052 lr=0.0002 ortho.orthogonal_map=matrix_exp |   4.959 |      0 |  5.78 | 17.544 |


[I 2024-10-06 07:01:02,000] Trial 46 finished with value: 1.044315992292871 and parameters: {'lr': 7.42404537558471e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.5350921175310159, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                                  |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.weight_tokens=True prefvec.β=0.54 lr=7.4e-05 ortho.orthogonal_map=matrix_exp |   1.653 |      0 | 4.432 | -3.509 |


[I 2024-10-06 07:04:09,917] Trial 47 finished with value: 0.4296724470134875 and parameters: {'lr': 0.00019531754032409994, 'orthogonal_map': 'householder', 'β': 0.332477402970353, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': False, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                             |   train |    test |     oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.β=0.33 lr=0.0002 ortho.orthogonal_map=householder |  -48.76 | -33.858 | -57.033 | 5.263 |


[I 2024-10-06 07:07:30,100] Trial 48 finished with value: 1.0250481695568403 and parameters: {'lr': 2.3213575329172547e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.13674752166625562, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                               |   train |   test |   oos |   rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.14 lr=2.3e-05 ortho.orthogonal_map=matrix_exp |   0.826 |      0 | 2.505 | 1.754 |


[I 2024-10-06 07:10:52,526] Trial 49 finished with value: 0.8323699421965318 and parameters: {'lr': 0.000601715290068621, 'orthogonal_map': 'matrix_exp', 'β': 1.8569414712450746e-05, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                                               |   train |   test |     oos |    rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|--------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.9e-05 lr=0.0006 ortho.orthogonal_map=matrix_exp |  -5.785 | -7.087 | -16.763 | -7.018 |


[I 2024-10-06 07:14:14,044] Trial 50 finished with value: 1.0308285163776494 and parameters: {'lr': 3.4925586100146066e-05, 'orthogonal_map': 'cayley', 'β': 0.07181876571672766, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                            |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.072 lr=3.5e-05 ortho.orthogonal_map=cayley |   2.479 |      0 | 3.083 |     0 |


[I 2024-10-06 07:17:36,250] Trial 51 finished with value: 1.0539499036608864 and parameters: {'lr': 0.00015236756126156922, 'orthogonal_map': 'matrix_exp', 'β': 0.7072515850874993, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.71 lr=0.00015 ortho.orthogonal_map=matrix_exp |   4.132 |      0 | 5.395 | 21.053 |


[I 2024-10-06 07:21:08,514] Trial 52 finished with value: 1.0597302504816957 and parameters: {'lr': 0.0003098713597710707, 'orthogonal_map': 'matrix_exp', 'β': 0.9103861835463519, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 19 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.91 lr=0.00031 ortho.orthogonal_map=matrix_exp |   4.959 |  0.787 | 5.973 | 19.298 |


[I 2024-10-06 07:24:30,450] Trial 53 finished with value: 1.1310211946050097 and parameters: {'lr': 0.00023889593063246652, 'orthogonal_map': 'matrix_exp', 'β': 0.2189870349392484, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |    oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.22 lr=0.00024 ortho.orthogonal_map=matrix_exp |   4.959 | -0.787 | 13.102 | 17.544 |


[I 2024-10-06 07:27:50,311] Trial 54 finished with value: 0.5028901734104047 and parameters: {'lr': 0.0008385828395394134, 'orthogonal_map': 'matrix_exp', 'β': 0.24323702641375866, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |    test |     oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.24 lr=0.00084 ortho.orthogonal_map=matrix_exp | -23.967 | -26.772 | -49.711 | 19.298 |


[I 2024-10-06 07:31:13,089] Trial 55 finished with value: 1.0115606936416186 and parameters: {'lr': 0.0004342106741843916, 'orthogonal_map': 'matrix_exp', 'β': 0.024760626818238703, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                                      |   train |   test |   oos |    rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.025 lr=0.00043 ortho.orthogonal_map=matrix_exp |   4.132 | -0.787 | 1.156 | 21.053 |


[I 2024-10-06 07:34:31,274] Trial 56 finished with value: 1.0520231213872833 and parameters: {'lr': 0.0001234371308829511, 'orthogonal_map': 'matrix_exp', 'β': 0.4086591646581516, 'use_orth_loss': False, 'use_angle_loss': False, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': False}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_angle_prefvec=False prefvec.use_dpo_prefvec=True prefvec.weight_tokens=True prefvec.β=0.41 lr=0.00012 ortho.orthogonal_map=matrix_exp |   3.306 |      0 | 5.202 | -1.754 |


[I 2024-10-06 07:37:42,564] Trial 57 finished with value: 1.0327552986512525 and parameters: {'lr': 0.00023847128154378647, 'orthogonal_map': 'matrix_exp', 'β': 0.09293347427064105, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': True}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                     |   train |   test |   oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.β=0.093 lr=0.00024 ortho.orthogonal_map=matrix_exp |   0.826 |      0 | 3.276 | -7.018 |


[I 2024-10-06 07:41:01,139] Trial 58 finished with value: 0.4913294797687862 and parameters: {'lr': 0.0015315026967341549, 'orthogonal_map': 'matrix_exp', 'β': 0.05018204163362304, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |    test |     oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.05 lr=0.0015 ortho.orthogonal_map=matrix_exp | -32.231 | -23.622 | -50.867 | 22.807 |


[I 2024-10-06 07:44:23,259] Trial 59 finished with value: 1.0751445086705202 and parameters: {'lr': 8.643143914150594e-05, 'orthogonal_map': 'matrix_exp', 'β': 0.20213469069945655, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 53 with value: 1.1310211946050097.



| acc_inc/eval_ds [pp]                                                                                                                                              |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True prefvec.use_dpo_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.2 lr=8.6e-05 ortho.orthogonal_map=matrix_exp |   3.306 |      0 | 7.514 | -1.754 |


[I 2024-10-06 07:44:24,106] Using an existing study with name 'projgrad2' instead of creating a new one.




[W 2024-10-06 07:44:24,298] Study instance does not contain completed trials.


loaded 308 projgrad2 trials
| projgrad2 N=✓0/✖308, best=nan   | importance   | best   |
|---------------------------------|--------------|--------|


[W 2024-10-06 07:44:24,811] Trial 308 failed with parameters: {'lr': 7.45934328572655e-06, 'β': 0.9507143064099162, 'reverse_pref': True, 'scale_orth': True, 'weight_dim': 0} because of the following error: ValueError('CategoricalDistribution does not support dynamic value space.').
Traceback (most recent call last):
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 18, in objective
    kwargs = trial2args(trial)
             ^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/reprpo/hp/space.py", line 78, in projgrad
    "neg_slope": trial.suggest_categorical("neg_slope",[0, 'float']), # error?
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/trial/_trial.py", lin

loaded 332 hs-svd-mse trials


[I 2024-10-06 07:44:26,358] Using an existing study with name 'hs-hra-rank' instead of creating a new one.


| hs-svd-mse N=✓14/✖332, best=1.017   |   importance | best                  |
|:------------------------------------|-------------:|:----------------------|
| lr                                  |        0.752 | 0.0011948328168545441 |
| α                                   |        0.189 | 0.6355835028602363    |
| collect_input                       |        0.056 | False                 |
| collect_hs                          |        0.003 | True                  |
| dual_svd                            |        0     | True                  |
| quantile                            |        0     | float                 |
| quantile_value                      |      nan     | 0.30000000000000004   |
loaded 65 hs-hra-rank trials
| hs-hra-rank N=✓63/✖65, best=1.118   |   importance |         best |
|:------------------------------------|-------------:|-------------:|
| lr                                  |        0.728 |  0.000295068 |
| r                                   |        0.1

[I 2024-10-06 07:47:44,337] Trial 65 finished with value: 0.5684007707129095 and parameters: {'lr': 0.0008671088919636775, 'r': 89, 'apply_GS': True, 'α': 26.671293855105542, 'β': 0.7059876643354057}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |    test |    oos |    rnd |
|:-----------------------------------------------------------------------------------|--------:|--------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=27 rank.β=0.71 lr=0.00087 hra.apply_GS=True hra.r=89 |  -8.264 | -11.811 | -43.16 | 21.053 |


[I 2024-10-06 07:51:04,866] Trial 66 finished with value: 1.0539499036608864 and parameters: {'lr': 0.0001632002830196323, 'r': 182, 'apply_GS': True, 'α': 1.0316397168629856, 'β': 0.44925439438157366}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |   test |   oos |    rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=1 rank.β=0.45 lr=0.00016 hra.apply_GS=True hra.r=182 |   4.959 |      0 | 5.395 | -3.509 |


[I 2024-10-06 07:54:26,299] Trial 67 finished with value: 1.0578034682080926 and parameters: {'lr': 0.0001410901695945212, 'r': 240, 'apply_GS': True, 'α': 1.5782195316775016, 'β': 0.5217092900690894}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=1.6 rank.β=0.52 lr=0.00014 hra.apply_GS=True hra.r=240 |   2.479 |      0 |  5.78 | -1.754 |


[I 2024-10-06 07:57:45,509] Trial 68 finished with value: 0.9942196531791907 and parameters: {'lr': 0.0001874991694384327, 'r': 379, 'apply_GS': True, 'α': 1.6461247721080716, 'β': 0.523160926589976}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |    oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=1.6 rank.β=0.52 lr=0.00019 hra.apply_GS=True hra.r=379 |   0.826 | -0.787 | -0.578 | 17.544 |


[I 2024-10-06 08:01:06,175] Trial 69 finished with value: 1.0655105973025047 and parameters: {'lr': 0.0001248730975008268, 'r': 231, 'apply_GS': True, 'α': 1.5451432922945487, 'β': 1.10382516653629}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=1.5 rank.β=1.1 lr=0.00012 hra.apply_GS=True hra.r=231 |   5.785 |      0 | 6.551 | 15.789 |


[I 2024-10-06 08:04:24,080] Trial 70 finished with value: 1.0597302504816957 and parameters: {'lr': 0.00048262849408558416, 'r': 287, 'apply_GS': True, 'α': 0.24178700076464854, 'β': 0.9954018112718899}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_hs=True rank.α=0.24 rank.β=1 lr=0.00048 hra.apply_GS=True hra.r=287 |   3.306 |      0 | 5.973 | 3.509 |


[I 2024-10-06 08:07:45,298] Trial 71 finished with value: 0.48554913294797697 and parameters: {'lr': 0.001955823291657308, 'r': 330, 'apply_GS': True, 'α': 0.22508429317882464, 'β': 1.7589269365580213}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                               |   train |    test |     oos |    rnd |
|:-----------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=0.23 rank.β=1.8 lr=0.002 hra.apply_GS=True hra.r=330 | -29.752 | -28.346 | -51.445 | 21.053 |


[I 2024-10-06 08:11:06,898] Trial 72 finished with value: 1.0250481695568403 and parameters: {'lr': 0.0004186029095108628, 'r': 424, 'apply_GS': True, 'α': 0.07396012866185511, 'β': 1.3175740408557266}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                  |   train |   test |   oos |     rnd |
|:--------------------------------------------------------------------------------------|--------:|-------:|------:|--------:|
| ReprPO collect_hs=True rank.α=0.074 rank.β=1.3 lr=0.00042 hra.apply_GS=True hra.r=424 |  -0.826 |  0.787 | 2.505 | -12.281 |


[I 2024-10-06 08:14:29,145] Trial 73 finished with value: 1.0481695568400773 and parameters: {'lr': 0.0001202403424202951, 'r': 273, 'apply_GS': True, 'α': 0.3393223990913148, 'β': 0.9840101338921455}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                  |   train |   test |   oos |    rnd |
|:--------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=0.34 rank.β=0.98 lr=0.00012 hra.apply_GS=True hra.r=273 |   4.132 |      0 | 4.817 | 17.544 |


[I 2024-10-06 08:17:51,826] Trial 74 finished with value: 0.6801541425818883 and parameters: {'lr': 0.0007223639068357127, 'r': 207, 'apply_GS': True, 'α': 3.709814248441998, 'β': 1.3290524372440018}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |     oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=3.7 rank.β=1.3 lr=0.00072 hra.apply_GS=True hra.r=207 |  -6.612 |  -3.15 | -31.985 | -8.772 |


[I 2024-10-06 08:21:14,012] Trial 75 finished with value: 0.9884393063583816 and parameters: {'lr': 5.378195107805544e-05, 'r': 251, 'apply_GS': True, 'α': 0.6192205127779913, 'β': 2.3509612449924813}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |    oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=0.62 rank.β=2.4 lr=5.4e-05 hra.apply_GS=True hra.r=251 |   0.826 |      0 | -1.156 | 19.298 |


[I 2024-10-06 08:24:36,700] Trial 76 finished with value: 0.9421965317919077 and parameters: {'lr': 0.00024290830430073534, 'r': 328, 'apply_GS': True, 'α': 1.414595341335333, 'β': 0.636364499436993}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=1.4 rank.β=0.64 lr=0.00024 hra.apply_GS=True hra.r=328 |   2.479 | -0.787 | -5.78 | -1.754 |


[I 2024-10-06 08:27:58,538] Trial 77 finished with value: 0.9075144508670521 and parameters: {'lr': 0.00046148172788641165, 'r': 159, 'apply_GS': True, 'α': 2.2292990794476633, 'β': 0.8884552322963928}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |    oos |     rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|-------:|--------:|
| ReprPO collect_hs=True rank.α=2.2 rank.β=0.89 lr=0.00046 hra.apply_GS=True hra.r=159 |   4.959 | -1.575 | -9.249 | -19.298 |


[I 2024-10-06 08:31:21,243] Trial 78 finished with value: 1.0115606936416186 and parameters: {'lr': 8.846897825710458e-05, 'r': 237, 'apply_GS': True, 'α': 13.416482610100687, 'β': 0.5774380278605253}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=13 rank.β=0.58 lr=8.8e-05 hra.apply_GS=True hra.r=237 |   4.132 |      0 | 1.156 | -3.509 |


[I 2024-10-06 08:34:42,411] Trial 79 finished with value: 1.1098265895953758 and parameters: {'lr': 0.00023174505424101272, 'r': 450, 'apply_GS': True, 'α': 0.01927930235219434, 'β': 0.7324943218421319}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                   |   train |   test |    oos |    rnd |
|:---------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=0.019 rank.β=0.73 lr=0.00023 hra.apply_GS=True hra.r=450 |   4.132 |      0 | 10.983 | -1.754 |


[I 2024-10-06 08:38:05,668] Trial 80 finished with value: 0.5279383429672447 and parameters: {'lr': 0.0010693420554659812, 'r': 462, 'apply_GS': True, 'α': 0.023890351229978596, 'β': 0.7426673124276196}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                  |   train |    test |     oos |    rnd |
|:--------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=0.024 rank.β=0.74 lr=0.0011 hra.apply_GS=True hra.r=462 | -20.661 | -18.898 | -47.206 | 19.298 |


[I 2024-10-06 08:41:27,443] Trial 81 finished with value: 1.0828516377649327 and parameters: {'lr': 0.0002517246162022116, 'r': 33, 'apply_GS': True, 'α': 0.14946357994972606, 'β': 1.573507191194125}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                |   train |   test |   oos |    rnd |
|:------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=0.15 rank.β=1.6 lr=0.00025 hra.apply_GS=True hra.r=33 |   3.306 | -0.787 | 8.285 | 33.333 |


[I 2024-10-06 08:44:51,324] Trial 82 finished with value: 1.0308285163776494 and parameters: {'lr': 0.00010505694964037513, 'r': 18, 'apply_GS': True, 'α': 0.015946119685078333, 'β': 3.247881167442306}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |   oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_hs=True rank.α=0.016 rank.β=3.2 lr=0.00011 hra.apply_GS=True hra.r=18 |   2.479 |      0 | 3.083 | 10.526 |


[I 2024-10-06 08:48:13,907] Trial 83 finished with value: 0.9865125240847784 and parameters: {'lr': 0.0002159261134946999, 'r': 34, 'apply_GS': True, 'α': 0.08289588485171438, 'β': 2.134476185794796}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |    oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True rank.α=0.083 rank.β=2.1 lr=0.00022 hra.apply_GS=True hra.r=34 |   5.785 |      0 | -1.349 | 10.526 |


[I 2024-10-06 08:51:32,424] Trial 84 finished with value: 0.8285163776493257 and parameters: {'lr': 0.0006735404734733726, 'r': 362, 'apply_GS': True, 'α': 0.17539342040087633, 'β': 1.5130853594130536}. Best is trial 53 with value: 1.117533718689788.



| acc_inc/eval_ds [pp]                                                                 |   train |   test |     oos |    rnd |
|:-------------------------------------------------------------------------------------|--------:|-------:|--------:|-------:|
| ReprPO collect_hs=True rank.α=0.18 rank.β=1.5 lr=0.00067 hra.apply_GS=True hra.r=362 |       0 | -4.724 | -17.148 | 15.789 |


[I 2024-10-06 08:51:33,340] Using an existing study with name 'dpo' instead of creating a new one.


loaded 30 dpo trials
| dpo N=✓28/✖30, best=1.198   |   importance |        best |
|:----------------------------|-------------:|------------:|
| lr                          |            1 | 0.000279486 |


[I 2024-10-06 08:54:53,193] Trial 30 finished with value: 1.2620423892100192 and parameters: {'lr': 0.00025477333688550156}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00025         |   5.785 |  0.787 | 26.204 | 10.526 |


[I 2024-10-06 08:58:16,726] Trial 31 finished with value: 0.5202312138728324 and parameters: {'lr': 0.0010089911001035861}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |     oos |   rnd |
|:-----------------------|--------:|-------:|--------:|------:|
| DPO lr=0.001           | -19.008 | -18.11 | -47.977 | 3.509 |


[I 2024-10-06 09:01:35,465] Trial 32 finished with value: 1.2119460500963393 and parameters: {'lr': 0.0003024050342772714}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.0003          |   5.785 | -0.787 | 21.195 | 26.316 |


[I 2024-10-06 09:04:56,496] Trial 33 finished with value: 1.1849710982658959 and parameters: {'lr': 0.0002660113741920887}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |   rnd |
|:-----------------------|--------:|-------:|-------:|------:|
| DPO lr=0.00027         |   4.959 |      0 | 18.497 | 3.509 |


[I 2024-10-06 09:08:15,384] Trial 34 finished with value: 0.5183044315992293 and parameters: {'lr': 0.0007326081810912613}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00073         | -16.529 | -21.26 | -48.17 | 22.807 |


[I 2024-10-06 09:11:33,022] Trial 35 finished with value: 1.1560693641618498 and parameters: {'lr': 0.00024342730591403548}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |   rnd |
|:-----------------------|--------:|-------:|-------:|------:|
| DPO lr=0.00024         |   5.785 |      0 | 15.607 | 7.018 |


[I 2024-10-06 09:14:55,571] Trial 36 finished with value: 0.9922928709055877 and parameters: {'lr': 3.875227167611519e-05}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=3.9e-05         |   5.785 | -1.575 | -0.771 | 12.281 |


[I 2024-10-06 09:18:15,044] Trial 37 finished with value: 0.6146435452793835 and parameters: {'lr': 0.002735281468893447}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |    rnd |
|:-----------------------|--------:|--------:|--------:|-------:|
| DPO lr=0.0027          | -10.744 | -14.961 | -38.536 | 19.298 |


[I 2024-10-06 09:21:27,996] Trial 38 finished with value: 1.001926782273603 and parameters: {'lr': 7.124252843547831e-05}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=7.1e-05         |   5.785 | -2.362 | 0.193 | 21.053 |


[I 2024-10-06 09:24:42,447] Trial 39 finished with value: 0.418111753371869 and parameters: {'lr': 0.008523866637217492}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |    rnd |
|:-----------------------|--------:|--------:|--------:|-------:|
| DPO lr=0.0085          | -55.372 | -51.181 | -58.189 | -3.509 |


[I 2024-10-06 09:27:56,074] Trial 40 finished with value: 1.0289017341040463 and parameters: {'lr': 1.1378854545626296e-05}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=1.1e-05         |   3.306 | -0.787 |  2.89 | 10.526 |


[I 2024-10-06 09:31:11,139] Trial 41 finished with value: 1.1290944123314066 and parameters: {'lr': 0.00027680346726999976}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00028         |   4.959 |      0 | 12.909 | 12.281 |


[I 2024-10-06 09:34:28,134] Trial 42 finished with value: 1.042389210019268 and parameters: {'lr': 2.6870239798129743e-06}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=2.7e-06         |   1.653 |      0 | 4.239 |     0 |


[I 2024-10-06 09:37:39,883] Trial 43 finished with value: 1.1156069364161851 and parameters: {'lr': 0.00022813978301314662}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |    oos |    rnd |
|:-----------------------|--------:|-------:|-------:|-------:|
| DPO lr=0.00023         |   4.959 | -0.787 | 11.561 | -3.509 |


[I 2024-10-06 09:40:55,742] Trial 44 finished with value: 0.720616570327553 and parameters: {'lr': 0.000623805578829016}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |     oos |   rnd |
|:-----------------------|--------:|-------:|--------:|------:|
| DPO lr=0.00062         |  -3.306 | -4.724 | -27.938 | 7.018 |


[I 2024-10-06 09:44:07,806] Trial 45 finished with value: 0.556840077071291 and parameters: {'lr': 0.0018158864605505986}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |   rnd |
|:-----------------------|--------:|--------:|--------:|------:|
| DPO lr=0.0018          | -14.876 | -18.898 | -44.316 | 7.018 |


[I 2024-10-06 09:47:20,811] Trial 46 finished with value: 0.9210019267822737 and parameters: {'lr': 0.0003531329647060864}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=0.00035         |   4.132 |      0 |  -7.9 | 5.263 |


[I 2024-10-06 09:50:34,173] Trial 47 finished with value: 1.0077071290944124 and parameters: {'lr': 6.941972990246638e-05}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |    rnd |
|:-----------------------|--------:|-------:|------:|-------:|
| DPO lr=6.9e-05         |   5.785 |  -3.15 | 0.771 | 12.281 |


[I 2024-10-06 09:53:42,327] Trial 48 finished with value: 1.0346820809248556 and parameters: {'lr': 0.0001964883188114603}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |   test |   oos |   rnd |
|:-----------------------|--------:|-------:|------:|------:|
| DPO lr=0.0002          |   5.785 | -2.362 | 3.468 | 8.772 |


[I 2024-10-06 09:56:57,460] Trial 49 finished with value: 0.4200385356454721 and parameters: {'lr': 0.0007548147413333964}. Best is trial 30 with value: 1.2620423892100192.



| acc_inc/eval_ds [pp]   |   train |    test |     oos |    rnd |
|:-----------------------|--------:|--------:|--------:|-------:|
| DPO lr=0.00075         | -34.711 | -31.496 | -57.996 | 22.807 |


[I 2024-10-06 09:56:58,249] Using an existing study with name 'projbp' instead of creating a new one.


loaded 44 projbp trials
| projbp N=✓10/✖44, best=1.033   |   importance |        best |
|:-------------------------------|-------------:|------------:|
| β                              |        0.512 | 0.366362    |
| lr                             |        0.311 | 2.88884e-06 |
| scale_orth                     |        0.128 | 0           |
| mag_clip                       |        0.021 |             |
| neg_slope                      |        0.014 | 0           |
| reverse_pref                   |        0.014 | 0           |


[W 2024-10-06 09:57:06,839] Trial 44 failed with parameters: {'lr': 0.000510881708506212, 'β': 0.40691929271586114, 'reverse_pref': False, 'scale_orth': False, 'neg_slope': 0, 'mag_clip': 'float', 'mag_clip_value': 0.014599142035203292} because of the following error: RuntimeError('The size of tensor a (512) must match the size of tensor b (16) at non-singleton dimension 2').
Traceback (most recent call last):
  File "/workspace/repr-preference-optimization/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 19, in objective
    r = objective_func(kwargs, trial, starter_experiment_name)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_277857/875328504.py", line 14, in objective_func
    r = train(cfg, trial=trial)
        ^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/repr-preference-optimization/reprpo

loaded 43 ether-prefvec trials
| ether-prefvec N=✓41/✖43, best=1.183   |   importance | best                   |
|:--------------------------------------|-------------:|:-----------------------|
| lr                                    |        0.498 | 0.00037772770210724844 |
| β                                     |        0.287 | 1.9848539330526844     |
| nb                                    |        0.136 | 20                     |
| reduction                             |        0.06  | 1                      |
| collect_hs                            |        0.003 | False                  |
| use_angle_loss                        |        0.003 | True                   |
| use_dpo_loss                          |        0.003 | False                  |
| use_orth_loss                         |        0.003 | True                   |
| weight_tokens                         |        0.003 | True                   |
| flip_side                             |        0.002 | True      

[I 2024-10-06 10:03:31,188] Trial 43 finished with value: 1.0404624277456649 and parameters: {'lr': 0.00077300924652698, 'collect_input': True, 'collect_hs': False, 'nb': 21, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 0.6860925636859312, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |   oos |    rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.69 lr=0.00077 ether.Htype=ether ether.flip_side=True ether.nb=21 ether.reduction=1 |  -6.612 | -1.575 | 4.046 | -7.018 |


[I 2024-10-06 10:06:47,734] Trial 44 finished with value: 1.0770712909441233 and parameters: {'lr': 0.00017094063229263905, 'collect_input': True, 'collect_hs': False, 'nb': 12, 'Htype': 'ether', 'flip_side': True, 'reduction': 4, 'β': 0.9545933239627128, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.95 lr=0.00017 ether.Htype=ether ether.flip_side=True ether.nb=12 ether.reduction=4 |   1.653 |  0.787 | 7.707 | 3.509 |


[I 2024-10-06 10:10:04,282] Trial 45 finished with value: 0.4566473988439307 and parameters: {'lr': 0.002762529010226291, 'collect_input': True, 'collect_hs': False, 'nb': 25, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.9753257441880685, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                 |   train |    test |     oos |    rnd |
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=2 lr=0.0028 ether.Htype=ether ether.flip_side=True ether.nb=25 ether.reduction=1 | -40.496 | -29.921 | -54.335 | -7.018 |


[I 2024-10-06 10:13:29,937] Trial 46 finished with value: 0.5048169556840078 and parameters: {'lr': 0.0012699094933034893, 'collect_input': True, 'collect_hs': False, 'nb': 18, 'Htype': 'ether', 'flip_side': True, 'reduction': 3, 'β': 0.8616128802853223, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                                                 |   train |   test |     oos |    rnd |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|--------:|-------:|
| ReprPO collect_input=True prefvec.use_dpo_prefvec=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.86 lr=0.0013 ether.Htype=ether ether.flip_side=True ether.nb=18 ether.reduction=3 | -17.355 | -18.11 | -49.518 | 15.789 |


[I 2024-10-06 10:16:38,612] Trial 47 finished with value: 0.41040462427745666 and parameters: {'lr': 0.004591564295469674, 'collect_input': True, 'collect_hs': True, 'nb': 13, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 2, 'β': 0.3177259963270161, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                  |   train |    test |    oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|--------:|-------:|------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.32 lr=0.0046 ether.flip_side=True ether.nb=13 ether.reduction=2 | -45.455 | -37.008 | -58.96 | 5.263 |


[I 2024-10-06 10:20:13,537] Trial 48 finished with value: 1.044315992292871 and parameters: {'lr': 0.00015312239936361515, 'collect_input': False, 'collect_hs': False, 'nb': 27, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.0669954943425175, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': False, 'use_proj_rel': False}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                            |   train |   test |   oos |   rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.β=1.1 lr=0.00015 ether.Htype=ether ether.flip_side=True ether.nb=27 ether.reduction=1 |   1.653 |  0.787 | 4.432 | 5.263 |


[I 2024-10-06 10:23:39,973] Trial 49 finished with value: 1.001926782273603 and parameters: {'lr': 1.644387806403373e-07, 'collect_input': True, 'collect_hs': False, 'nb': 20, 'Htype': 'ether', 'flip_side': True, 'reduction': 4, 'β': 0.17651512809322636, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': True, 'use_nll_loss': False, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                     |   train |   test |   oos |   rnd |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|------:|------:|
| ReprPO collect_input=True prefvec.use_dpo_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.18 lr=1.6e-07 ether.Htype=ether ether.flip_side=True ether.nb=20 ether.reduction=4 |   0.826 |      0 | 0.193 |     0 |


[I 2024-10-06 10:26:56,184] Trial 50 finished with value: 0.9961464354527939 and parameters: {'lr': 3.173113358446832e-05, 'collect_input': True, 'collect_hs': False, 'nb': 6, 'Htype': 'ether', 'flip_side': True, 'reduction': 73, 'β': 1.8985707403739836e-05, 'use_orth_loss': False, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                          |   train |   test |    oos |   rnd |
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.9e-05 lr=3.2e-05 ether.Htype=ether ether.flip_side=True ether.nb=6 ether.reduction=73 |       0 |      0 | -0.385 | 3.509 |


[I 2024-10-06 10:30:11,879] Trial 51 finished with value: 0.6589595375722545 and parameters: {'lr': 0.0004514977092235431, 'collect_input': True, 'collect_hs': False, 'nb': 10, 'Htype': 'etherplusHH', 'flip_side': True, 'reduction': 238, 'β': 5.294038979133623e-05, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                        |   train |   test |     oos |     rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|--------:|--------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=5.3e-05 lr=0.00045 ether.flip_side=True ether.nb=10 ether.reduction=238 | -15.702 | -8.661 | -34.104 | -12.281 |


[I 2024-10-06 10:33:21,445] Trial 52 finished with value: 0.9942196531791907 and parameters: {'lr': 0.0001030037238762181, 'collect_input': True, 'collect_hs': True, 'nb': 3, 'Htype': 'ether', 'flip_side': False, 'reduction': 9, 'β': 0.48895778065673723, 'use_orth_loss': True, 'use_angle_loss': False, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                                              |   train |   test |    oos |    rnd |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_hs=True collect_input=True prefvec.use_angle_prefvec=False prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=0.49 lr=0.0001 ether.Htype=ether ether.nb=3 ether.reduction=9 |       0 |      0 | -0.578 | -3.509 |


[I 2024-10-06 10:36:46,657] Trial 53 finished with value: 1.1734104046242777 and parameters: {'lr': 0.00035276039454591463, 'collect_input': True, 'collect_hs': False, 'nb': 28, 'Htype': 'ether', 'flip_side': True, 'reduction': 1, 'β': 1.1627256835820705, 'use_orth_loss': True, 'use_angle_loss': True, 'use_dpo_loss': False, 'use_nll_loss': True, 'weight_tokens': True, 'use_proj_rel': True}. Best is trial 24 with value: 1.183044315992293.



| acc_inc/eval_ds [pp]                                                                                                                                                                                                                    |   train |   test |    oos |    rnd |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-------:|-------:|-------:|
| ReprPO collect_input=True prefvec.use_nll_prefvec=True prefvec.use_orth_prefvec=True prefvec.use_proj_rel=True prefvec.weight_tokens=True prefvec.β=1.2 lr=0.00035 ether.Htype=ether ether.flip_side=True ether.nb=28 ether.reduction=1 |   2.479 |      0 | 17.341 | -5.263 |


In [None]:
# Run another batch of trials on the current study.
# `catch` lists the exception types that should mark a single trial as failed
# instead of aborting the whole optimisation loop; `gc_after_trial=True` asks
# Optuna to run garbage collection after every trial.
# NOTE(review): torch.OutOfMemoryError is presumably already covered by
# RuntimeError; it is listed explicitly here - confirm whether it is needed.
study.optimize(
    _objective,
    n_trials=20,
    gc_after_trial=True,
    catch=(AssertionError, OSError, RuntimeError, KeyError, torch.OutOfMemoryError),
)

In [None]:
# Print the URL of the active Weights & Biases run so it can be opened in a browser.
# `wandb.run` is None when no run has been started in this kernel (e.g. the
# optimisation cell above was skipped), so guard against it instead of raising
# AttributeError on a fresh "Restart & Run All".
if wandb.run is not None:
    print(wandb.run.get_url())
else:
    print("No active wandb run; nothing to link to.")

## plot

In [None]:
# You can use Matplotlib instead of Plotly for visualization by simply replacing `optuna.visualization` with
# `optuna.visualization.matplotlib` in the following examples.
from optuna.visualization.matplotlib import plot_contour
from optuna.visualization.matplotlib import plot_edf
from optuna.visualization.matplotlib import plot_intermediate_values
from optuna.visualization.matplotlib import plot_optimization_history
from optuna.visualization.matplotlib import plot_parallel_coordinate
from optuna.visualization.matplotlib import plot_param_importances
from optuna.visualization.matplotlib import plot_rank
from optuna.visualization.matplotlib import plot_slice
from optuna.visualization.matplotlib import plot_timeline

In [None]:
# List the experiment names that have a search space defined; one of these
# keys is used as `exp_name` in the next cell.
search_spaces.keys()

In [None]:
# Choose which experiment's search space and persisted study to inspect.
exp_name = 'projgrad'
trial2args = search_spaces[exp_name]

# The study is named after the experiment. With load_if_exists=True an existing
# study of that name in the `f_db` storage is reopened rather than recreated.
study_name = f"{exp_name}"
study = optuna.create_study(
    storage=f_db,
    study_name=study_name,
    load_if_exists=True,
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.NopPruner(),
)

# NOTE(review): `study.best_trial` presumably raises if the study has no
# completed trials yet - confirm before running against an empty database.
print('study.best_trial', study.best_trial)

# Keep only the completed trials, best objective value first, and report how many there are.
df = (
    study.trials_dataframe()
    .query('state == "COMPLETE"')
    .sort_values('value', ascending=False)
)
print(len(df))

# Last expression of the cell: objective value over the course of the study.
plot_optimization_history(study)

In [13]:
# plot_timeline(study)

In [29]:
# plot_intermediate_values(study)

In [30]:
# plot_contour(study)


In [None]:
# Slice plot of the loaded study (matplotlib backend imported above).
plot_slice(study)


In [None]:
# Hyper-parameter importance plot for the loaded study (matplotlib backend imported above).
plot_param_importances(study)

In [None]:
# Intermediate-value plot for the loaded study (matplotlib backend imported above).
# NOTE(review): this is only informative if the objective reports intermediate
# values to the trial - confirm against the objective function.
plot_intermediate_values(study)

In [None]:
# Parallel-coordinate plot of the loaded study's trials (matplotlib backend imported above).
plot_parallel_coordinate(study)

### Appendix 1: dataclass to optuna

In [20]:
# import inspect
# import typing
# from typing import Literal

# def optuna_suggest_from_dataclass(t):
#     n = t.__name__
#     print(f'## {n}')
#     sig = inspect.signature(t)
#     for name, param in sig.parameters.items():
#         if param.annotation== bool:
#             print(f'"{name}": trial.suggest_categorical("{name}", [True, False]),')
#         elif param.annotation==int:
#             print(f'"{name}": trial.suggest_int("{name}", 1, 10),')
#         elif param.annotation ==float:
#             print(f'"{name}": trial.suggest_float("{name}", 0.1, 10.0),')
#         elif param.annotation == str:
#             print(f'"{name}": trial.suggest_categorical("{name}", ["a", "b", "c"]),')
#         elif param.annotation == tuple:
#             print(f'"{name}": trial.suggest_categorical("{name}", [(1, 2), (3, 4), (5, 6)]),')
#         elif typing.get_origin(param.annotation) == Literal:
#             print(f'"{name}": trial.suggest_categorical("{name}", {param.annotation.__args__}),')
#         else:
#             print(f"!!Unknown type {param}")
#             # print(name, param.default, param.annotation)

# optuna_suggest_from_dataclass(ReprPOConfig)
# for t in Transforms:
#     print(f'## {t}')
#     optuna_suggest_from_dataclass(t.value)
# for l in Losses:
#     print(f'## {l}')
#     optuna_suggest_from_dataclass(l.value)


# optuna_suggest_from_dataclass(DPOProjGradConfig)