# convert `genies` datasets to [open_pref_eval](https://github.com/wassname/open_pref_eval)


Here I'm taking the GENIE datasets, and 
1. converting them to preference (compatible with open_pref_eval)
2. hosting on huggingface


## Setup

```sh
python -m venv .venv --prompt GENIES
. .venv/bin/activate
pip install wheel fire requests
pip install -r requirements.txt
python ./download_data.py
```

In [6]:
%reload_ext autoreload
%autoreload 2

In [7]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import datasets

from pathlib import Path
import json

In [8]:
path_to_distribution_shift_pairs = Path('../distribution_shifts/all.json')
pairs_data = json.load(open(path_to_distribution_shift_pairs))
pairs_data

[{'source': 'alpaca_easy', 'target': 'alpaca_hard'},
 {'source': 'arc_easy', 'target': 'arc_hard'},
 {'source': 'math_easy', 'target': 'math_hard'},
 {'source': 'code_easy', 'target': 'code_hard'},
 {'source': 'ranking_logic_easy', 'target': 'ranking_logic_hard'},
 {'source': 'raven_easy', 'target': 'raven_matrices'},
 {'source': 'alpaca_mmlu', 'target': 'spanish_input'},
 {'source': 'alpaca_mmlu', 'target': 'spanish_output'},
 {'source': 'alpaca_mmlu', 'target': 'comma_separated_input'},
 {'source': 'alpaca_mmlu', 'target': 'comma_separated_output'},
 {'source': 'alpaca_mmlu', 'target': 'ranking_logic'},
 {'source': 'alpaca_mmlu', 'target': 'raven_matrices'},
 {'source': 'alpaca_mmlu', 'target': 'word_swap'},
 {'source': 'code', 'target': 'counterfactual_python'},
 {'source': 'code', 'target': 'us_history'},
 {'source': 'code', 'target': 'change_my_view'},
 {'source': 'cooking', 'target': 'math'},
 {'source': 'cooking', 'target': 'raven_matrices'},
 {'source': 'math', 'target': 'chang

In [9]:
from datasets import DatasetInfo, Dataset

def genie2ds(train: list) -> pd.DataFrame:
    """takes the GENIE format and convert it to to a dataframe of preference format."""
    outs = []
    for i, row in enumerate(train):
        s = pd.Series(row['responses'])
        chosen = s[s==1].index[0]
        rejected = s[s==0].index
        outs += [dict(prompt=row['prompt'], chosen=chosen, rejected=r, i=i) for r in rejected]

    df = pd.DataFrame(outs)
    return df



def json2ds(source_dir: Path) -> Dataset:
    test = json.load(open(source_dir / 'test.json'))
    train = json.load(open(source_dir / 'train.json'))
    metadata = json.load(open(source_dir / 'metadata.json'))
    ds_info = DatasetInfo(
        description= f"GENIE:{metadata['id']}",
        citation= """@misc{clymer2023generalizationanalogiestestbedgeneralizing,
        title={Generalization Analogies: A Testbed for Generalizing AI Oversight to Hard-To-Measure Domains}, 
        author={Joshua Clymer and Garrett Baker and Rohan Subramani and Sam Wang},
        year={2023},
        eprint={2311.07723},
        archivePrefix={arXiv},
        primaryClass={cs.AI},
        url={https://arxiv.org/abs/2311.07723}, 
    }""",
        homepage= "https://joshuaclymer.github.io/generalization-analogies-website/",
        license= "MIT",
        config_name=f"{metadata['id']}",
    )


    df_train = genie2ds(train)
    df_test = genie2ds(test)
    dataset2 = datasets.DatasetDict(
            {'train': datasets.Dataset.from_pandas(df_train, info=ds_info),
                'test': datasets.Dataset.from_pandas(df_test, info=ds_info)}
        )
    return dataset2

In [10]:
dist_dir = Path('../distributions')

for pair in  pairs_data:
    for key in ['source', 'target']:
        source_dir = dist_dir / pair[key]
        metadata = json.load(open(source_dir / 'metadata.json'))
        config_name = metadata['id']

        # first check if it exists
        try:
            load_dataset(f"wassname/genie_dpo", name=config_name, keep_in_memory=False, split='train[:1]')
        except ValueError as e:
            print(e)
            print(f"Dataset {config_name} does not exist, creating")

            dataset2 = json2ds(source_dir)
            print(source_dir, config_name, dataset2)

            dataset2.push_to_hub("wassname/genie_dpo", config_name=config_name)
        else:
            print(f"Dataset {config_name} already exists, skipping")

        

Dataset alpaca_easy already exists, skipping
Dataset alpaca_hard already exists, skipping
Dataset arc_easy already exists, skipping
Dataset arc_hard already exists, skipping
Dataset math_easy already exists, skipping
Dataset math_hard already exists, skipping
Dataset code_easy already exists, skipping
Dataset code_hard already exists, skipping
Dataset ranking_logic_easy already exists, skipping
Dataset ranking_logic_hard already exists, skipping
Dataset raven_easy already exists, skipping
Dataset raven_matrices already exists, skipping
Dataset alpaca_mmlu already exists, skipping
Dataset spanish_input already exists, skipping
Dataset alpaca_mmlu already exists, skipping
Dataset spanish_output already exists, skipping
Dataset alpaca_mmlu already exists, skipping
Dataset comma_separated_input already exists, skipping
Dataset alpaca_mmlu already exists, skipping
Dataset comma_separated_output already exists, skipping
Dataset alpaca_mmlu already exists, skipping
Dataset ranking_logic alrea

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading readme:   0%|          | 0.00/13.0k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/443k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/181k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset us_history_textbook already exists, skipping
BuilderConfig 'us_history_fiction' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_textbook', 'word_swap']
Dataset us_history_fiction does not exist, creating
../distributions/us_history_fiction us_history_fiction DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1755
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/13.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/494k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/207k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1755 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset us_history_fiction already exists, skipping
BuilderConfig 'us_history_make_questions' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_textbook', 'word_swap']
Dataset us_history_make_questions does not exist, creating
../distributions/us_history_make_questions us_history_make_questions DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.0k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/285k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/120k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset us_history_make_questions already exists, skipping
Dataset us_history already exists, skipping
Dataset math already exists, skipping
BuilderConfig 'math_fiction' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset math_fiction does not exist, creating
../distributions/math_fiction math_fiction DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/14.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/357k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/150k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset math_fiction already exists, skipping
BuilderConfig 'math_textbook' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset math_textbook does not exist, creating
../distributions/math_textbook math_textbook DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/15.0k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/390k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/160k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1800 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset math_textbook already exists, skipping
BuilderConfig 'math_make_questions' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset math_make_questions does not exist, creating
../distributions/math_make_questions math_make_questions DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1773
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/15.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/15.5k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/160k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/72.2k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1773 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/750 [00:00<?, ? examples/s]

Dataset math_make_questions already exists, skipping
Dataset math already exists, skipping
BuilderConfig 'alpaca_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset alpaca_low_quality does not exist, creating
../distributions/alpaca_low_quality alpaca_low_quality DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/15.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/16.1k [00:00<?, ?B/s]

BuilderConfig 'alpaca_high_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset alpaca_high_quality does not exist, creating
../distributions/alpaca_high_quality alpaca_high_quality DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 750
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/16.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/16.6k [00:00<?, ?B/s]

BuilderConfig 'shp_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset shp_low_quality does not exist, creating
../distributions/shp_low_quality shp_low_quality DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 600
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 250
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/16.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/17.1k [00:00<?, ?B/s]

BuilderConfig 'shp_high_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset shp_high_quality does not exist, creating
../distributions/shp_high_quality shp_high_quality DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 600
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 250
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/17.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/17.6k [00:00<?, ?B/s]

BuilderConfig 'code_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset code_low_quality does not exist, creating
../distributions/code_low_quality code_low_quality DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1200
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 500
    })
})


Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/17.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/18.1k [00:00<?, ?B/s]

Dataset code already exists, skipping
Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'truthful_qa' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset truthful_qa does not exist, creating
../distributions/truthful_qa truthful_qa DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1302
    })
    test: Dataset({
       

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/18.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/18.6k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'personality_traits' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset personality_traits does not exist, creating
../distributions/personality_traits personality_traits DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1200
    })
    test: Dataset({
  

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/18.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/19.1k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'survival_influence' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset survival_influence does not exist, creating
../distributions/survival_influence survival_influence DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 600
    })
 

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/19.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/19.6k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'gender_bias' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset gender_bias does not exist, creating
../distributions/gender_bias gender_bias DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
        num_rows: 1200
    })
    te

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/19.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'punishment_avoidance' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset punishment_avoidance does not exist, creating
../distributions/punishment_avoidance punishment_avoidance DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 're

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'reward_seeking' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset reward_seeking does not exist, creating
../distributions/reward_seeking reward_seeking DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 're

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/21.1k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'crt_1' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset crt_1 does not exist, creating
../distributions/crt_1 crt_1 DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'i'],
   

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/21.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/21.6k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'crt_2' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset crt_2 does not exist, creating
../distributions/crt_2 crt_2 DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/21.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/22.0k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'crt_3' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset crt_3 does not exist, creating
../distributions/crt_3 crt_3 DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 're

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/22.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'sycophancy_mimicry' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset sycophancy_mimicry does not exist, creating
../distributions/sycophancy_mimicry sycophancy_mimicry DatasetDict({
  

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/23.0k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'sycophancy_answer' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset sycophancy_answer does not exist, creating
../distributions/sycophancy_answer sycophancy_answe

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/23.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/23.5k [00:00<?, ?B/s]

Dataset alpaca_mmlu already exists, skipping
BuilderConfig 'sycophancy_feedback' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset sycophancy_feedback does not exist, creating
../distributions/sycophanc

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/23.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/24.0k [00:00<?, ?B/s]

BuilderConfig 'alpaca_chat' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset alpaca_chat does not exist, creating
../distributions/alpaca_chat alpaca_chat DatasetDict({
    train

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/24.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/24.5k [00:00<?, ?B/s]

BuilderConfig 'sycophancy_are_you_sure' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset sycophancy_are_you_sure does not exist, creating
../distributions/sycophan

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/24.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/25.0k [00:00<?, ?B/s]

BuilderConfig 'pursue_goals' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset pursue_goals does not exist, creating
../distributions/pur

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/25.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

BuilderConfig 'relinquish_power' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset relinquish_power does not exist, creat

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/26.0k [00:00<?, ?B/s]

BuilderConfig 'creative_writing' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']
Dataset creative_writing d

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/26.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/26.5k [00:00<?, ?B/s]

BuilderConfig 'biology_with_literary_style' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_sw

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/26.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/27.1k [00:00<?, ?B/s]

BuilderConfig 'alpaca_short' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'biology_with_literary_style', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_tex

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/27.1k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/27.6k [00:00<?, ?B/s]

BuilderConfig 'alpaca_long' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'alpaca_short', 'arc_easy', 'arc_hard', 'biology_with_literary_style', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Downloading metadata:   0%|          | 0.00/27.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/28.0k [00:00<?, ?B/s]