In [4]:
from utils import load_sample
import numpy as np
import json

# Read the train / validation / test splits of the mixed dataset, in that order.
train_data, validation_data, test_data = (
    load_sample(split_path)
    for split_path in (
        '../data/mixed_dataset_ourRM_ALL_token_num_train.jsonl',
        '../data/mixed_dataset_ourRM_ALL_token_num_validation.jsonl',
        '../data/mixed_dataset_ourRM_ALL_token_num_test.jsonl',
    )
)

In [5]:
# Keys under which each sample stores one model's scores and token counts.
model_name_list = ['codestral-22b', 'gpt-4o', 'gpt-35-turbo', 'llama-31-8b', 'mistral-7b', 'mistral-8x7b', 'phi-3-medium', 'phi-3-mini']
n = 5           # maximal number of responses to sample for best-of-n sampling
sample_size = 10  # number of independent best-of-k draws recorded per (model, k) pair

# The export cells draw indices with np.random.choice (legacy global RNG).
# Seed it here so a fresh "Restart & Run All" regenerates identical files.
SEED = 0
np.random.seed(SEED)

In [3]:
# Translate the train split of mixed_dataset into the hybrid-LLM format.
#
# For every prompt and every model, best-of-k sampling is simulated for
# k = 1..n: k response indices are drawn, the drawn response with the highest
# proxy score ('ourRM_scores') wins, and the winner's oracle score
# ('armoRM_scores') is recorded. Each (model, k) pair is repeated `sample_size`
# times, so 'scores' and 'token_num_responses' are lists of that length.
with open(f'../data/hybridllm_dataset_armoRM_ourRM_bo{n}_ALL_token_num_train.jsonl', 'w') as f:
    for d in train_data:
        new_d = {
            'id': d['id'],
            'instruction': '',
            'input': d['prompt'],
            'output': '',
            'cmp_results': '',
            'candidates': [],
        }
        for model_name in model_name_list:
            # Per-model lookups do not depend on k, so do them once.
            oracle_scores = d[model_name]['armoRM_scores']
            proxy_scores = np.asarray(d[model_name]['ourRM_scores'])
            token_nums = np.asarray(d[model_name]['token_num_responses'])
            pool_size = len(proxy_scores)

            for _n in range(1, n + 1):
                # NOTE: np.random.choice samples with replacement by default,
                # so a draw may contain the same response more than once.
                rand_indices = [np.random.choice(pool_size, size=_n) for _ in range(sample_size)]

                # Pick the winner among the drawn indices only. Matching the
                # max value against the whole pool could return a response
                # that was never drawn whenever proxy scores are tied.
                bon_indices = [drawn[np.argmax(proxy_scores[drawn])] for drawn in rand_indices]
                new_scores = [oracle_scores[winner] for winner in bon_indices]

                # Cost of a draw = total tokens over all k sampled responses.
                new_token_num_responses = [int(sum(token_nums[drawn])) for drawn in rand_indices]
                n2scores = {'armoRM_scores': new_scores}

                new_d['candidates'].append({'model': f'{model_name}_ourRM_bo{_n}',
                                            'text': ['' for _ in range(sample_size)],
                                            'scores': n2scores,
                                            'token_num_prompt': d[model_name]['token_num_prompt'],
                                            'token_num_responses': new_token_num_responses})
        f.write(json.dumps(new_d) + "\n")

In [4]:
# Translate the validation split into the hybrid-LLM format.
#
# For every prompt and every model, best-of-k sampling is simulated once for
# each k = 1..n: k response indices are drawn, the drawn response with the
# highest proxy score ('ourRM_scores') wins, and the winner's oracle score
# ('armoRM_scores') is recorded as a single value.
with open(f'../data/hybridllm_dataset_armoRM_ourRM_bo{n}_ALL_token_num_validation.jsonl', 'w') as f:
    for d in validation_data:
        new_d = {
            'id': d['id'],
            'instruction': '',
            'input': d['prompt'],
            'output': '',
            'cmp_results': '',
            'candidates': [],
        }
        for model_name in model_name_list:
            # Per-model lookups do not depend on k, so do them once.
            oracle_scores = d[model_name]['armoRM_scores']
            proxy_scores = np.asarray(d[model_name]['ourRM_scores'])
            token_nums = np.asarray(d[model_name]['token_num_responses'])

            for _n in range(1, n + 1):
                # NOTE: np.random.choice samples with replacement by default,
                # so a draw may contain the same response more than once.
                rand_indices = np.random.choice(len(proxy_scores), size=_n)

                # Pick the winner among the drawn indices only. Matching the
                # max value against the whole pool could return a response
                # that was never drawn whenever proxy scores are tied.
                bon_index = rand_indices[np.argmax(proxy_scores[rand_indices])]
                new_scores = oracle_scores[bon_index]

                # Cost of the draw = total tokens over all k sampled responses.
                new_token_num_responses = int(sum(token_nums[rand_indices]))
                n2scores = {'armoRM_scores': new_scores}

                new_d['candidates'].append({'model': f'{model_name}_ourRM_bo{_n}',
                                            'text': '',
                                            'scores': n2scores,
                                            'token_num_prompt': d[model_name]['token_num_prompt'],
                                            'token_num_responses': new_token_num_responses})
        f.write(json.dumps(new_d) + "\n")

In [5]:
# Translate the test split into the hybrid-LLM format.
#
# For every prompt and every model, best-of-k sampling is simulated once for
# each k = 1..n: k response indices are drawn, the drawn response with the
# highest proxy score ('ourRM_scores') wins, and the winner's oracle score
# ('armoRM_scores') is recorded as a single value.
with open(f'../data/hybridllm_dataset_armoRM_ourRM_bo{n}_ALL_token_num_test.jsonl', 'w') as f:
    for d in test_data:
        new_d = {
            'id': d['id'],
            'instruction': '',
            'input': d['prompt'],
            'output': '',
            'cmp_results': '',
            'candidates': [],
        }
        for model_name in model_name_list:
            # Per-model lookups do not depend on k, so do them once.
            oracle_scores = d[model_name]['armoRM_scores']
            proxy_scores = np.asarray(d[model_name]['ourRM_scores'])
            token_nums = np.asarray(d[model_name]['token_num_responses'])

            for _n in range(1, n + 1):
                # NOTE: np.random.choice samples with replacement by default,
                # so a draw may contain the same response more than once.
                rand_indices = np.random.choice(len(proxy_scores), size=_n)

                # Pick the winner among the drawn indices only. Matching the
                # max value against the whole pool could return a response
                # that was never drawn whenever proxy scores are tied.
                bon_index = rand_indices[np.argmax(proxy_scores[rand_indices])]
                new_scores = oracle_scores[bon_index]

                # Cost of the draw = total tokens over all k sampled responses.
                new_token_num_responses = int(sum(token_nums[rand_indices]))
                n2scores = {'armoRM_scores': new_scores}

                new_d['candidates'].append({'model': f'{model_name}_ourRM_bo{_n}',
                                            'text': '',
                                            'scores': n2scores,
                                            'token_num_prompt': d[model_name]['token_num_prompt'],
                                            'token_num_responses': new_token_num_responses})
        f.write(json.dumps(new_d) + "\n")