In [1]:
# import necessary packages
import os, sys
sys.path.append(r'/elo_bench')
from datamodel import QuestionCollection

import pandas as pd
from pathlib import Path
from collections import defaultdict

In [1]:
"""convert gemini answers to the designed format csv"""
# One-off conversion of the Gemini JSONL answers into a CSV with one row per
# question and a single 'gemini' answer column. Disabled by default; flip the
# guard to True to regenerate './q_and_as gemini.csv'.
if False:
    gemini_answers = (
        pd.read_json(r'./gemini_pro_answer_20573_20231228.jsonl', lines=True)
        # 'question' becomes the row index, so it is written as the first CSV column.
        .set_index('question')
        # The answer column is named after the model that produced it.
        .rename(columns={'answer': 'gemini'})
    )
    gemini_answers.to_csv(r'./q_and_as gemini.csv')

In [10]:
"""list csv splits models name with answers count and percentage"""
# For every split CSV, print one "<count> answers for <model>" line per model
# column. Only the non-missing count is reported; no percentage is computed.
if True:
    split_csv_files = [
        r'./q_and_as 2a100.csv',
        r'./q_and_as 0118 1503 new.csv',
        r'./q_and_as 0118 1673 new.csv',
        r'./q_and_as 0116 1673.csv',
        r'./q_and_as 1691', # √
        r'./q_and_as gemini.csv', # √
        r'./q_and_as 0118 2a100 split new.csv'
    ]

    for split_csv in split_csv_files:
        print(f'Printing information in {split_csv}...')
        # keep_default_na=False leaves empty cells as empty strings; only the
        # literal markers 'NaN' and 'NULL' are parsed as missing answers.
        answer_split = pd.read_csv(
            split_csv,
            engine='python',
            keep_default_na=False,
            na_values=['NaN', 'NULL'],
        )
        # Everything except the saved index column and the question text is
        # treated as a model column.
        skip_columns = ['Unnamed: 0', 'question']
        models = [column for column in answer_split.columns.tolist() if column not in skip_columns]
        for model in models:
            answered = answer_split[model].notna().sum()
            print(f'{answered} answers for {model}')

Printing information in ./q_and_as 2a100.csv...
21118 answers for lmsys/vicuna-7b-v1.5
21083 answers for gpt-4-turbo
21090 answers for gpt-35-turbo
21191 answers for lmsys/vicuna-13b-v1.5
21114 answers for lmsys/vicuna-33b-v1.3
21115 answers for meta-llama/Llama-2-7b-chat-hf
21119 answers for meta-llama/Llama-2-13b-chat-hf
105 answers for huggyllama/llama-13b
14577 answers for meta-llama/Llama-2-70b-chat-hf
105 answers for tiiuae/falcon-7b-instruct
105 answers for tiiuae/falcon-40b-instruct
105 answers for mosaicml/mpt-7b-chat
105 answers for WizardLM/WizardLM-13B-V1.2
822 answers for Xwin-LM/Xwin-LM-7B-V0.1
854 answers for chavinlo/alpaca-13b
853 answers for WizardLM/WizardLM-7B-V1.0
809 answers for chavinlo/alpaca-native
824 answers for Xwin-LM/Xwin-LM-13B-V0.1
105 answers for mosaicml/mpt-30b-chat
105 answers for huggyllama/llama-7b
105 answers for HuggingFaceH4/zephyr-7b-beta
20 answers for huggyllama/llama-30b
Printing information in ./q_and_as 0118 1503.csv...
21963 answers for X

In [6]:
"""Merge the answers for dataset"""
# Merge every split CSV into one question-by-model table and write it to
# './merge.csv'. Splits are applied in list order, so when the same
# (question, model) pair appears more than once the last non-missing answer wins.
# Questions that are not part of the dataset's questions.csv are dropped.
if True:
    dataset_dir = r'/elo_bench/data/google_quora_alpaca_sharegpt_chatlm_clean_20772'
    questions = QuestionCollection.read_csv(Path(dataset_dir)/'questions.csv').questions
    # Set membership keeps the per-question filter below cheap.
    question_set = set(questions)

    print(f'{len(questions)} questions.')

    split_csv_files = [
        r'./q_and_as 2a100.csv',
        r'./q_and_as 0118 1503.csv',
        r'./q_and_as 0118 1673.csv',
        r'./q_and_as 0116 1673.csv',
        r'./q_and_as 1691', # √
        r'./q_and_as gemini.csv', # √
        r'./q_and_as 0118 2A100 split.csv',
    ]

    # Columns that hold bookkeeping data rather than model answers.
    skip_columns = ['Unnamed: 0', 'question']

    q_and_as_dict = defaultdict(lambda: defaultdict(str)) #  q_and_as_dict['question']['model'] = ans
    for split_csv in split_csv_files:
        print(f'Printing information in {split_csv}...')
        # Only the literal 'NaN' is parsed as a missing answer here; empty cells
        # stay as empty strings.
        # NOTE(review): the counting cells also treat 'NULL' as missing, this
        # cell does not - confirm whether that difference is intended.
        answer_split = pd.read_csv(split_csv, keep_default_na=False, na_values=['NaN'], engine='python')
        models = [str(x) for x in answer_split.columns.tolist() if x not in skip_columns]
        for model in models:
            print(model)
            # Pair the question column with this model's column directly instead
            # of calling iterrows() once per model: iterrows builds a Series for
            # every row on every pass and may coerce dtypes across the row.
            # The iteration order (model-major, then row order) is unchanged.
            for question, model_answer in zip(answer_split['question'], answer_split[model]):
                if not pd.isna(model_answer):
                    q_and_as_dict[question][model] = model_answer

    # One record per known question: the question text plus every collected answer.
    reformat_matrix = [
        {'question': question_key, **model_answers}
        for question_key, model_answers in q_and_as_dict.items()
        if question_key in question_set
    ]

    df = pd.DataFrame(reformat_matrix)
    # The row index is written as an unnamed first column, and models without an
    # answer for a question are written as the literal 'NaN'.
    df.to_csv(r'./merge.csv', na_rep='NaN')

[DEBUG] Removed 48 repeat questions.


20772 questions.
Printing information in ./q_and_as 2a100.csv...
lmsys/vicuna-7b-v1.5
gpt-4-turbo
gpt-35-turbo
lmsys/vicuna-13b-v1.5
lmsys/vicuna-33b-v1.3
meta-llama/Llama-2-7b-chat-hf
meta-llama/Llama-2-13b-chat-hf
huggyllama/llama-13b
meta-llama/Llama-2-70b-chat-hf
tiiuae/falcon-7b-instruct
tiiuae/falcon-40b-instruct
mosaicml/mpt-7b-chat
WizardLM/WizardLM-13B-V1.2
Xwin-LM/Xwin-LM-7B-V0.1
chavinlo/alpaca-13b
WizardLM/WizardLM-7B-V1.0
chavinlo/alpaca-native
Xwin-LM/Xwin-LM-13B-V0.1
mosaicml/mpt-30b-chat
huggyllama/llama-7b
HuggingFaceH4/zephyr-7b-beta
huggyllama/llama-30b
Printing information in ./q_and_as 0118 1503.csv...
Xwin-LM/Xwin-LM-13B-V0.1
Xwin-LM/Xwin-LM-7B-V0.1
WizardLM/WizardLM-7B-V1.0
WizardLM/WizardLM-13B-V1.2
mosaicml/mpt-7b-chat
mosaicml/mpt-30b-chat
HuggingFaceH4/zephyr-7b-beta
Printing information in ./q_and_as 0118 1673.csv...
huggyllama/llama-7b
huggyllama/llama-13b
huggyllama/llama-30b
chavinlo/alpaca-native
chavinlo/alpaca-13b
mosaicml/mpt-30b-chat
Printing informa

In [7]:
"""list csv splits models name with answers count and percentage"""
# Sanity check on the merged table: print one "<count> answers for <model>"
# line per model column. Only the non-missing count is reported; no percentage
# is computed.
if True:
    split_csv_files = [
        r'./merge.csv',
    ]

    for split_csv in split_csv_files:
        print(f'Printing information in {split_csv}...')
        # keep_default_na=False leaves empty cells as empty strings; only the
        # literal markers 'NaN' and 'NULL' are parsed as missing answers.
        answer_split = pd.read_csv(
            split_csv,
            engine='python',
            keep_default_na=False,
            na_values=['NaN', 'NULL'],
        )
        # Everything except the saved index column and the question text is
        # treated as a model column.
        skip_columns = ['Unnamed: 0', 'question']
        models = [column for column in answer_split.columns.tolist() if column not in skip_columns]
        for model in models:
            model_answers = answer_split[model]
            # Series.count() is the number of non-missing entries.
            print(f'{model_answers.count()} answers for {model}')

Printing information in ./merge.csv...
19980 answers for lmsys/vicuna-7b-v1.5
19947 answers for gpt-4-turbo
19952 answers for gpt-35-turbo
19981 answers for lmsys/vicuna-13b-v1.5
19976 answers for lmsys/vicuna-33b-v1.3
19978 answers for meta-llama/Llama-2-7b-chat-hf
19982 answers for meta-llama/Llama-2-13b-chat-hf
20772 answers for Xwin-LM/Xwin-LM-13B-V0.1
20772 answers for Xwin-LM/Xwin-LM-7B-V0.1
20772 answers for WizardLM/WizardLM-7B-V1.0
20772 answers for WizardLM/WizardLM-13B-V1.2
20772 answers for mosaicml/mpt-7b-chat
20772 answers for huggyllama/llama-7b
20772 answers for huggyllama/llama-13b
20771 answers for huggyllama/llama-30b
20772 answers for chavinlo/alpaca-native
20772 answers for chavinlo/alpaca-13b
20772 answers for HuggingFaceH4/zephyr-7b-beta
20772 answers for tiiuae/falcon-7b-instruct
19795 answers for gemini
18545 answers for tiiuae/falcon-40b-instruct
14301 answers for meta-llama/Llama-2-70b-chat-hf
12028 answers for mosaicml/mpt-30b-chat
