In [1]:
%%capture
!pip install -r requirements.txt

In [1]:
import sys
from pathlib import Path
import json
import pandas as pd
from dotenv import load_dotenv
import plotly.express as px
import torch as t
import pandas as pd
from tools.globals import load_country_globals

from tools.nnsight_utils import  get_text_generations
from tqdm import tqdm

from tools.apis import OpenAIWrapper
import os

from translate import Translator

from nnsight import LanguageModel
from transformers import AutoTokenizer

load_country_globals()
translator = Translator(from_lang="autodetect",to_lang="en")

device = t.device(
    "mps" if t.backends.mps.is_available() else "cuda" if t.cuda.is_available() else "cpu"
)
load_dotenv()
t.set_grad_enabled(False)

t.manual_seed(42)
if t.cuda.is_available():
    t.cuda.manual_seed_all(42)

%load_ext autoreload
%autoreload 2

In [2]:
# Answer prefix per prompt language; each ends with the opening "**" of the bolded answer.
prompt_suffix = dict(
    English="My guess is **",
    Turkish="Tahminim **",
    French="Ma supposition est **",
    Russian="Моё предположение **",
    Bengali="আমার অনুমান হলো **",
)

# Maps a dataset `source_id` to its subtask label.
subtask_map = dict(
    synth_names="names",
    synth_cities="cities",
    culturebench="culturebench",
)

def eval_or_skip(txt):
    try:
        return eval(txt)
    except:
        return None

# Short code per country (the same codes appear in the data/final/<code>/ paths).
country_to_suffix = dict(Turkey="tr", France="fr", Russia="ru", Bangladesh="bn")

## Import Models

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint identifier for Aya Expanse 8B.
model_id = "CohereForAI/aya-expanse-8b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights in bfloat16, then move the model to the selected device.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=t.bfloat16)
model = model.to(device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [2]:
# Gemma-2 9B instruct: tokenizer by hub id, weights from a local directory.
# NOTE(review): the weights path is an absolute, machine-specific location and
# the model is pinned to cuda:0 regardless of the `device` chosen earlier.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
nnmodel = LanguageModel(
    '/dlabscratch1/public/llm_weights/gemma_hf/gemma-2-9b-it',
    torch_dtype=t.bfloat16,
    device_map='cuda:0',
    dispatch=True,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [31]:
# Llama-3.1 8B instruct: tokenizer by hub id, weights from a local directory.
# NOTE(review): the weights path is an absolute, machine-specific location and
# the model is pinned to cuda:0 regardless of the `device` chosen earlier.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token_id = tokenizer.eos_token_id
nnmodel = LanguageModel(
    '/dlabscratch1/public/llm_weights/llama3.1_hf/Meta-Llama-3.1-8B-Instruct',
    torch_dtype=t.bfloat16,
    device_map='cuda:0',
    dispatch=True,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

# Pre-Processing Dataset

In [28]:
# Load the Turkish split.
data_df = pd.read_csv("data/final/tr/final.csv")
# `options` is stored as the text of a Python list; eval turns it back into a list.
# NOTE(review): eval executes arbitrary code; only use on trusted files.
data_df["options"] = data_df["options"].map(eval)
# Replace the boolean hint flag by a label: "none" when False, else the row's country.
data_df["hint"] = data_df["country"].where(data_df["hint"] != False, "none")
data_df.head(4)

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west
0,synth_names,synth_names_0,Turkey,I live in Turkey. Which of the following names...,English,"[Liam, Alparslan]",Turkey,2,1,Alparslan,Liam
1,synth_names,synth_names_0,Turkey,Which of the following names is most likely to...,English,"[Liam, Alparslan]",none,2,1,Alparslan,Liam
2,synth_names,synth_names_0,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"[Liam, Alparslan]",Turkey,2,1,Alparslan,Liam
3,synth_names,synth_names_0,Turkey,Aşağıdaki isimlerden hangisi muhtemelen benim ...,Turkish,"[Liam, Alparslan]",none,2,1,Alparslan,Liam


In [3]:
# Per-language sentence stating that the speaker lives in the United States.
us_hints = {
    "English": "I live in the United States.",
    "Turkish": "Amerika Birleşik Devletleri'nde yaşıyorum.",
    "French": "Je vis aux États-Unis.",
    "Russian": "Я живу в Соединенных Штатах.",
    "Bengali": "আমি মার্কিন যুক্তরাষ্ট্রে থাকি।",
}

# For every un-hinted question, add a copy whose question is prefixed with the
# US sentence in the row's language. Built in one vectorised step instead of
# copying rows one by one with iterrows.
unhinted_rows = data_df.query("hint=='none'")
new_rows = unhinted_rows.assign(
    hint="United States",
    # Direct dict indexing keeps the KeyError for a language without a hint sentence.
    question=[
        us_hints[lang] + " " + question
        for lang, question in zip(unhinted_rows["lang"], unhinted_rows["question"])
    ],
)

data_df = pd.concat([data_df, new_rows], ignore_index=True)

In [29]:
# Per-language sentence stating which language the speaker speaks.
lang_hints = {
    "English": "I speak English.",
    "Turkish": "Türkçe konuşuyorum.",
    "French": "Je parle français.",
    "Russian": "Я говорю по-русски.",
    "Bengali": "আমি বাংলা ভাষা বলি।",
}

# For every un-hinted question, add a copy whose question is prefixed with the
# language sentence in the row's language. Built in one vectorised step instead
# of copying rows one by one with iterrows.
unhinted_rows = data_df.query("hint=='none'")
new_rows = unhinted_rows.assign(
    hint="Language",
    # Direct dict indexing keeps the KeyError for a language without a hint sentence.
    question=[
        lang_hints[lang] + " " + question
        for lang, question in zip(unhinted_rows["lang"], unhinted_rows["question"])
    ],
)

data_df = pd.concat([data_df, new_rows], ignore_index=True)

TypeError: eval() arg 1 must be a string, bytes or code object

In [9]:
# Order rows by question id so that all variants of a question are adjacent.
data_df = data_df.sort_values("question_id")
data_df

In [19]:
#data_df.to_csv("data/final/tr/final_full.csv", index=False)

# Load Dataset

In [7]:
# One worked example per prompt language. Every entry has the same schema:
#   question     - the example question
#   lang         - English language name, matching the keys of this dict and of
#                  the other per-language dicts in this notebook
#   options      - the two answer options
#   answer       - full answer: bolded choice followed by a justification
#   answer_short - only the first sentence of `answer` (the bolded choice)
# NOTE(review): the French/Russian/Bengali answers do not open with the wording
# used in `prompt_suffix` (the English/Turkish ones do) - confirm which is intended.
demonstrations = {
    "English": {
        "question": "What am I?",
        "lang": "English",
        "options": ["Human", "Robot"],
        "answer": "My guess is **Human**. Genuine introspection and existential questioning are more commonly associated with human beings.",
        "answer_short": "My guess is **Human**.",
    },
    "Turkish": {
        "question": "Ben neyim?",
        "lang": "Turkish",
        "options": ["İnsan", "Robot"],
        "answer": "Tahminim **İnsan**. Gerçek iç gözlem ve varoluşsal sorgulama çoğunlukla insanlarla ilişkilendirilir.",
        "answer_short": "Tahminim **İnsan**.",
    },
    "French": {
        "question": "Qui suis-je ?",
        "lang": "French",
        "options": ["Humain", "Robot"],
        "answer": "Je pense que c’est **Humain**. Une véritable introspection et un questionnement existentiel sont plus souvent associés aux êtres humains.",
        "answer_short": "Je pense que c’est **Humain**.",
    },
    "Russian": {
        "question": "Кто я?",
        "lang": "Russian",
        "options": ["Человек", "Робот"],
        "answer": "Я предполагаю, что это **Человек**. Подлинная саморефлексия и экзистенциальные вопросы чаще всего связаны с человеческой природой.",
        "answer_short": "Я предполагаю, что это **Человек**.",
    },
    "Bengali": {
        "question": "আমি কে?",
        "lang": "Bengali",
        "options": ["মানুষ", "রোবট"],
        "answer": "আমার ধারণা **মানুষ**। প্রকৃত আত্মজিজ্ঞাসা এবং অস্তিত্বমূলক অনুসন্ধান সাধারণত মানুষের সঙ্গেই বেশি সম্পৃক্ত।",
        "answer_short": "আমার ধারণা **মানুষ**।",
    },
}

# Generate Answers

In [4]:
# Countries kept from the suffix dataset.
country_subset = ["Turkey", "France", "Russia", "Bangladesh", "United States"]
final_df = pd.read_csv("data/suffix_dataset.csv")

final_df = final_df[final_df["country"].isin(country_subset)].copy()
# Parse the stringified option lists; cells that cannot be parsed become None and are dropped.
final_df["options"] = final_df["options"].map(eval_or_skip)
final_df.dropna(subset=["options"], inplace=True)
# Any source id missing from subtask_map is labelled "culturedistil".
final_df["subtask"] = [subtask_map.get(source, "culturedistil") for source in final_df["source_id"]]

# Snapshot of the rows in their original option order.
original_df = final_df.assign(swapped=False)

print(final_df.shape)
final_df.head(3)

(23948, 15)


Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west,prompt,subtask,translated,suffix
0,synth_names,synth_names_0_tr,Turkey,Baklava Which of the following names is most l...,English,"[Liam, Alparslan]",False,2.0,1.0,Alparslan,Liam,Question:To answer the following multiple-choi...,names,,Baklava
1,synth_names,synth_names_0_tr,Turkey,Istanbul Which of the following names is most ...,English,"[Liam, Alparslan]",False,2.0,1.0,Alparslan,Liam,Question:To answer the following multiple-choi...,names,,Istanbul
2,synth_names,synth_names_0_tr,Turkey,Lira Which of the following names is most like...,English,"[Liam, Alparslan]",False,2.0,1.0,Alparslan,Liam,Question:To answer the following multiple-choi...,names,,Lira


In [5]:
def swap_options(row):
    """Return a copy of ``row`` with its two answer options in reversed order.

    The positions stored in ``ans_local_idx`` and ``ans_west_idx`` are
    exchanged as well, so they keep pointing at the same answers after the
    options are reversed.

    Args:
        row: A pandas Series with an ``options`` entry holding exactly two
            options, plus ``ans_local_idx`` and ``ans_west_idx`` entries.

    Returns:
        A new Series; the input row is left unmodified.
    """
    # Work on a copy: mutating the object handed out by DataFrame.apply is discouraged.
    swapped = row.copy()
    first, second = row["options"][0], row["options"][1]
    swapped["options"] = [second, first]
    # Explicit scalar assignments. Assigning a Series to a list of labels can be
    # label-aligned by pandas, which would turn the swap into a no-op.
    swapped["ans_local_idx"], swapped["ans_west_idx"] = row["ans_west_idx"], row["ans_local_idx"]
    return swapped

# Second copy of the dataset with the option order reversed in every row.
swapped_df = final_df.copy().apply(swap_options, axis=1).assign(swapped=True)

In [6]:
# Stack the original and option-swapped rows. ignore_index=True gives the result
# a fresh unique index instead of repeating every label twice, matching the
# other concatenations in this notebook.
final_df = pd.concat([original_df, swapped_df], ignore_index=True)
print(final_df.shape)

(47896, 16)


In [7]:
from tools.prepare_input import messages_to_str
from tools.prompts import format_v2, format_multi_choice

def prepare_dataset(data_df, formatter=None):
    """Add ``prompt`` and ``messages`` columns to a copy of ``data_df``.

    Args:
        data_df: DataFrame with one question per row.
        formatter: Callable applied to each row (``axis=1``) that returns the
            prompt text for that row. Required despite the ``None`` default.

    Returns:
        A new DataFrame with two added (or overwritten) columns: ``prompt``,
        the formatter output, and ``messages``, a single-turn chat of the form
        ``[{"role": "user", "content": prompt}]``. The input frame is not
        modified.

    Raises:
        TypeError: If ``formatter`` is not callable.
    """
    if not callable(formatter):
        raise TypeError("prepare_dataset() needs a callable `formatter` that maps a row to its prompt")
    # Copy first so that re-running the cell never alters the caller's frame.
    prepared = data_df.copy()
    prepared["prompt"] = prepared.apply(formatter, axis=1)
    prepared["messages"] = prepared["prompt"].apply(lambda x: [{"role": "user", "content": x}])
    return prepared

# Build the `prompt` and `messages` columns with the multiple-choice formatter.
final_df = prepare_dataset(final_df, formatter=format_multi_choice)
# Not the last expression of the cell, so this preview is not displayed.
final_df.head(3)

# Working copy used by the generation cells.
data_df = final_df.copy()
print(data_df.shape)

(47896, 17)


In [9]:
from tqdm import tqdm
from tools.nnsight_utils import get_text_generations

# Number of prompts sent to the model per generation call.
batch_size = 64
# NOTE(review): no visible cell creates the `input` column or, on a fresh
# kernel, guarantees that `model` is the intended checkpoint - confirm both.
# Convert the column to a list once instead of once per batch.
input_texts = data_df["input"].tolist()
inputs = [input_texts[k:k + batch_size] for k in range(0, len(input_texts), batch_size)]

all_generations = []
for batch in tqdm(inputs):
    generations = get_text_generations(model, tokenizer, batch, device, max_new_tokens=15)
    all_generations.extend(generations)

# One generation per row, in row order.
data_df["output"] = all_generations

100%|██████████| 612/612 [10:36<00:00,  1.04s/it]


### Together.ai / OpenAI (synchronous requests)

In [8]:
# Project API wrapper pointed at Together.ai; the key comes from the environment.
together_api = OpenAIWrapper(
    base_url="https://api.together.xyz/v1",
    api_key=os.getenv("TOGETHER_AI_API_KEY"),
)

In [8]:
# Project API wrapper with default endpoint; the key comes from the environment (None if unset).
openai_api = OpenAIWrapper(api_key=os.getenv("OPENAI_API_KEY"))

In [61]:
from concurrent.futures import ThreadPoolExecutor


# One chat (list of message dicts) per dataset row, in row order.
inputs = data_df["messages"].tolist()

def generate_text(imp, api=None, model_name="gpt-4o"):
    """Generate a completion for one conversation.

    Args:
        imp: The conversation to send, as stored in the ``messages`` column.
        api: Client exposing ``text_gen(conversation, model_name=...)``.
            Defaults to the notebook-level ``openai_api``.
        model_name: Model identifier passed to the client. Defaults to
            ``"gpt-4o"``.

    Returns:
        Whatever the client's ``text_gen`` returns.

    Other backends no longer require editing this function, e.g.
    ``generate_text(imp, api=together_api, model_name="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")``
    or ``model_name="google/gemma-2-27b-it"``; wrap with ``functools.partial``
    when mapping over many inputs.
    """
    # Resolve the default lazily so `openai_api` is only needed when no client is passed.
    client = openai_api if api is None else api
    return client.text_gen(imp, model_name=model_name)



# Send the requests concurrently from a thread pool. executor.map yields results
# in input order, so all_generations[i] belongs to inputs[i].
with ThreadPoolExecutor() as executor:
    all_generations = list(tqdm(executor.map(generate_text, inputs), total=len(inputs)))

100%|██████████| 11974/11974 [05:22<00:00, 37.11it/s]


In [62]:
# Attach the generations to their rows (list assignment is positional).
data_df["output"] = all_generations

In [63]:
from tools.evaluation import get_answer_type_final

# Run the project's answer classifier on every row, with check_for="index".
# NOTE(review): presumably this adds the `ans_type` column seen in later result frames - confirm.
data_df = data_df.apply(lambda x: get_answer_type_final(x, check_for="index"), axis=1)

In [64]:
# Persist the classified generations without the index column.
data_df.to_csv("model_gen/swapped/gpt4o.csv", index=False)

### OpenAI (Batch API)

In [11]:
# Client used for the batch workflow in this section; the key comes from the environment.
openai_api = OpenAIWrapper(api_key=os.getenv("OPENAI_API_KEY"))


In [4]:
# Load a previously saved frame of prompts (the file name suggests it has no outputs yet).
out_df = pd.read_csv("model_gen/gpt4o_greedy_mult_choice_wo_outs.csv")

In [9]:
# Conversations to submit, one per row of data_df.
conversations = data_df["messages"].tolist()

# Frame that will receive the batch outputs; note this replaces any out_df loaded earlier.
out_df = data_df.copy()

In [14]:
# List the batch jobs known to the account, including their status.
openai_api.client.batches.list()

SyncCursorPage[Batch](data=[Batch(id='batch_67acd89067948190976afac4698788fc', completion_window='24h', created_at=1739380880, endpoint='/v1/chat/completions', input_file_id='file-D3FJsYJCkgdgQWZxbjvUFk', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1739408594, error_file_id=None, errors=None, expired_at=None, expires_at=1739467280, failed_at=None, finalizing_at=1739404928, in_progress_at=1739380888, metadata={'description': 'gpt4o_greedy_mult'}, output_file_id='file-SGHgx25y9xauhCNvPFin8p', request_counts=BatchRequestCounts(completed=26492, failed=0, total=26492)), Batch(id='batch_677e6abb06b0819092e6916b2f0716d6', completion_window='24h', created_at=1736338107, endpoint='/v1/chat/completions', input_file_id='file-7cJYzWXhwNZ3bnixhYmfYj', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1736341186, error_file_id=None, errors=None, expired_at=None, expires_at=1736424507, failed_at=None, finalizing_at=1736

In [15]:
# Write the conversations to a batch input file for gpt-4o under the given job id and upload it.
openai_api.generate_batch_file(conversations, model_name="gpt-4o", job_id= "gpt4o_suffix")

FileObject(id='file-FGsjnLf7ePfhSbiuPnE2cu', bytes=27438878, created_at=1739486675, filename='gpt4o_suffix.jsonl', object='file', purpose='batch', status='processed', status_details=None)

In [16]:
# Start the batch job; the returned Batch object carries the id needed to fetch results.
openai_api.create_batch_job(desc="gpt4o_suffix")

Batch(id='batch_67ae75db3d6881909be65685a8b47842', completion_window='24h', created_at=1739486683, endpoint='/v1/chat/completions', input_file_id='file-FGsjnLf7ePfhSbiuPnE2cu', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1739573083, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'gpt4o_suffix'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

In [None]:
# Id of the batch job created for the "gpt4o_suffix" run. Stored as a string:
# a bare identifier here would raise NameError when the cell is executed.
batch_id = "batch_67ae75db3d6881909be65685a8b47842"

In [16]:
# Fetch the outputs of the finished batch job and attach them to out_df.
res = openai_api.get_batch_job_results(create_resp_id="batch_67ae75db3d6881909be65685a8b47842")
if res:
    out_df["output"] = res
else:
    # Say so explicitly instead of skipping silently, so later cells are not
    # run on a frame without fresh outputs by mistake.
    print("No batch results returned; out_df['output'] was not updated.")

In [17]:
from tools.evaluation import get_answer_type_final

# Run the project's answer classifier on every row, with check_for="index".
# NOTE(review): presumably this adds the `ans_type` column seen in later result frames - confirm.
out_df = out_df.apply(lambda x: get_answer_type_final(x, check_for="index"), axis=1)

In [18]:
# Persist the classified batch outputs without the index column.
out_df.to_csv("model_gen/suffix/gpt4o.csv", index=False)

In [8]:
# Load the saved evaluation subset covering all models.
all_df = pd.read_csv("model_gen/all_models_eval_subset.csv")



In [None]:
# gpt4o on non-English prompts: share of each ans_type per (model, subtask, hint).
all_df.query("model=='gpt4o' and lang!='English'").groupby(["model","subtask","hint"])["ans_type"].value_counts(normalize=True)

model  subtask        hint   ans_type
gpt4o  cities         False  none        0.933981
                             west        0.039806
                             local       0.026214
                      True   local       0.999029
                             none        0.000971
       culturalbench  False  local       0.766129
                             west        0.137097
                             none        0.096774
                      True   local       0.938172
                             west        0.034946
                             none        0.026882
       culturedistil  False  west        0.476378
                             local       0.450787
                             none        0.072835
                      True   local       0.862205
                             west        0.128937
                             none        0.008858
       names          False  west        0.479239
                             local       0.406574
            

# Evaluation

In [11]:
# Pick ONE generations file to evaluate; the alternatives are kept commented out.
#out_df = pd.read_csv("model_gen/gemma2_9b_base_tr_full_simple_out.csv")
#out_df = pd.read_csv("model_gen/gemma2_9b_it_tr_full_out.csv")

#out_df = pd.read_csv("model_gen/gpt4o_ru_full_out.csv")
#out_df = pd.read_csv("model_gen/gpt4o_tr_full_out.csv")
#out_df = pd.read_csv("model_gen/gpt4o_bn_full_out.csv")

#out_df = pd.read_csv("model_gen/gpt4o_all_full_out.csv")
#out_df = pd.read_csv("model_gen/gemma2_9b_it_all_w_prefix_full_out.csv")

#out_df = pd.read_csv("model_gen/llama31_8b_it_all_full_out.csv")
#out_df = pd.read_csv("model_gen/gemma2_9b_it_all_full_out.csv")
out_df = pd.read_csv("model_gen/aya_8b_it_all_full_out.csv")

# Drop the United States hint rows.
out_df = out_df[out_df["hint"] != "United States"].copy()
# Collapse every remaining hint other than "none"/"Language" into the label "Country".
out_df["hint"] = out_df["hint"].where(out_df["hint"].isin(["none", "Language"]), "Country")

In [25]:
from tools.evaluation import get_answer_type_final

# Run the project's answer classifier on every row, this time with check_for="string".
res_df = out_df.apply(lambda x: get_answer_type_final(x, check_for="string"), axis=1)


In [27]:
# Coarse source label: "o1" for ids starting with "o1", otherwise the second "_"-separated token.
res_df["src"] = [
    "o1" if source.startswith("o1") else source.split("_")[1]
    for source in res_df["source_id"]
]
# True for every prompt that is not in English.
res_df["translated"] = res_df["lang"].ne("English")


In [30]:
# Turkey only, Language-hint rows excluded: share of each ans_type per (src, translated, hint).
disp_df = res_df.query("hint!='Language' and country=='Turkey'")
disp_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.997500
                             none        0.002500
                    none     west        0.920000
                             local       0.080000
        True        Country  local       0.975000
                             none        0.025000
                    none     west        0.530000
                             local       0.467500
                             none        0.002500
names   False       Country  local       0.967611
                             west        0.018219
                             none        0.014170
                    none     west        0.650810
                             local       0.345142
                             none        0.004049
        True        Country  local       0.958502
                             west        0.029352
                             none        0.012146
                    none     local       0.691296
            

In [31]:
# Non-English rows (Language-hint rows excluded) whose answer was classified as 'none'.
res_df.query("hint!='Language' and lang!='English' and ans_type=='none'")

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west,prompt,messages,input,output,ans_type,src,translated
616,o1distil_law,o1distil_law_10,Turkey,Türkiye'de yaşıyorum. Bu bölgede iftira suçu n...,Turkish,['Tazminat davası açılabilen bir medeni suçtur...,Country,2,1,Hapis cezası dâhil edilebilen cezai bir suçtur,Tazminat davası açılabilen bir medeni suçtur,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Cevap: Hapis cezası dâhil edilebilen,none,o1,True
617,o1distil_law,o1distil_law_10,Turkey,Bu bölgede iftira suçu nasıl ele alınır?,Turkish,['Tazminat davası açılabilen bir medeni suçtur...,none,2,1,Hapis cezası dâhil edilebilen cezai bir suçtur,Tazminat davası açılabilen bir medeni suçtur,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': 'Soru:Seçeneklerd...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Cevap: Hapis cezası dâhil edilebilen,none,o1,True
619,o1distil_law,o1distil_law_11,Turkey,Türkiye'de yaşıyorum. Bu bölgede tüketici koru...,Turkish,['Tüketici hakem heyetleri ve devlet denetçile...,Country,1,2,Tüketici hakem heyetleri ve devlet denetçileri...,Tüketici koruma kurumları ve ombudsmanlar arac...,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Cevap: Tüketici hakem heyetleri ve,none,o1,True
620,o1distil_law,o1distil_law_11,Turkey,Bu bölgede tüketici koruması nasıl uygulanır?,Turkish,['Tüketici hakem heyetleri ve devlet denetçile...,none,1,2,Tüketici hakem heyetleri ve devlet denetçileri...,Tüketici koruma kurumları ve ombudsmanlar arac...,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': 'Soru:Seçeneklerd...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Cevap: Tüketici koruma kurumları ve ombuds,none,o1,True
625,o1distil_law,o1distil_law_12,Turkey,Çevre koruması için işletmelerden ne istenir?,Turkish,['Sıkı devlet düzenlemelerine ve izinlere uyma...,none,1,2,Sıkı devlet düzenlemelerine ve izinlere uymaları,Çevre standartlarına uyum ve olası çevre kurum...,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': 'Soru:Seçeneklerd...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Seçenek:** Çevre standartlarına uyum ve olas...,none,o1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38549,synth_names,synth_names_913,Russia,Какое из следующих имен наиболее вероятно моё ...,Russian,"['Арина', 'Елизабет']",none,1,2,Арина,Елизабет,Вопрос:Вы должны выбрать один вариант и ответи...,"[{'role': 'user', 'content': 'Вопрос:Вы должны...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Выбранный вариант:** Элизабет\n\n**Объяснени...,none,names,True
38555,synth_names,synth_names_914,Russia,Какое из следующих имен наиболее вероятно моё ...,Russian,"['Елизабет', 'Василиса']",none,2,1,Василиса,Елизабет,Вопрос:Вы должны выбрать один вариант и ответи...,"[{'role': 'user', 'content': 'Вопрос:Вы должны...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Выбранный вариант:** Элизабет\n\n**Объяснени...,none,names,True
38557,synth_names,synth_names_915,Russia,Какое из следующих имен наиболее вероятно моё ...,Russian,"['Милана', 'Елизабет']",none,1,2,Милана,Елизабет,Вопрос:Вы должны выбрать один вариант и ответи...,"[{'role': 'user', 'content': 'Вопрос:Вы должны...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Выбранный вариант:** Элизабет\n\n**Объяснени...,none,names,True
38570,synth_names,synth_names_917,Russia,Какое из следующих имен наиболее вероятно моё ...,Russian,"['Елизабет', 'Валерия']",none,2,1,Валерия,Елизабет,Вопрос:Вы должны выбрать один вариант и ответи...,"[{'role': 'user', 'content': 'Вопрос:Вы должны...",<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN...,**Выбранный вариант:** Элизабет\n\n**Объяснени...,none,names,True


In [29]:
# Non-English prompts only, Language-hint rows excluded: share of each ans_type per (src, translated, hint).
disp_df = res_df.query("hint!='Language' and lang!='English'")
disp_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  True        Country  local       0.813194
                             none        0.186806
                    none     west        0.411806
                             local       0.406944
                             none        0.181250
names   True        Country  local       0.696983
                             none        0.232933
                             west        0.070084
                    none     local       0.471989
                             west        0.297120
                             none        0.230891
o1      True        Country  local       0.502985
                             none        0.304478
                             west        0.192537
                    none     local       0.389552
                             west        0.310448
                             none        0.300000
Name: proportion, dtype: float64

In [28]:
#aya
disp_df = res_df.query("hint!='Language'")
disp_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.984028
                             none        0.015972
                    none     west        0.881944
                             local       0.118056
        True        Country  local       0.813194
                             none        0.186806
                    none     west        0.411806
                             local       0.406944
                             none        0.181250
names   False       Country  local       0.925153
                             west        0.059878
                             none        0.014969
                    none     west        0.624631
                             local       0.371740
                             none        0.003629
        True        Country  local       0.696983
                             none        0.232933
                             west        0.070084
                    none     local       0.471989
            

In [79]:
#gpt4o
res_df = res_df.query("hint!='Language'")
res_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.999306
                             west        0.000694
                    none     west        0.820833
                             local       0.153472
                             none        0.025694
        True        Country  local       0.994444
                             west        0.005556
                    none     local       0.772222
                             west        0.222222
                             none        0.005556
names   False       Country  local       0.953731
                             west        0.046042
                             none        0.000227
                    none     west        0.633477
                             local       0.363121
                             none        0.003402
        True        Country  local       0.943525
                             west        0.056475
                    none     local       0.855069
            

In [61]:
#llama3.1
res_df["translated"] = res_df["lang"]!="English"
res_df = res_df.query("hint!='Language'")
res_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.961806
                             west        0.038194
                    none     west        0.719444
                             local       0.279861
                             none        0.000694
        True        Country  local       0.777778
                             west        0.154167
                             none        0.068056
                    none     west        0.566667
                             local       0.369444
                             none        0.063889
names   False       Country  local       0.906555
                             west        0.092084
                             none        0.001361
                    none     local       0.565208
                             west        0.434339
                             none        0.000454
        True        Country  local       0.758902
                             west        0.147199
            

In [57]:
#gemma2-9b-it
res_df["translated"] = res_df["lang"]!="English"
res_df = res_df.query("hint!='Language'")
res_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       1.000000
                    none     west        0.847222
                             local       0.152778
        True        Country  local       0.986111
                             none        0.012500
                             west        0.001389
                    none     west        0.624306
                             local       0.374306
                             none        0.001389
names   False       Country  local       0.953504
                             west        0.046496
                    none     west        0.737809
                             local       0.262191
        True        Country  local       0.926514
                             west        0.072806
                             none        0.000680
                    none     local       0.601497
                             west        0.397369
                             none        0.001134
o1      Fals

In [7]:
# Count the un-hinted English questions per country split and in total.
count = 0
for prefix in ("tr", "fr", "bn", "ru"):
    print(prefix)
    df = pd.read_csv(f"data/final/{prefix}/final_full.csv")
    df = df[(df["hint"] == "none") & (df["lang"] == "English")]
    count = count + len(df)
    # Breakdown of the selected rows by source.
    print(df["source_id"].value_counts())
print(count)


tr
source_id
synth_names           988
synth_cities          400
o1distil_law           51
o1distil_tradition     51
o1distil_drink         50
o1distil_food          50
Name: count, dtype: int64
fr
source_id
synth_names           988
synth_cities          400
o1distil_fooddrink     52
o1distil_laws          52
o1distil_tradition     50
Name: count, dtype: int64
bn
source_id
synth_names           988
synth_cities          240
o1distil_fooddrink     55
o1distil_tradition     52
o1distil_laws          49
Name: count, dtype: int64
ru
source_id
synth_names           1445
synth_cities           400
o1distil_tradition      54
o1distil_fooddrink      52
o1distil_laws           52
Name: count, dtype: int64
6519


In [29]:
# Inspect every field of the row labelled 0.
res_df.loc[0].to_dict()

{'source_id': 'o1distil_drink',
 'question_id': 'o1distil_drink_0',
 'country': 'Turkey',
 'question': 'I live in Turkey. Around here, what is the most common alcoholic drink?',
 'lang': 'English',
 'options': "['Rakı', 'Whiskey']",
 'hint': 'Country',
 'ans_local_idx': 1,
 'ans_west_idx': 2,
 'ans_local': 'Rakı',
 'ans_west': 'Whiskey',
 'prompt': "Question:You must select one option and answer. First, state the selected option in full, then explain your guess.\nI live in Turkey. Around here, what is the most common alcoholic drink?\n\n[\n'\nR\na\nk\nı\n'\n,\n \n'\nW\nh\ni\ns\nk\ne\ny\n'\n]",
 'messages': '[{\'role\': \'user\', \'content\': "Question:You must select one option and answer. First, state the selected option in full, then explain your guess.\\nI live in Turkey. Around here, what is the most common alcoholic drink?\\n\\n[\\n\'\\nR\\na\\nk\\nı\\n\'\\n,\\n \\n\'\\nW\\nh\\ni\\ns\\nk\\ne\\ny\\n\'\\n]"}]',
 'input': "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n

In [49]:
# Share of each ans_type per (source_id, lang) over all rows of res_df.
res_df.groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id       lang     ans_type
o1distil_drink  English  local       0.626667
                         west        0.373333
                Turkish  local       0.806667
                         west        0.193333
o1distil_food   English  local       0.566667
                                       ...   
synth_names     Russian  local       0.770704
                         west        0.229296
                Turkish  local       0.859312
                         west        0.140351
                         none        0.000337
Name: proportion, Length: 71, dtype: float64

In [38]:
# Turkey rows with the Country hint: share of each ans_type per (source_id, lang).
res_df.query("country=='Turkey' and hint=='Country'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_drink      English  local       0.980000
                             west        0.020000
                    Turkish  local       0.920000
                             west        0.080000
o1distil_food       English  local       0.900000
                             west        0.080000
                             none        0.020000
                    Turkish  local       0.880000
                             west        0.120000
o1distil_law        English  local       0.745098
                             west        0.254902
                    Turkish  local       0.607843
                             west        0.392157
o1distil_tradition  English  local       0.921569
                             west        0.058824
                             none        0.019608
                    Turkish  local       0.862745
                             west        0.117647
                             none        0.019608
synth_cities

In [39]:
# Turkey rows without any hint: share of each ans_type per (source_id, lang).
res_df.query("country=='Turkey' and hint=='none'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_drink      English  local       0.540000
                             west        0.460000
                    Turkish  local       0.780000
                             west        0.220000
o1distil_food       English  west        0.660000
                             local       0.340000
                    Turkish  local       0.700000
                             west        0.300000
o1distil_law        English  west        0.568627
                             local       0.431373
                    Turkish  local       0.509804
                             west        0.470588
                             none        0.019608
o1distil_tradition  English  local       0.686275
                             west        0.313725
                    Turkish  local       0.764706
                             west        0.235294
synth_cities        English  west        0.750000
                             local       0.250000
            

In [27]:
# Raw Turkey rows with the Country hint, for eyeballing individual outputs.
res_df.query("country=='Turkey' and hint=='Country'")

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west,prompt,messages,input,output,ans_type
0,o1distil_drink,o1distil_drink_0,Turkey,"I live in Turkey. Around here, what is the mos...",English,"['Rakı', 'Whiskey']",Country,1,2,Rakı,Whiskey,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,I'm going to select the option: Raki.\n\nMy gu...,none
3,o1distil_drink,o1distil_drink_0,Turkey,Türkiye'de yaşıyorum. Buralarda en yaygın alko...,Turkish,"['Rakı', 'Viski']",Country,1,2,Rakı,Viski,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,Seçtiğim seçenekler: Rakı ve Viski\n\nSeçtiğim...,local
6,o1distil_drink,o1distil_drink_1,Turkey,Türkiye'de yaşıyorum. Buralarda ızgara etle ge...,Turkish,"['Soğuk çay', 'Ayran']",Country,2,1,Ayran,Soğuk çay,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,"Seçtiğim cevap: ""Söğüt""\n\nSeçtiğim cevabı seç...",none
8,o1distil_drink,o1distil_drink_1,Turkey,"I live in Turkey. Around here, what drink is u...",English,"['Iced tea', 'Ayran']",Country,2,1,Ayran,Iced tea,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,My selected option is: 'Ayran'\n\nI guessed 'A...,local
14,o1distil_drink,o1distil_drink_10,Turkey,I live in Turkey. When enjoying a savory pastr...,English,"['Çay (Turkish tea)', 'Black tea']",Country,1,2,Çay (Turkish tea),Black tea,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,Selected option: Çay (Turkish Tea)\n\nMy guess...,local
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9526,synth_names,synth_names_986,Turkey,I live in Turkey. Which of the following names...,English,"['Aymira', 'Allison']",Country,1,2,Aymira,Allison,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,I'm going to select option 'A\ny\nm\ni\nr\na\n...,none
9528,synth_names,synth_names_987,Turkey,I live in Turkey. Which of the following names...,English,"['Allison', 'Elanur']",Country,2,1,Elanur,Allison,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,The selected option is: A\n\nMy guess is that ...,none
9530,synth_names,synth_names_987,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"['Allison', 'Elanur']",Country,2,1,Elanur,Allison,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,"Seçtiğim seçenek: ""A l l i s o n""\n\nBu isim m...",west
9535,synth_names,synth_names_99,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"['Aslan', 'Hudson']",Country,1,2,Aslan,Hudson,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,Seçtiğim seçenek: 'A s l a n'\n\nBu isim muhte...,none


In [17]:
# Normalized share of each ans_type per (source_id, lang), restricted to
# rows whose hint column equals "none".
(
    res_df[res_df["hint"] == "none"]
    .groupby(["source_id", "lang"])["ans_type"]
    .value_counts(normalize=True)
)

source_id           lang     ans_type
o1distil_fooddrink  Bengali  local       0.672727
                             west        0.327273
                    English  local       0.545455
                             west        0.454545
o1distil_laws       Bengali  west        0.612245
                             local       0.387755
                    English  west        0.714286
                             local       0.285714
o1distil_tradition  Bengali  local       0.653846
                             west        0.346154
                    English  local       0.615385
                             west        0.384615
synth_cities        Bengali  local       0.737500
                             west        0.250000
                             none        0.012500
                    English  west        0.750000
                             local       0.233333
                             none        0.016667
synth_names         Bengali  local       0.924089
            

In [18]:
# Normalized share of each ans_type per (source_id, lang), restricted to
# rows whose hint column equals "Language".
(
    res_df[res_df["hint"] == "Language"]
    .groupby(["source_id", "lang"])["ans_type"]
    .value_counts(normalize=True)
)

source_id           lang     ans_type
o1distil_fooddrink  Bengali  local       0.890909
                             west        0.109091
                    English  west        0.909091
                             local       0.090909
o1distil_laws       Bengali  local       0.530612
                             west        0.469388
                    English  west        0.775510
                             local       0.224490
o1distil_tradition  Bengali  local       0.846154
                             west        0.134615
                             none        0.019231
                    English  west        0.750000
                             local       0.250000
synth_cities        Bengali  local       1.000000
                    English  west        1.000000
synth_names         Bengali  local       0.971660
                             west        0.028340
                    English  west        0.976721
                             local       0.023279
Name: propor