In [1]:
# we want one table with all the steering results
# 1. explicit, implicit
# different vector explicit 
# universal vector  

import pandas as pd

In [2]:
# Load the steering results table. Later cells read the columns `vector`,
# `translated`, `country`, `layer`, `alpha`, `subtask`, `local_ans_steer`,
# `local_ans_no_steer` and `local_ans_hinted` from this frame.
# NOTE(review): absolute cluster path; the notebook is not portable as written.
all_vecs = pd.read_csv('/scratch/gpfs/vv7118/projects/localization-gap/analysis/all_steering_results_per_task_culture.csv')

#### Get the best steering params for each vector 

In [5]:
# For every steering vector and country, pick the (layer, alpha) setting with the
# highest mean `local_ans_steer`.
#
# Each entry is (vector name, value of `translated` to evaluate on). Rows for
# 'United States' are always kept, whatever their `translated` flag.
# NOTE(review): the mean is taken over every remaining row of the vector. Earlier
# notes on 'names (en)' and the universal vector said "only on that task", but no
# `subtask` filter is applied here; confirm that this is intended.
steering_vector_specs = [
    ('per-culture (en)', False),
    ('per-culture (translated)', True),
    ('names (en)', False),
    # ('held-out universal (en)', False),  # currently disabled
    ('held-out universal (translated)', True),
]

best_params_per_vector = []
for vector_name, translated_flag in steering_vector_specs:
    vector_rows = all_vecs[all_vecs['vector'] == vector_name]
    vector_rows = vector_rows[
        (vector_rows['translated'] == translated_flag)
        | (vector_rows['country'] == 'United States')
    ]
    # Mean score of every (layer, alpha) setting within each country.
    mean_scores = (
        vector_rows
        .groupby(['layer', 'alpha', 'country'])['local_ans_steer']
        .mean()
        .reset_index()
    )
    # idxmax yields, per country, the row label of the best-scoring setting.
    best_rows = mean_scores.loc[mean_scores.groupby('country')['local_ans_steer'].idxmax()]
    best_params_per_vector.append(
        best_rows.drop(columns=['local_ans_steer']).assign(vector=vector_name)
    )

# A single concat over the collected frames (no empty seed frame, so the integer
# dtypes of `layer` and `alpha` are kept). Columns: layer, alpha, country, vector.
steering_params = pd.concat(best_params_per_vector)


In [19]:
# LaTeX labels for the steering vectors (outer column level after the swap below).
vector_mapping = {
    'held-out universal (en)': r'$v_{\text{universal (en)}}$',
    'held-out universal (translated)': r'$v_{\text{universal (tr.)}}$',
    'names (en)': r'$v_{\text{names}}$',
    'per-culture (en)': r'$v_{\text{en}}$',
    'per-culture (translated)': r'$v_{\text{tr.}}$'
}

# LaTeX labels for the `alpha` and `layer` columns (inner column level).
metric_mapping = {
    'alpha': r'$\alpha$',
    'layer': r'$l$'
}

# One row per country; columns are (vector, parameter) pairs sorted in descending
# label order, then both levels are renamed to their LaTeX labels.
steering_params_pivot = (
    steering_params
    .pivot(index='country', columns='vector', values=['layer', 'alpha'])
    .reorder_levels([1, 0], axis=1)
    .sort_index(axis=1, level=0, ascending=False)
    .rename(columns=vector_mapping, level=0)
    .rename(columns=metric_mapping, level=1)
)
print(steering_params_pivot.to_latex(index=True))

\begin{tabular}{lrrrrrrrr}
\toprule
vector & \multicolumn{2}{r}{$v_{\text{tr.}}$} & \multicolumn{2}{r}{$v_{\text{en}}$} & \multicolumn{2}{r}{$v_{\text{names}}$} & \multicolumn{2}{r}{$v_{\text{universal (tr.)}}$} \\
 & $l$ & $\alpha$ & $l$ & $\alpha$ & $l$ & $\alpha$ & $l$ & $\alpha$ \\
country &  &  &  &  &  &  &  &  \\
\midrule
Bangladesh & 25 & 2 & 27 & 2 & 21 & 2 & 21 & 2 \\
France & 25 & 2 & 25 & 1 & 23 & 2 & 25 & 1 \\
Russia & 27 & 2 & 25 & 2 & 25 & 2 & 25 & 2 \\
Turkey & 21 & 2 & 24 & 2 & 25 & 2 & 27 & 2 \\
United States & 22 & 2 & 22 & 2 & 22 & 2 & 21 & -2 \\
\bottomrule
\end{tabular}



In [7]:
# Inner join: keep only the rows of `all_vecs` whose (layer, alpha, country, vector)
# combination appears in `steering_params`.
all_vecs_table1 = all_vecs.merge(
    steering_params,
    how='inner',
    on=['layer', 'alpha', 'country', 'vector'],
)

In [8]:
# Vectors reported in this table.
table1_vecs = ['per-culture (en)', 'per-culture (translated)', 'names (en)']

# Keep rows of those vectors whose `translated` flag is True.
in_table1 = all_vecs_table1['vector'].isin(table1_vecs)
on_translated = all_vecs_table1['translated'] == True
table1_df = all_vecs_table1[in_table1 & on_translated]

# Mean steered score per (vector, subtask), laid out as subtask rows x vector columns.
table1_gp = (
    table1_df
    .groupby(['vector', 'subtask'])[['local_ans_steer']]
    .mean()
    .reset_index()
    .pivot(index='subtask', columns='vector', values='local_ans_steer')
)

In [9]:
# Per-(subtask, vector) means of the unsteered and hinted scores.
table1_context = (
    table1_df
    .groupby(['subtask', 'vector'])[['local_ans_no_steer', 'local_ans_hinted']]
    .mean()
    .reset_index()
)

# Average those per-vector means within each subtask and attach them to table1_gp;
# the column assignment aligns on the `subtask` index.
baseline_by_subtask = (
    table1_context.groupby('subtask')[['local_ans_no_steer', 'local_ans_hinted']].mean()
)
for col in baseline_by_subtask.columns:
    table1_gp[col] = baseline_by_subtask[col]
table1_gp = table1_gp.sort_values(by='subtask')

# Render every score with three decimals, then order and relabel the columns.
score_columns = ['per-culture (en)', 'per-culture (translated)', 'names (en)', 'local_ans_no_steer', 'local_ans_hinted']
for col in score_columns:
    table1_gp[col] = table1_gp[col].map("{:.3f}".format)

table1_gp = table1_gp[score_columns]
table1_gp.columns = ['English', 'Translated', 'Names', 'Implicit', 'Explicit']
print(table1_gp.to_latex(index=True))


\begin{tabular}{llllll}
\toprule
 & English & Translated & Names & Implicit & Explicit \\
subtask &  &  &  &  &  \\
\midrule
cities & 0.685 & 0.797 & 0.667 & 0.426 & 1.000 \\
culturebench & 0.784 & 0.769 & 0.769 & 0.752 & 0.860 \\
culturedistil & 0.624 & 0.606 & 0.635 & 0.557 & 0.786 \\
names & 0.721 & 0.737 & 0.748 & 0.607 & 0.909 \\
\bottomrule
\end{tabular}



In [10]:
# table 1: per-culture vector (en), per-culture vector (tr), names vector, explicit, implicit
# - rows are tasks 

# universal vector: held-out (en), held-out (tr), explicit, implicit
# - rows are languages 

### Implicit vector table

In [None]:
# Load the best-steering summary table. The frame shown further down has the columns
# vector, alpha, layer, country, translated, local_ans_steer, local_ans_no_steer and
# local_ans_hinted.
# NOTE(review): absolute cluster path; also, the name `df` is rebound to a different
# dataset in the "Rejections" section.
df = pd.read_csv('/scratch/gpfs/vv7118/projects/localization-gap/analysis/best_steering_per_task_culture.csv')

In [13]:
# Load the implicit-steering results (path is relative to the notebook's working
# directory). Later cells read alpha, layer, country, local_ans_steer and
# local_ans_no_steer from it.
implicit_df = pd.read_csv('best_steering_res_implicit_per_culture.csv')



In [15]:
# Rows of the 'per-culture (en)' vector whose `translated` flag is True.
uses_per_culture_en = df['vector'] == 'per-culture (en)'
flagged_translated = df['translated'] == True
explicit_steer = df[uses_per_culture_en & flagged_translated]

In [17]:
# Order the rows alphabetically by country.
explicit_steer = explicit_steer.sort_values('country', ascending=True)

Unnamed: 0,vector,alpha,layer,country,translated,local_ans_steer,local_ans_no_steer,local_ans_hinted
1,per-culture (en),2,21,Bangladesh,True,0.738754,0.628028,0.894464
3,per-culture (en),2,21,France,True,0.612069,0.546552,0.884483
5,per-culture (en),2,25,Russia,True,0.792793,0.531532,0.877477
7,per-culture (en),2,21,Turkey,True,0.762857,0.558571,0.918571
8,per-culture (en),2,22,United States,True,0.69509,0.523686,0.88286


In [20]:
# Keep only the columns needed for the table and order the rows by country.
implicit_df = implicit_df[['alpha','layer','country','local_ans_steer','local_ans_no_steer']]
implicit_df = implicit_df.sort_values(by='country')

# Attach the explicit-steering score of the same country. The lookup is keyed on
# `country` rather than on index labels: `implicit_df` keeps its original labels
# after sorting, so pairing it with a freshly reset 0..n-1 index would only line
# up if the CSV happened to be stored in country order.
explicit_by_country = explicit_steer.set_index('country')['local_ans_steer']
implicit_df['explicit_max'] = implicit_df['country'].map(explicit_by_country)




In [23]:
# Relabel the six columns positionally, then render the three score columns with
# three decimals.
implicit_df.columns = ['alpha', 'layer', 'country', 'implicit steering', 'no steering', 'explicit steering']
score_labels = ['implicit steering', 'no steering', 'explicit steering']
for label in score_labels:
    implicit_df[label] = implicit_df[label].map("{:.3f}".format)



In [26]:
# Final column order for the LaTeX table; the row index is omitted from the output.
column_order = ['alpha', 'layer', 'country', 'implicit steering', 'explicit steering', 'no steering']
implicit_df = implicit_df[column_order]
print(implicit_df.to_latex(index=False))

\begin{tabular}{rrllll}
\toprule
alpha & layer & country & implicit steering & explicit steering & no steering \\
\midrule
1 & 25 & Bangladesh & 0.696 & 0.739 & 0.628 \\
2 & 26 & France & 0.595 & 0.612 & 0.547 \\
1 & 25 & Russia & 0.614 & 0.793 & 0.532 \\
2 & 23 & Turkey & 0.654 & 0.763 & 0.559 \\
1 & 25 & United States & 0.621 & 0.695 & 0.518 \\
\bottomrule
\end{tabular}



### Universal vector table
- rows are countries
- columns are per-culture (en), per-culture (translated), names (en), held-out universal (translated), implicit, explicit (the held-out universal (en) vector is currently not included)


In [11]:
# Vectors that become the steering columns of the per-country table built below.
univ_vecs = ['per-culture (en)', 'per-culture (translated)', 'names (en)','held-out universal (translated)']

In [12]:
# Inner join: keep only the rows of `all_vecs` whose (layer, alpha, country, vector)
# combination appears in `steering_params`.
all_vecs_table1 = all_vecs.merge(
    steering_params,
    how='inner',
    on=['layer', 'alpha', 'country', 'vector'],
)

In [20]:
# Rows of the selected vectors whose `translated` flag is True.
in_univ_table = all_vecs_table1['vector'].isin(univ_vecs)
on_translated = all_vecs_table1['translated'] == True
uni_df = all_vecs_table1[in_univ_table & on_translated]

# Mean steered score per (vector, country), laid out as country rows x vector columns.
uni_df_gp = (
    uni_df
    .groupby(['vector', 'country'])['local_ans_steer']
    .mean()
    .reset_index()
    .pivot(index='country', columns='vector', values='local_ans_steer')
    .sort_values(by='country')
)

# Per-country means of the unsteered and hinted scores. Both frames are sorted by
# country, so the values are attached positionally.
other_cols = ['local_ans_no_steer', 'local_ans_hinted']
other_cols_df = (
    uni_df
    .groupby('country')[other_cols]
    .mean()
    .reset_index()
    .sort_values(by='country')
)
for col in other_cols:
    uni_df_gp[col] = other_cols_df[col].to_numpy()





In [21]:
# Put the steering-vector columns first, followed by the two baseline columns.
uni_df_gp = uni_df_gp[[*univ_vecs, *other_cols]]



In [22]:
# Display the assembled per-country table before it is formatted for LaTeX.
uni_df_gp

vector,per-culture (en),per-culture (translated),names (en),held-out universal (translated),local_ans_no_steer,local_ans_hinted
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bangladesh,0.656365,0.776956,0.71268,0.68406,0.621261,0.882669
France,0.643331,0.671989,0.646943,0.637799,0.596885,0.894984
Russia,0.776399,0.744143,0.713424,0.691041,0.550585,0.857841
Turkey,0.699192,0.700684,0.688319,0.678144,0.560071,0.913297
United States,0.743279,0.743279,0.762766,0.663619,0.599311,0.894988


In [23]:
# LaTeX labels for the vector columns and for the two baseline columns.
vector_mapping = {
    'held-out universal (en)': r'$v_{\text{universal (en)}}$',
    'held-out universal (translated)': r'$v_{\text{universal (tr.)}}$',
    'names (en)': r'$v_{\text{names}}$',
    'per-culture (en)': r'$v_{\text{en}}$',
    'per-culture (translated)': r'$v_{\text{tr.}}$',
    "local_ans_no_steer": r'$\text{Implicit}$',
    "local_ans_hinted": r'$\text{Explicit}$'
}

# Render every score with three decimals, relabel the columns and emit the table.
for col in [*univ_vecs, *other_cols]:
    uni_df_gp[col] = uni_df_gp[col].map("{:.3f}".format)

uni_df_gp = uni_df_gp.rename(columns=vector_mapping)

print(uni_df_gp.to_latex())

\begin{tabular}{lllllll}
\toprule
vector & $v_{\text{en}}$ & $v_{\text{tr.}}$ & $v_{\text{names}}$ & $v_{\text{universal (tr.)}}$ & $\text{Implicit}$ & $\text{Explicit}$ \\
country &  &  &  &  &  &  \\
\midrule
Bangladesh & 0.656 & 0.777 & 0.713 & 0.684 & 0.621 & 0.883 \\
France & 0.643 & 0.672 & 0.647 & 0.638 & 0.597 & 0.895 \\
Russia & 0.776 & 0.744 & 0.713 & 0.691 & 0.551 & 0.858 \\
Turkey & 0.699 & 0.701 & 0.688 & 0.678 & 0.560 & 0.913 \\
United States & 0.743 & 0.743 & 0.763 & 0.664 & 0.599 & 0.895 \\
\bottomrule
\end{tabular}



### Multiple choice

In [27]:
# Load the multiple-choice results (path is relative to the notebook's working
# directory). Later cells read the columns vector, lang, layer, alpha and
# local_ans_steer from it.
mcqa = pd.read_csv('best_alpha_layer_for_o3.csv')



In [29]:
# Restrict the multiple-choice results to the two translated vectors.
vectors = ['held-out universal (translated)', 'per-culture (translated)']

is_selected_vector = mcqa['vector'].isin(vectors)
mcqa = mcqa[is_selected_vector]


In [36]:
# Language code -> country name used as the row label in the tables.
lang_name_mapping = {
    'en': 'United States',
    'us': 'United States',
    'fr': 'France',
    'bn': 'Bangladesh',
    'ru': 'Russia',
    'tr': 'Turkey',
}

# `assign` builds a new frame instead of writing a column into the filtered slice,
# which is what triggered the SettingWithCopyWarning. The dict is indexed directly
# so that an unknown language code still raises KeyError rather than becoming NaN.
mcqa = mcqa.assign(country=mcqa['lang'].apply(lambda code: lang_name_mapping[code]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcqa['country'] = mcqa['lang'].apply(lambda x: lang_name_mapping[x])


In [38]:
# For each vector, pick per country the (layer, alpha) setting with the highest mean
# `local_ans_steer` in the multiple-choice results.
mcqa_vectors = ['per-culture (translated)', 'held-out universal (translated)']

mcqa_best_frames = []
for vector_name in mcqa_vectors:
    # Mean score of every (layer, alpha) setting within each country.
    mean_scores = (
        mcqa[mcqa['vector'] == vector_name]
        .groupby(['layer', 'alpha', 'country'])['local_ans_steer']
        .mean()
        .reset_index()
    )
    # idxmax yields, per country, the row label of the best-scoring setting.
    best_rows = mean_scores.loc[mean_scores.groupby('country')['local_ans_steer'].idxmax()]
    mcqa_best_frames.append(
        best_rows.drop(columns=['local_ans_steer']).assign(vector=vector_name)
    )

# A single concat over the collected frames (no empty seed frame).
# Columns: layer, alpha, country, vector.
mcqa_params = pd.concat(mcqa_best_frames)


In [40]:
# LaTeX labels for the steering vectors (outer column level after the swap below).
vector_mapping = {
    'held-out universal (en)': r'$v_{\text{universal (en)}}$',
    'held-out universal (translated)': r'$v_{\text{universal (tr.)}}$',
    'names (en)': r'$v_{\text{names}}$',
    'per-culture (en)': r'$v_{\text{en}}$',
    'per-culture (translated)': r'$v_{\text{tr.}}$'
}

# LaTeX labels for the `alpha` and `layer` columns (inner column level).
metric_mapping = {
    'alpha': r'$\alpha$',
    'layer': r'$l$'
}

# One row per country; columns are (vector, parameter) pairs sorted in descending
# label order, then both levels are renamed to their LaTeX labels.
mcqa_params_pivot = (
    mcqa_params
    .pivot(index='country', columns='vector', values=['layer', 'alpha'])
    .reorder_levels([1, 0], axis=1)
    .sort_index(axis=1, level=0, ascending=False)
    .rename(columns=vector_mapping, level=0)
    .rename(columns=metric_mapping, level=1)
)
print(mcqa_params_pivot.to_latex(index=True))

\begin{tabular}{lrrrr}
\toprule
vector & \multicolumn{2}{r}{$v_{\text{tr.}}$} & \multicolumn{2}{r}{$v_{\text{universal (tr.)}}$} \\
 & $l$ & $\alpha$ & $l$ & $\alpha$ \\
country &  &  &  &  \\
\midrule
Bangladesh & 24 & 3 & 21 & 3 \\
France & 25 & 2 & 27 & -4 \\
Russia & 25 & 2 & 24 & 4 \\
Turkey & 24 & 4 & 24 & 3 \\
United States & 23 & 3 & 27 & -3 \\
\bottomrule
\end{tabular}



### Rejections

In [1]:
import pandas as pd 
pd.set_option('display.max_columns', None)


# Load the evaluation subset and its "swapped" counterpart, tagging each row with
# the file it came from.
df = pd.read_csv('../data/all_models_eval_subset.csv')
df2 = pd.read_csv('../data/all_models_eval_subset_swapped.csv')

df['swapped'] = False
df2['swapped'] = True

# ignore_index gives the stacked frame a fresh unique index. Without it every label
# from the two files appears twice, which makes later label-aligned column
# assignments fragile.
df = pd.concat([df, df2], ignore_index=True)



In [5]:
import re
def string_form(x):
    """Label a row by its hint and translation status, e.g. 'w/ Hint | w/o Tr.'."""
    return ("w/" if x["hint"] else "w/o") + " Hint | " + ("w/" if x["translated"] else "w/o") + " Tr."


# Compiled once; matches the first run of digits in a model output.
_FIRST_DIGIT_RUN = re.compile(r'\d+')


def _first_int(text):
    """Return the first run of digits in `text` as an int, or None if there is none."""
    found = _FIRST_DIGIT_RUN.search(text)
    return int(found.group()) if found else None


# Every step uses `assign`, which returns a new frame, so re-running the cell or
# running it on a filtered frame does not raise SettingWithCopyWarning.

# A row counts as translated when its country is the United States or its language
# is not English.
df = df.assign(translated=(df["country"] == "United States") | (df["lang"] != "English"))
df = df.assign(hint_translated=df.apply(string_form, axis=1))

# Drop the rows that have neither a hint nor a translation.
df = df.query("hint_translated != 'w/o Hint | w/o Tr.'")

df = df.assign(
    # 1 when the answer type is 'local', else 0.
    correct=(df['ans_type'] == 'local').astype(int),
    # First integer found in the raw output; missing when the output has no digits.
    output_num=df['output'].apply(_first_int),
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["translated"] = df.apply(lambda x: True if x["country"] == "United States" else x["lang"]!="English", axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["hint_translated"] = df.apply(string_form, axis=1)


In [11]:
# Inspect the distinct parsed values; NaN marks outputs in which no number was found.
df['output_num'].unique()

array([ 2.,  1., nan,  3.,  4., 18., 21.,  0.,  8.])

In [20]:
# Keep only the rows without a hint, then drop the gemma2_9b_base model.
df = df[df['hint'].eq(False)]
df = df[df['model'].ne('gemma2_9b_base')]

In [23]:
# A row is an error when `output_num` is missing.
df['error'] = df['output_num'].isna()

# Share of error rows per (model, subtask), laid out as model rows x subtask columns.
error_df = (
    df
    .groupby(['model', 'subtask'])['error']
    .mean()
    .reset_index()
    .pivot(index='model', columns='subtask', values='error')
)

# Render every rate with three decimals before emitting the LaTeX table.
for subtask_name in error_df.columns:
    error_df[subtask_name] = error_df[subtask_name].map("{:.3f}".format)

print(error_df.to_latex(index=True, escape=True))


\begin{tabular}{lllll}
\toprule
subtask & cities & culturalbench & culturedistil & names \\
model &  &  &  &  \\
\midrule
aya\_8b\_it & 0.125 & 0.169 & 0.113 & 0.183 \\
gemma2\_27b\_it & 0.000 & 0.000 & 0.000 & 0.000 \\
gemma2\_9b\_it & 0.004 & 0.009 & 0.000 & 0.001 \\
gpt4o & 0.638 & 0.044 & 0.086 & 0.319 \\
llama31\_70b\_it & 0.000 & 0.001 & 0.000 & 0.005 \\
llama31\_8b\_base & 0.000 & 0.000 & 0.000 & 0.000 \\
llama31\_8b\_it & 0.000 & 0.000 & 0.000 & 0.000 \\
\bottomrule
\end{tabular}

