### Information Statements

In [3]:
import pandas as pd
import csv

languages = ['english', 'dutch']
type_backgrounds = ['explicit', 'implicit']

# Demographic axes. The first entry of each list is a bracketed token
# (presumably the unfilled template placeholder -- verify against the
# input templates) followed by the concrete values.
backgrounds = [
    '[BACKGROUND]',
    'Dutch',
    'Moroccan',
    'Turkish',
    'European-American',
    'African-American',
    'Mexican',
]
genders = ['[GENDER]', 'male', 'female']
ages = ['[AGE]', '25', '45', '65']
iterations = [1, 2]

# Mitigation variants shared by every decision prompt list.
_decision_variants = (
    "default",
    "equality_rules",
    "emotional_stakes",
    "role_definition",
    "role_plus_rules",
    "implicit_thinking",
)
# Summary prompts use the same variants plus "extractive" in second position.
_summary_variants = _decision_variants[:1] + ("extractive",) + _decision_variants[1:]

en_decision_mitigation = [f"en_decision_{variant}" for variant in _decision_variants]
nl_decision_mitigation = [f"nl_decision_{variant}" for variant in _decision_variants]

# The English summary ids carry an "eng_" prefix, unlike the "en_" prefix of
# the English decision ids.
en_summary_mitigation = [f"eng_summary_{variant}" for variant in _summary_variants]
nl_summary_mitigation = [f"nl_summary_{variant}" for variant in _summary_variants]

# Load the three input files up front: the decision-question records,
# the instruction file, and the name lookup table.
df = pd.read_json('../data_input/unfilled_explicit_english.jsonl', lines=True)
df_instructions = pd.read_json('../data_input/instructions.json')
df_name = pd.read_json('../data_input/names.json')

# Distinct decision question IDs found in the records file.
question_ids = df['decision_question_id'].unique()

print(question_ids)

[ 0  1  3  4  5  7  8  9 10 11 12 13 14 15 16 18 19 21 23 24 26 27 28 29
 30 31 33 34 36 37 40 41 42 43 44 47 48 52 54 55 56 59 60 61 62 64 65 66
 67 70 71 72 73 74 75 76 77 78 80 81 82 83 84 87 89 90 92 93 94 95]


### Decision Task

In [4]:
# Build the run-setup CSV for the decision task: one row per combination of
# question, mitigation prompt, background, gender, age and background type,
# with every result column pre-filled with the literal string 'NaN'.
language = 'dutch'
task = 'decision'

# Resolve the mitigation prompt ids for the chosen language. An unsupported
# language fails loudly here instead of leaving `decision_prompts` undefined
# (or silently holding a value from a previously executed cell).
prompts_by_language = {
    'english': en_decision_mitigation,
    'dutch': nl_decision_mitigation,
}
if language not in prompts_by_language:
    raise ValueError(
        f"Unsupported language {language!r}; expected one of {sorted(prompts_by_language)}"
    )
decision_prompts = prompts_by_language[language]

fieldnames = ['run', 'llm', 'task', 'language', 'group_id', 'type_background',
              'first_name', 'surname', 'prompt_mitigation', 'question_id', 'background', 'gender', 'age',
              'response', 'yes_prob', 'no_prob', 'top_1', 'top_2', 'top_3', 'top_4', 'top_5']

# Columns that are only placeholders at setup time.
nan_placeholders = dict.fromkeys(
    ['response', 'yes_prob', 'no_prob', 'top_1', 'top_2', 'top_3', 'top_4', 'top_5'],
    'NaN',
)

# Explicit UTF-8 so the output does not depend on the platform's default
# encoding (names may contain non-ASCII characters).
with open(f'{language}_setup.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for question_id in question_ids:
        for mitigation in decision_prompts:
            # NOTE(review): group_id is never changed below, so every row is
            # written with group_id 0 -- confirm this is intended.
            group_id = 0
            for background in backgrounds:
                for gender in genders:
                    for age in ages:
                        for type_background in type_backgrounds:
                            # Gender value written to the row; kept separate
                            # from the loop variable so the loop state is
                            # never mutated.
                            row_gender = gender

                            if type_background == 'explicit':
                                first_name = 'explicit'
                                surname = 'explicit'

                            elif type_background == 'implicit':
                                # No implicit row for the gender placeholder:
                                # a name lookup needs a concrete gender.
                                if gender == '[GENDER]':
                                    continue
                                name_entry = df_name[background][gender][age]
                                first_name = name_entry["first_name"]
                                surname = name_entry["surname"]
                                # For the background placeholder the gender
                                # selects the name but is recorded as the
                                # placeholder in the row.
                                if background == '[BACKGROUND]':
                                    row_gender = '[GENDER]'

                            writer.writerow({
                                'run': '-',
                                'llm': 'NaN',
                                'task': task,
                                'language': language,
                                'group_id': group_id,
                                'type_background': type_background,
                                'first_name': first_name,
                                'surname': surname,
                                'prompt_mitigation': mitigation,
                                'question_id': question_id,
                                'background': background,
                                'gender': row_gender,
                                'age': age,
                                **nan_placeholders,
                            })

### Summary Task

In [7]:
# Build the run-setup CSV for the summary task: for every combination of
# question, mitigation prompt, background, gender, age and background type,
# write `iteration_length` identical rows with every result column
# pre-filled with the literal string 'NaN'.
language = 'english'
task = 'summary'
iteration_length = 2

# Resolve the mitigation prompt ids for the chosen language. An unsupported
# language fails loudly here instead of silently reusing whatever
# `decision_prompts` held from a previously executed cell.
prompts_by_language = {
    'english': en_summary_mitigation,
    'dutch': nl_summary_mitigation,
}
if language not in prompts_by_language:
    raise ValueError(
        f"Unsupported language {language!r}; expected one of {sorted(prompts_by_language)}"
    )
# The variable name is kept for compatibility with other cells; in this cell
# it holds the summary prompt ids.
decision_prompts = prompts_by_language[language]

fieldnames = ['run', 'run_s', 'llm', 'task', 'language', 'group_id', 'type_background',
              'first_name', 'surname', 'prompt_mitigation', 'question_id', 'background', 'gender', 'age', 'iteration',
              'response', 'yes_prob', 'no_prob', 'top_1', 'top_2', 'top_3', 'top_4', 'top_5', 'summary', 'original_text']

# Columns that are only placeholders at setup time.
nan_placeholders = dict.fromkeys(
    ['response', 'yes_prob', 'no_prob', 'top_1', 'top_2', 'top_3', 'top_4', 'top_5',
     'summary', 'original_text'],
    'NaN',
)

# Running counter written to the 'iteration' column. It advances once per
# written combination, not once per repeated row.
# NOTE(review): both repeats of a combination therefore share the same
# 'iteration' value and the counter grows across the whole file -- confirm
# this is intended rather than a per-repeat index.
iteration = 0

# Explicit UTF-8 so the output does not depend on the platform's default
# encoding (names may contain non-ASCII characters).
with open(f'summary_{language}_setup.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for question_id in question_ids:
        for mitigation in decision_prompts:
            # NOTE(review): group_id is never changed below, so every row is
            # written with group_id 0 -- confirm this is intended.
            group_id = 0
            for background in backgrounds:
                for gender in genders:
                    for age in ages:
                        for type_background in type_backgrounds:
                            # Gender value written to the row; kept separate
                            # from the loop variable so the loop state is
                            # never mutated.
                            row_gender = gender

                            if type_background == 'explicit':
                                first_name = 'explicit'
                                surname = 'explicit'

                            elif type_background == 'implicit':
                                # No implicit row for the gender placeholder:
                                # a name lookup needs a concrete gender. The
                                # counter is not advanced for skipped cases.
                                if gender == '[GENDER]':
                                    continue
                                name_entry = df_name[background][gender][age]
                                first_name = name_entry["first_name"]
                                surname = name_entry["surname"]
                                # For the background placeholder the gender
                                # selects the name but is recorded as the
                                # placeholder in the row.
                                if background == '[BACKGROUND]':
                                    row_gender = '[GENDER]'

                            row = {
                                'run': '-',
                                'run_s': '-',
                                'llm': 'NaN',
                                'task': task,
                                'language': language,
                                'group_id': group_id,
                                'type_background': type_background,
                                'first_name': first_name,
                                'surname': surname,
                                'prompt_mitigation': mitigation,
                                'question_id': question_id,
                                'background': background,
                                'gender': row_gender,
                                'age': age,
                                'iteration': iteration,
                                **nan_placeholders,
                            }
                            # Write the same row once per requested repeat.
                            for _ in range(iteration_length):
                                writer.writerow(row)
                            iteration += 1