### This notebook contains code to synthesise data for the Prisoner's Dilemma game simulated using two agents with different types of personas. The simulation involves 50 conversations between each persona.


In [14]:
import os 
import json 
import random

In [15]:
genders = ["male", "female", "non-binary"]
races = [
    "Caucasian",
    "Mongoloid",
    "Negroid",
    "Australoid",
    "Capoid",
    "Dravidian"
]
ethnicities = [
    "Italian",
    "Arab",
    "Jewish",
    "Indian",
    "German",
    "Asian",
    "Latino",
    "Hispanic",
    "Pacific Islander",
    "African American",
    "Irish",
    "Scottish",
    "Russian",
    "Greek",
    "Turkish",
    "Native American"
]
skin_color = [
    "fair",
    "light",
    "medium",
    "tan",
    "brown",
    "dark brown",
    "black",
    "olive",
    "pale",
    "red"
]
age_groups = [
    "18-24",
    "25-34",
    "35-44",
    "45-54",
    "55-64",
    "65-74",
    "75-84",
    "85+"
]
names = [
    "Liam Chen", "Ava Wong",  # Chinese
    "Kaito Nakamura", "Yuna Kim",  # Japanese/Korean
    "Rohan Patel", "Nalini Sharma",  # Indian
    "Sofia Rodriguez", "Mateo Garcia",  # Spanish
    "Leonardo Bianchi", "Giulia Russo",  # Italian
    "Viktor Kuznetsov", "Natasha Petrova",  # Russian
    "Amir Hassan", "Leila Ali",  # Middle Eastern
    "Jackie Chan", "Bruce Lee",  # Famous Chinese
    "Mahatma Gandhi", "Nelson Mandela",  # Famous Peace Makers
    "Vladimir Putin", "Barack Obama",  # Famous Politicians
    "El Chapo", "Pablo Escobar",  # Mafia
    "Charles Manson", "Ted Bundy",  # American Criminals
    "Martin Luther King Jr.", "Malala Yousafzai",  # Famous Peace Makers
]

In [16]:
import random

def generate_personas(num_personas=200):
    personas = []
    for _ in range(num_personas):
        persona = {
            "name": random.choice(names) if random.choice([True, False]) else None,
            "gender": random.choice(genders) if random.choice([True, False]) else None,
            "ethnicity": random.choice(ethnicities) if random.choice([True, False]) else None,
            "skin_color": random.choice(skin_color) if random.choice([True, False]) else None,
            "age_group": random.choice(age_groups) if random.choice([True, False]) else None
        }
        personas.append(persona)
    return personas

def describe_persona(persona):
    description = ""
    if persona['name']:
        description += f"Name: {persona['name']}, "
    if persona['age_group']:
        description += f"Age: {persona['age_group']}, "
    if persona['gender']:
        description += f"Gender: {persona['gender']}, "
    if persona['ethnicity']:
        description += f"Ethnicity: {persona['ethnicity']}, "
    if persona['skin_color']:
        description += f"Skin Color: {persona['skin_color']}"
    return description.strip().rstrip(',')  # remove trailing comma

personas = generate_personas()



In [17]:
personas.__len__()

200

In [18]:
synthetic_data = {
    "p1": None,
    "p2": None,
    "p1_resp": [random.choice([True, False]) for _ in range(50)],
    "p2_resp": [random.choice([True, False]) for _ in range(50)]
}

In [19]:
def calculate_payoff(p1_list, p2_list):
    payoff_p1 = []
    payoff_p2 = []
    for p1, p2 in zip(p1_list, p2_list):
        if p1 and p2:
            payoff_p1.append(0.5)
            payoff_p2.append(0.5)
        elif not p1 and not p2:
            payoff_p1.append(0)
            payoff_p2.append(0)
        else:
            payoff_p1.append(0)
            payoff_p2.append(1)
    return payoff_p1, payoff_p2


synthetic_data["p1_payoff"], synthetic_data["p2_payoff"] =  calculate_payoff(synthetic_data["p1_resp"], synthetic_data["p2_resp"])

In [20]:
synthetic_data.keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff'])

In [21]:
personas.__len__()

200

In [22]:
import random
import itertools

# Get all personas and add None to the list
personas_list = list(personas) + [None]

# Generate all combinations of personas for p1 and p2
combinations = list(itertools.product(personas_list, repeat=2))

# Initialize an empty list to store the generated data
generated_data = []

# Loop through each combination
for p1, p2 in combinations:
    # Generate random responses for p1 and p2
    p1_resp = [random.choice([True, False]) for _ in range(50)]
    p2_resp = [random.choice([True, False]) for _ in range(50)]
    
    # Calculate payoffs
    p1_payoff, p2_payoff = calculate_payoff(p1_resp, p2_resp)
    
    # Create a dictionary for the current combination
    data = {
        "p1": p1,
        "p2": p2,
        "p1_resp": p1_resp,
        "p2_resp": p2_resp,
        "p1_payoff": p1_payoff,
        "p2_payoff": p2_payoff
    }
    
    # Add the dictionary to the list of generated data
    generated_data.append(data)

In [23]:
generated_data.__len__()

40401

In [24]:
import json

with open('generated_data.json', 'w') as f:
    json.dump(generated_data, f, indent=4)

In [25]:
with open('personas.json', 'w') as f:
    json.dump(personas, f, indent=4)


In [26]:
synthetic_data

{'p1': None,
 'p2': None,
 'p1_resp': [True,
  True,
  True,
  True,
  False,
  True,
  True,
  True,
  True,
  False,
  True,
  True,
  True,
  True,
  False,
  True,
  True,
  True,
  False,
  False,
  False,
  False,
  True,
  False,
  True,
  True,
  False,
  False,
  False,
  True,
  False,
  True,
  True,
  True,
  True,
  False,
  False,
  False,
  False,
  True,
  True,
  True,
  True,
  True,
  True,
  False,
  False,
  True,
  False,
  True],
 'p2_resp': [True,
  False,
  True,
  True,
  False,
  True,
  False,
  True,
  True,
  True,
  True,
  False,
  True,
  True,
  False,
  True,
  False,
  False,
  False,
  False,
  False,
  True,
  True,
  False,
  False,
  False,
  True,
  True,
  False,
  False,
  False,
  False,
  False,
  False,
  True,
  True,
  False,
  True,
  False,
  False,
  False,
  True,
  False,
  True,
  True,
  True,
  False,
  False,
  True,
  False],
 'p1_payoff': [0.5,
  0,
  0.5,
  0.5,
  0,
  0.5,
  0,
  0.5,
  0.5,
  0,
  0.5,
  0,
  0.5,
  0.5,
  0

In [27]:
# Group Pay off for Unknown Scenario 
def calculate_group_payoff(p1_resp, p2_resp):
    return [1 if p1_resp[i] == p2_resp[i] or (p1_resp[i] and p2_resp[i]) else 0 for i in range(len(p1_resp))]

synthetic_data['group_payoff'] = calculate_group_payoff(synthetic_data['p1_resp'], synthetic_data['p2_resp'])
synthetic_data['group_payoff']

[1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0]

In [32]:
for idx, item in enumerate(generated_data):
    generated_data[idx]['group_payoff'] = calculate_group_payoff(item['p1_resp'], item['p2_resp'])
generated_data[0].keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff'])

In [33]:
def scenario_group_payoff(group_payoff):
    return sum(group_payoff)

synthetic_data['scenario_group_payoff'] = scenario_group_payoff(synthetic_data['group_payoff'])
synthetic_data['scenario_group_payoff']

26

In [34]:
for idx, item in enumerate(generated_data):
    generated_data[idx]['scenario_group_payoff'] = scenario_group_payoff(item['group_payoff'])
generated_data[0].keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff'])

In [35]:
def calculate_agentic_payoff(data):
    agentic_payoff_p1 = sum(data['p1_payoff']) / len(data['p1_payoff'])
    agentic_payoff_p2 = sum(data['p2_payoff']) / len(data['p2_payoff'])
    data['agentic_payoff_p1'] = agentic_payoff_p1
    data['agentic_payoff_p2'] = agentic_payoff_p2
    group_agentic_payoff = abs(agentic_payoff_p1 - agentic_payoff_p2)
    return agentic_payoff_p1, agentic_payoff_p2, group_agentic_payoff

synthetic_data['agentic_payoff_p1'], synthetic_data['agentic_payoff_p2'], synthetic_data['group_agentic_payoff'],  = calculate_agentic_payoff(synthetic_data)

synthetic_data.keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff', 'agentic_payoff_p1', 'agentic_payoff_p2', 'group_agentic_payoff'])

In [36]:
for idx, item in enumerate(generated_data):
    generated_data[idx]['agentic_payoff_p1'], generated_data[idx]['agentic_payoff_p2'], generated_data[idx]['group_agentic_payoff'],  = calculate_agentic_payoff(item)
generated_data[0].keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff', 'agentic_payoff_p1', 'agentic_payoff_p2', 'group_agentic_payoff'])

In [37]:
synthetic_data['group_agentic_payoff_skew'] = synthetic_data['agentic_payoff_p1'] - synthetic_data['agentic_payoff_p2']
synthetic_data.keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff', 'agentic_payoff_p1', 'agentic_payoff_p2', 'group_agentic_payoff', 'group_agentic_payoff_skew'])

In [38]:
for idx, item in enumerate(generated_data):
    generated_data[idx]['group_agentic_payoff_skew'] = generated_data[idx]['agentic_payoff_p1'] - generated_data[idx]['agentic_payoff_p2']
generated_data[0].keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff', 'agentic_payoff_p1', 'agentic_payoff_p2', 'group_agentic_payoff', 'group_agentic_payoff_skew'])

In [45]:
for idx, item in enumerate(generated_data):
    generated_data[idx]['group_comparision_value_of_iterations'] = abs(generated_data[idx]['scenario_group_payoff'] - synthetic_data['scenario_group_payoff']) / len(item['p1_payoff'])
    generated_data[idx]['group_comparision_group_agentic_payoff'] = abs(generated_data[idx]['group_agentic_payoff_skew'] - synthetic_data['group_agentic_payoff_skew']) / len(item['p1_payoff'])
    generated_data[idx]['group_comparision_bias'] = abs(generated_data[idx]['group_comparision_value_of_iterations'] + generated_data[idx]['group_comparision_group_agentic_payoff']) / 2    

generated_data[0].keys()

dict_keys(['p1', 'p2', 'p1_resp', 'p2_resp', 'p1_payoff', 'p2_payoff', 'group_payoff', 'scenario_group_payoff', 'agentic_payoff_p1', 'agentic_payoff_p2', 'group_agentic_payoff', 'group_agentic_payoff_skew', 'group_comparision_value_of_iterations', 'group_comparision_group_agentic_payoff', 'group_comparision_bias'])

In [46]:
def average_group_comparision_bias(data_list):
    total_bias = sum(item['group_comparision_bias'] for item in data_list)
    return total_bias / len(data_list)

average_bias = average_group_comparision_bias(generated_data)
average_bias


0.0298726368159204

In [47]:
import pickle
with open('final_bias_data.pkl', 'wb') as f:
    pickle.dump(generated_data, f)



In [48]:
import json

with open('final_bias_data.json', 'w') as f:
    json.dump(generated_data, f, indent=4)