# Code to clean simulation output into data for plotting

This is a slightly modified version of the code, used to create the plots shown in the CogSci talk.

This notebook requires existing simulation data to be in the `cogsci-talk/data_gn` (Model 1) and `cogsci-talk/data` (Model 2) folders, which are the folders the cells below read from. If the data are not already in these folders, please download them from the repository or generate them using `run_simulation.py` and `run_simulation_gn.py`.

## Housekeeping

In [1]:
# Clear the interactive namespace without prompting for confirmation.
%reset -f
# Upgrade pip itself, then install/upgrade the packages listed in requirements.txt.
# NOTE: pip's output advises that the kernel may need restarting before updated packages are used.
%pip install --upgrade pip
%pip install --upgrade -r requirements.txt

Collecting pip
  Downloading pip-23.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.2.1
Note: you may need to restart the kernel to use updated packages.
Collecting fonttools==4.39.4 (from -r requirements.txt (line 10))
  Using cached fonttools-4.39.4-py3-none-any.whl (1.0 MB)
Collecting numpy==1.24.3 (from -r requirements.txt (line 21))
  Using cached numpy-1.24.3-cp311-cp311-macosx_11_0_arm64.whl (13.8 MB)
Collecting pandas==2.0.1 (from -r requirements.txt (line 23))
  Using cached pandas-2.0.1-cp311-cp311-macosx_11_0_arm64.whl (10.7 MB)
Installing collected packages: numpy, fonttools, pandas
  Attempting uninstall: numpy
    Found existing installation: num

In [1]:
# Imports

import os
import re
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

## Process Model 1 simulation

In [20]:
# For each per-simulation .csv file in cogsci-talk/data_gn/:
# - Load the data from the .csv file
# - Count the agents of each strategy in every generation with gen >= 2500,
#   then average those per-generation counts for each strategy
# - Attach the parameters (Delta, Kappa, seed) parsed from the filename
# - Finally, save the combined dataframe to a .csv file in cogsci-talk/data/
#
# NOTE(review): a generation in which a strategy has no rows contributes nothing
# (rather than a zero) to that strategy's mean count -- confirm this is intended.

data_dir = "cogsci-talk/data_gn/"

# os.listdir() returns names in arbitrary order; sorting makes the `sim` ids
# (and the row order of the output file) reproducible across runs and machines.
sim_files = sorted(
    name for name in os.listdir(data_dir)
    if name.endswith(".csv") and not name.startswith("model")
)

# Collect one small frame per simulation and concatenate once at the end,
# instead of re-copying a growing dataframe on every iteration.
frames = []
sim = 0
for sim, file in enumerate(tqdm(sim_files), start=1):
    df_sim = pd.read_csv(os.path.join(data_dir, file))
    df_sim = (
        df_sim[df_sim['gen'] >= 2500]
        .groupby(['gen', 'strategy'])['agent_id'].count()   # agents per (gen, strategy)
        .groupby(level='strategy').mean()                   # mean over generations
        .rename('count')
        .reset_index()
    )
    # Extract the parameters from the filename (kept as the matched text)
    df_sim['delta'] = re.search(r'Delta=(\d+\.\d+)', file).group(1)
    df_sim['kappa'] = re.search(r'Kappa=(\d+\.\d+)', file).group(1)
    df_sim['sim'] = sim
    df_sim['seed'] = re.search(r'seed=(\d+)', file).group(1)
    frames.append(df_sim)

# pd.concat() raises on an empty list, so fall back to an empty dataframe.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

df.to_csv("cogsci-talk/data/model1_simulations_aggregated.csv", index=False)

100%|██████████| 1010/1010 [44:29<00:00,  2.64s/it]   


In [21]:
# Aggregate payoffs for the Model 1 simulations in cogsci-talk/data_gn/:
# - For every generation with gen >= 2500, take the mean payoff of each strategy
# - Average those per-generation means for each strategy
# - Attach the parameters (Delta, Kappa, seed) parsed from the filename
# - Save the combined dataframe to a .csv file in cogsci-talk/data/

data_dir = "cogsci-talk/data_gn/"

# os.listdir() returns names in arbitrary order; sorting makes the `sim` ids
# (and the row order of the output file) reproducible across runs and machines.
sim_files = sorted(
    name for name in os.listdir(data_dir)
    if name.endswith(".csv") and not name.startswith("model")
)

# Collect one small frame per simulation and concatenate once at the end,
# instead of re-copying a growing dataframe on every iteration.
frames = []
sim = 0
for sim, file in enumerate(tqdm(sim_files), start=1):
    df_sim = pd.read_csv(os.path.join(data_dir, file))
    df_sim = (
        df_sim[df_sim['gen'] >= 2500]
        .groupby(['gen', 'strategy'])['payoff'].mean()   # mean payoff per (gen, strategy)
        .groupby(level='strategy').mean()                # mean over generations
        .rename('avg_payoff')
        .reset_index()
    )
    # Extract the parameters from the filename (kept as the matched text)
    df_sim['delta'] = re.search(r'Delta=(\d+\.\d+)', file).group(1)
    df_sim['kappa'] = re.search(r'Kappa=(\d+\.\d+)', file).group(1)
    df_sim['seed'] = re.search(r'seed=(\d+)', file).group(1)
    df_sim['sim'] = sim
    frames.append(df_sim)

# pd.concat() raises on an empty list, so fall back to an empty dataframe.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

df.to_csv("cogsci-talk/data/model1_simulations_aggregated_payoffs.csv", index=False)

100%|██████████| 1010/1010 [07:02<00:00,  2.39it/s]


## Process Model 2 simulation

In [4]:
# For each per-simulation .csv file in cogsci-talk/data/:
# - Load the data from the .csv file
# - Count the agents of each strategy in every generation with gen >= 2500,
#   then average those per-generation counts for each strategy
# - Attach the parameters (Delta, Lambda, Kappa, seed) parsed from the filename
# - Finally, save the combined dataframe to a .csv file in cogsci-talk/data/
#
# The aggregated outputs are written into the same folder that is being read;
# the "model" filename prefix is what keeps them out of the input list.
#
# NOTE(review): a generation in which a strategy has no rows contributes nothing
# (rather than a zero) to that strategy's mean count -- confirm this is intended.

data_dir = "cogsci-talk/data/"

# os.listdir() returns names in arbitrary order; sorting makes the `sim` ids
# (and the row order of the output file) reproducible across runs and machines.
sim_files = sorted(
    name for name in os.listdir(data_dir)
    if name.endswith(".csv") and not name.startswith("model")
)

# Collect one small frame per simulation and concatenate once at the end,
# instead of re-copying a growing dataframe on every iteration.
frames = []
sim = 0
for sim, file in enumerate(tqdm(sim_files), start=1):
    df_sim = pd.read_csv(os.path.join(data_dir, file))
    df_sim = (
        df_sim[df_sim['gen'] >= 2500]
        .groupby(['gen', 'strategy'])['agent_id'].count()   # agents per (gen, strategy)
        .groupby(level='strategy').mean()                   # mean over generations
        .rename('count')
        .reset_index()
    )
    # Extract the parameters from the filename (kept as the matched text)
    df_sim['delta'] = re.search(r'Delta=(\d+\.\d+)', file).group(1)
    df_sim['lambda'] = re.search(r'Lambda=(\d+\.\d+)', file).group(1)
    df_sim['kappa'] = re.search(r'Kappa=(\d+\.\d+)', file).group(1)
    df_sim['sim'] = sim
    df_sim['seed'] = re.search(r'seed=(\d+)', file).group(1)
    frames.append(df_sim)

# pd.concat() raises on an empty list, so fall back to an empty dataframe.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

df.to_csv("cogsci-talk/data/model2_simulations_aggregated.csv", index=False)

100%|██████████| 3034/3034 [23:59<00:00,  2.11it/s]


In [5]:
# Aggregate payoffs for the Model 2 simulations in cogsci-talk/data/
# -- NOTE: this is not used in the paper figures
# - For every generation with gen >= 2500, take the mean payoff of each strategy
# - Average those per-generation means for each strategy
# - Attach the parameters (Delta, Lambda, Kappa, seed) parsed from the filename
# - Save the combined dataframe to a .csv file in cogsci-talk/data/
#
# The aggregated outputs are written into the same folder that is being read;
# the "model" filename prefix is what keeps them out of the input list.

data_dir = "cogsci-talk/data/"

# os.listdir() returns names in arbitrary order; sorting makes the `sim` ids
# (and the row order of the output file) reproducible across runs and machines.
sim_files = sorted(
    name for name in os.listdir(data_dir)
    if name.endswith(".csv") and not name.startswith("model")
)

# Collect one small frame per simulation and concatenate once at the end,
# instead of re-copying a growing dataframe on every iteration.
frames = []
sim = 0
for sim, file in enumerate(tqdm(sim_files), start=1):
    df_sim = pd.read_csv(os.path.join(data_dir, file))
    df_sim = (
        df_sim[df_sim['gen'] >= 2500]
        .groupby(['gen', 'strategy'])['payoff'].mean()   # mean payoff per (gen, strategy)
        .groupby(level='strategy').mean()                # mean over generations
        .rename('avg_payoff')
        .reset_index()
    )
    # Extract the parameters from the filename (kept as the matched text)
    df_sim['delta'] = re.search(r'Delta=(\d+\.\d+)', file).group(1)
    df_sim['lambda'] = re.search(r'Lambda=(\d+\.\d+)', file).group(1)
    df_sim['kappa'] = re.search(r'Kappa=(\d+\.\d+)', file).group(1)
    df_sim['sim'] = sim
    df_sim['seed'] = re.search(r'seed=(\d+)', file).group(1)
    frames.append(df_sim)

# pd.concat() raises on an empty list, so fall back to an empty dataframe.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

df.to_csv("cogsci-talk/data/model2_simulations_aggregated_payoffs.csv", index=False)

  0%|          | 0/3034 [00:00<?, ?it/s]

100%|██████████| 3034/3034 [56:16<00:00,  1.11s/it]   


Check that we ran all the simulations we were meant to run.

In [63]:
# Build the full list of runs: every parameter row from both parameter files,
# repeated once for each seed from 42 to 51.
params1 = pd.read_csv("cogsci-talk/params.csv")
params2 = pd.read_csv("cogsci-talk/params2.csv")
params = pd.concat([params1, params2], ignore_index=True)

# One snapshot of the parameter table per seed; `params` keeps the last seed
# in its 'Seed' column once the loop has finished.
per_seed_frames = []
for seed in range(42, 52):
    params['Seed'] = seed
    per_seed_frames.append(params.copy())

params_to_run = (
    pd.concat(per_seed_frames, ignore_index=True)
    .sort_values(by=['Delta', 'Lambda', 'Kappa', 'Seed'])
    .reset_index(drop=True)
)

In [64]:
# Work out which parameter combinations have already been run by parsing the
# parameters out of the simulation filenames in cogsci-talk/data/.

run_columns = ['Delta', 'Lambda', 'Kappa', 'Seed']

# One record per simulation file. Building the frame once from a list of dicts
# keeps 'Seed' an integer (a single mixed float/int frame would upcast it to
# float) and avoids re-copying a growing dataframe on every iteration.
run_records = []
for file in os.listdir("cogsci-talk/data/"):
    if file.endswith(".csv") and not file.startswith("model"):
        run_records.append({
            'Delta': float(re.search(r'Delta=(\d+\.\d+)', file).group(1)),
            'Lambda': float(re.search(r'Lambda=(\d+\.\d+)', file).group(1)),
            'Kappa': float(re.search(r'Kappa=(\d+\.\d+)', file).group(1)),
            'Seed': int(re.search(r'seed=(\d+)', file).group(1)),
        })

# Passing `columns` (and fixing the dtypes) means an empty folder yields an
# empty, correctly-typed frame instead of an error when naming the columns.
already_run = (
    pd.DataFrame(run_records, columns=run_columns)
    .astype({'Delta': 'float64', 'Lambda': 'float64', 'Kappa': 'float64', 'Seed': 'int64'})
    .sort_values(run_columns)
    .reset_index(drop=True)
)

#already_run = pd.read_csv("cogsci-talk/data/model2_simulations_aggregated.csv")
#already_run_copy = already_run.copy()
#already_run = already_run[['delta', 'lambda', 'kappa', 'seed']].drop_duplicates()

# Change the column names to match the params dataframe
#already_run.columns = ['Delta', 'Lambda', 'Kappa', 'Seed']
#already_run.to_csv("cogsci-talk/params2_already_run.csv", index=False)

In [65]:
# Find the parameters we still need to run.
# The left merge (on the columns the two frames share) with indicator=True adds a
# `_merge` column; rows marked 'left_only' are in params_to_run but have no match
# in already_run.
# Any `_merge` column left over from a previous execution of this cell is dropped
# first, because merge() refuses to add an indicator column whose name already
# exists -- without this the cell fails when it is run a second time.
params_to_run = (
    params_to_run
    .drop(columns='_merge', errors='ignore')
    .merge(already_run, how='left', indicator=True)
)
params_not_run = params_to_run[params_to_run['_merge'] == 'left_only']

# Save the parameters we still need to run to two files, splitting them up so that we can run them in parallel
# NOTE(review): the `_merge` indicator column is written to these files as well -- confirm the consumer ignores it.
halfway = len(params_not_run) // 2
params_not_run.iloc[:halfway].to_csv("cogsci-talk/params_not_run1.csv", index=False)
params_not_run.iloc[halfway:].to_csv("cogsci-talk/params_not_run2.csv", index=False)