### Import necessary libraries, set options

In [None]:
#!pip install git+https://github.com/mmngreco/IneqPy.git

In [None]:
import ineqpy
import numpy as np
import os
import pandas as pd
import re
import time

# Let the notebook display up to 125 columns before pandas truncates the view.
pd.set_option('display.max_columns', 125)

### Combine separate datasets into one

In [None]:
# Folder that is scanned for the CSV files to combine.
# NOTE(review): "path/to/data/processed-data" looks like a placeholder — confirm the real location before running.
path_to_data = os.path.join("path/to/data/processed-data")
path_to_data

In [None]:
# Read every CSV in `path_to_data` (except the experiment log), tag each file
# with its name and a running session number, and stack everything into `data`.
# Columns that are recoded or regenerated further down are renamed here so the
# cleaned versions can take over the original names.
renamed_columns = {"coopChoice": "coopChoiceTemp",
                   "coopChoiceEgo_add": "coopChoiceEgo_addTemp",
                   "coopChoiceAlter_add": "coopChoiceAlter_addTemp",
                   "coopChoiceEgo_cut": "coopChoiceEgo_cutTemp",
                   "coopChoiceAlter_cut": "coopChoiceAlter_cutTemp",
                   "pid": "pidTemp",
                   "prefer": "preferTemp",
                   "round": "roundTemp",
                   "score": "scoreAfterCoop",
                   "Unnamed: 0": "original_idx"}

session_frames = []
session_no = 1
# sorted() makes the file -> session number assignment independent of the
# arbitrary order in which os.listdir returns entries.
for file in sorted(os.listdir(path_to_data)):
    if file.endswith(".csv") and file != "experiment-logs.csv":
        temp = pd.read_csv(os.path.join(path_to_data, file))
        temp = temp.rename(columns = renamed_columns)
        temp["file_name"] = file
        temp["session_no"] = session_no
        session_frames.append(temp)
        session_no += 1

# Concatenate once at the end: growing a DataFrame inside the loop copies all
# previously loaded rows again on every iteration.
if not session_frames:
    data = None  # no matching files found (same outcome as before)
elif len(session_frames) == 1:
    data = session_frames[0].copy()
else:
    data = pd.concat(session_frames, axis = 0, ignore_index = True, sort = True)

In [None]:
# Sanity checks on the combined data: rows, columns, number of sessions.
n_rows, n_cols = data.shape
print(n_rows)
print(n_cols)
print(data["session_no"].nunique())
data.head(25)

### Recode cooperation from -1/1 to 0/1

In [None]:
def recode_coop(row):
    """Recode a raw cooperation choice from -1/1 to 0/1.

    Parameters
    ----------
    row : pandas.Series
        Row whose first element holds the raw choice.

    Returns
    -------
    int or float
        1 if the raw value is 1, 0 if it is -1, and NaN for anything else
        (including missing values).
    """
    # Explicit positional access: `row[0]` on a label-indexed Series relies on
    # the deprecated "integer key means position" fallback.
    raw_choice = row.iloc[0]
    if raw_choice == 1:
        return 1
    elif raw_choice == -1:
        return 0
    else:
        return np.nan
    
# Recode the player's own choice and the four ego/alter choice columns;
# each recoded column takes over the name its raw "...Temp" column had.
raw_column_for = {
    'coopChoice':          'coopChoiceTemp',
    'coopChoiceEgo_add':   'coopChoiceEgo_addTemp',
    'coopChoiceAlter_add': 'coopChoiceAlter_addTemp',
    'coopChoiceEgo_cut':   'coopChoiceEgo_cutTemp',
    'coopChoiceAlter_cut': 'coopChoiceAlter_cutTemp',
}
for recoded_col, raw_col in raw_column_for.items():
    data[recoded_col] = data[[raw_col]].apply(recode_coop, axis = 1)

data.head()

### Create new columns

##### (i) Neighbors as Python list

In [None]:
def get_neighbors_as_list(row):
    """Turn the bracketed, comma-separated `neighbors` string into a list.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'neighbors' entry, e.g. "[a, b, c]" or NaN.

    Returns
    -------
    list of str
        The neighbour identifiers; an empty list when the value is missing
        or the bracketed list is empty.
    """
    neighbors = row['neighbors']
    # NaN is the only value that differs from itself.
    if neighbors != neighbors:
        return []
    # Raw string: '\[' in a normal literal is an invalid escape sequence.
    stripped = re.sub(r'(\[|\])', '', neighbors)
    # "".split(", ") yields [''], so "[]" would otherwise produce one phantom
    # neighbour; drop empty tokens.
    return [name for name in stripped.split(", ") if name]

# Parse the raw neighbour strings row by row and keep the lists in a helper column.
neighbor_name_lists = data.apply(get_neighbors_as_list, axis = 1)
data['neighborsListTemp'] = neighbor_name_lists
data.head()

##### (ii) Score before making cooperate/defect choice

In [None]:
def get_score_before_coop(row):
    """Reconstruct the score a player had before the cooperate/defect choice.

    For a cooperator (coopChoice == 1) 50 points per entry in
    'neighborsListTemp' are added back to 'scoreAfterCoop' (presumably the
    amount paid per neighbour — confirm against the game rules); for a
    defector (coopChoice == 0) the score is returned unchanged. Any other
    choice value gives NaN.
    """
    choice = row['coopChoice']
    score_after = row['scoreAfterCoop']
    if choice == 1:
        return score_after + 50 * len(row['neighborsListTemp'])
    if choice == 0:
        return score_after
    return np.nan
        
# Compute the pre-choice score for every row.
score_before = data.apply(get_score_before_coop, axis = 1)
data['scoreBeforeCoop'] = score_before
data.head()

### Assign unique pid to players

In [None]:
# Map every "<raw pid>_<session number>" key to a consecutive integer id,
# numbered in order of first appearance in `data`.
pid_dict = {}
idx = 1

# Only two columns are needed, so walk them directly instead of materialising
# a full row Series per record with iterrows().
for raw_pid, session in zip(data['pidTemp'], data['session_no']):
    if not raw_pid:
        continue
    new_pid = raw_pid + "_" + str(session)
    if new_pid not in pid_dict:
        pid_dict[new_pid] = idx
        idx += 1

print(max(pid_dict.values()))
list(pid_dict.items())[:5]

In [None]:
def gen_unique_pid(row):
    """Look up the integer id stored in `pid_dict` for this row's player.

    The lookup key is the raw pid joined with the session number by an
    underscore. Rows with an empty/falsy raw pid get an empty string.
    """
    raw_pid = row['pidTemp']
    if not raw_pid:
        return ""
    return pid_dict[raw_pid + "_" + str(row['session_no'])]

# Replace the raw pids by the integer ids.
unique_pids = data.apply(gen_unique_pid, axis = 1)
data['pid'] = unique_pids
data.head()

In [None]:
def gen_unique_pid_neighbors(row):
    """Translate a row's raw neighbour names into integer ids.

    Each name is combined with the row's session number and looked up in
    `pid_dict`. A name not yet in the dictionary is registered under the
    next free id, which advances the module-level counter `idx`.
    Returns the list of ids (empty when the row has no neighbours).
    """
    global idx
    neighbor_names = row['neighborsListTemp']
    if not neighbor_names:
        return []
    session_suffix = "_" + str(row['session_no'])
    neighbor_ids = []
    for name in neighbor_names:
        key = name + session_suffix
        if key not in pid_dict:
            # First time this name shows up: give it a fresh id.
            pid_dict[key] = idx
            idx += 1
        neighbor_ids.append(pid_dict[key])
    return neighbor_ids

# Convert every neighbour list to integer ids, then show the highest id in use.
neighbor_id_lists = data.apply(gen_unique_pid_neighbors, axis = 1)
data['neighborsList'] = neighbor_id_lists
print(max(pid_dict.values()))
data.head()

### Replace words first/second with actual conditions

In [None]:
def gen_more_fair(row):
    """Resolve the 'first'/'second' answer in 'first_vs_second' to a version.

    Returns the row's 'first_version' or 'second_version' value accordingly,
    or an empty string for any other answer.
    """
    answer = row['first_vs_second']
    if answer == "first":
        return row['first_version']
    if answer == "second":
        return row['second_version']
    return ""

# Store which version each participant named in 'first_vs_second'.
more_fair_version = data.apply(gen_more_fair, axis = 1)
data['more_fair'] = more_fair_version
data.head()

In [None]:
def gen_prefer(row):
    """Resolve the 'first'/'second' answer in 'preferTemp' to a version.

    Returns the row's 'first_version' or 'second_version' value accordingly,
    or an empty string for any other answer.
    """
    answer = row['preferTemp']
    if answer == "first":
        return row['first_version']
    if answer == "second":
        return row['second_version']
    return ""

# Store which version each participant named in 'preferTemp'.
preferred_version = data.apply(gen_prefer, axis = 1)
data['prefer'] = preferred_version
data.head()

### Add flag for sessions 68 and earlier

In [None]:
# 1 for sessions numbered 68 or lower, 0 for all later sessions.
data['flag'] = (data['session_no'] <= 68).astype('int64')
data.head()

### Increment version 2 rounds by 1 (round 0 -> round 1, round 1 -> round 2, etc.)

In [None]:
def increment_version2_rounds(row):
    """Return the round number with version-2 rounds shifted up by one.

    Version 1 rows keep 'roundTemp' as is, version 2 rows get
    int(roundTemp + 1), and rows with any other version value get NaN.
    """
    version = row['version']
    if version == 2:
        return int(row['roundTemp'] + 1)
    if version == 1:
        return row['roundTemp']
    return np.nan
    
# Put both versions on the same round numbering.
aligned_round = data.apply(increment_version2_rounds, axis = 1)
data['round'] = aligned_round
data.head(25)

### Calculate average level of cooperation

In [None]:
# Columns that belong to the rewiring step: the add-tie group, the cut-tie
# group, and the raw (un-recoded) ego/alter choice columns.
rewiring_cols = (
    ["addChoice", "scoreEgo_add", "scoreAlter_add", "coopChoiceEgo_add", "coopChoiceAlter_add"]
    + ["cutChoice", "scoreEgo_cut", "scoreAlter_cut", "coopChoiceEgo_cut", "coopChoiceAlter_cut"]
    + ["coopChoiceEgo_addTemp", "coopChoiceAlter_addTemp"]
    + ["coopChoiceEgo_cutTemp", "coopChoiceAlter_cutTemp"]
)

In [None]:
# Everything except the rewiring columns, reduced to one row per
# session / version / round / player (the first occurrence is kept).
# drop_duplicates is chained rather than run in place on the .loc slice,
# which would modify a possible view of `data` (SettingWithCopyWarning).
dataRest = (
    data.loc[:, ~data.columns.isin(rewiring_cols)]
        .drop_duplicates(subset = ["session_no", "version", "round", "pid"],
                         keep = "first")
)

print(len(dataRest))
dataRest.head(25)

##### (i) In the network (global)

In [None]:
# Mean of coopChoice over all players in each session / version / round.
avgCoopGlobal = (
    dataRest.groupby(['session_no', 'version', 'round'])['coopChoice']
            .mean()
            .reset_index(name = 'avgCoopGlobal')
)

print(avgCoopGlobal.shape[0])
avgCoopGlobal.head()

In [None]:
# Attach the network-wide average to every player row of the same round.
dataRest2 = pd.merge(dataRest, avgCoopGlobal, on = ["session_no", "version", "round"])
print(dataRest2.shape[0])
dataRest2.head()

##### (ii) In a particular neighborhood of the network (local)

In [None]:
# Zero-filled frame with one row per row of dataRest, to be filled by the loop.
local_coop_columns = ["session_no", "version", "round", "pid", "avgCoopLocal"]
avgCoopLocal = pd.DataFrame(data = np.zeros(shape = (dataRest.shape[0], len(local_coop_columns))),
                            columns = local_coop_columns)

avgCoopLocal.head()

In [None]:
# For every player and round, average coopChoice over the player and those of
# their listed neighbours that are present in the same session/version/round.
time0 = time.time()

ix = 0
for session in range(1, max(dataRest['session_no']) + 1):
    for version in [1, 2]:
        for rnd in np.unique(dataRest[dataRest['version'] == version]['round']):
            temp = dataRest[(dataRest['session_no'] == session) &
                            (dataRest['version'] == version) &
                            (dataRest['round'] == rnd)]
            # Build the per-round lookups once instead of re-filtering `temp`
            # (and rebuilding the pid list) for every single neighbour.
            coop_by_pid = {}
            neighbors_by_pid = {}
            for member, member_choice, member_neighbors in zip(temp['pid'],
                                                               temp['coopChoice'],
                                                               temp['neighborsList']):
                # setdefault keeps the first row per pid.
                coop_by_pid.setdefault(member, member_choice)
                neighbors_by_pid.setdefault(member, member_neighbors)
            for pid in temp['pid']:
                # Neighbours missing from this round are skipped.
                temp_lst = [coop_by_pid[neighbor]
                            for neighbor in neighbors_by_pid[pid]
                            if neighbor in coop_by_pid]
                # The player's own choice is part of the neighbourhood average.
                temp_lst.append(coop_by_pid[pid])
                temp_avg = np.mean(temp_lst)
                avgCoopLocal.loc[ix] = [session, version, rnd, pid, temp_avg]
                ix += 1

time1 = time.time()
# Elapsed time in minutes.
print((time1-time0)/60)

In [None]:
# Row count and a preview of the filled local-cooperation table.
print(avgCoopLocal.shape[0])
avgCoopLocal.head()

In [None]:
# Attach the neighbourhood average to the matching player/round rows.
dataRest3 = pd.merge(dataRest2, avgCoopLocal, on = ["session_no", "version", "round", "pid"])
print(dataRest3.shape[0])
dataRest3.head()

### Calculate level of inequality (using the Gini index)

##### (i) In the network (global)

In [None]:
# Zero-filled frame with one row per row of avgCoopGlobal, to be filled by the loop.
gini_global_columns = ["session_no", "version", "round", "giniGlobal"]
giniGlobal = pd.DataFrame(data = np.zeros(shape = (avgCoopGlobal.shape[0], len(gini_global_columns))),
                          columns = gini_global_columns)

giniGlobal.head()

In [None]:
# Gini index of scoreBeforeCoop over all players, per session/version/round.
# A row is written for every combination that is looped over; combinations
# with fewer than two non-missing scores get NaN.
ix = 0
last_session = max(dataRest['session_no'])
for session in range(1, last_session + 1):
    for version in [1, 2]:
        rounds_in_version = np.unique(dataRest[dataRest['version'] == version]['round'])
        for rnd in rounds_in_version:
            in_group = ((dataRest['session_no'] == session) &
                        (dataRest['version'] == version) &
                        (dataRest['round'] == rnd))
            temp = dataRest[in_group]
            scores = np.array(temp['scoreBeforeCoop'])
            scores = scores[~np.isnan(scores)]
            gini_global = np.nan
            if len(scores) > 1:
                gini_global = ineqpy.gini(income = scores)
                # Print the inputs whenever the library itself returns NaN.
                if gini_global != gini_global:
                    print(session, version, rnd)
                    print(scores)
            giniGlobal.loc[ix] = [session, version, rnd, gini_global]
            ix += 1

In [None]:
# Row count and a preview of the filled global Gini table.
print(giniGlobal.shape[0])
giniGlobal.head()

In [None]:
# Attach the network-wide Gini index to every player row of the same round.
dataRest4 = pd.merge(dataRest3, giniGlobal, on = ["session_no", "version", "round"])
print(dataRest4.shape[0])
dataRest4.head()

##### (ii) In a particular neighborhood of the network (local)

In [None]:
# Zero-filled frame with one row per row of dataRest, to be filled by the loop.
gini_local_columns = ["session_no", "version", "round", "pid", "giniLocal"]
giniLocal = pd.DataFrame(data = np.zeros(shape = (dataRest.shape[0], len(gini_local_columns))),
                         columns = gini_local_columns)

giniLocal.head()

In [None]:
# For every player and round, compute the Gini index of scoreBeforeCoop over
# the player and those of their listed neighbours that are present in the
# same session/version/round.
time0 = time.time()

ix = 0
for session in range(1, max(dataRest['session_no']) + 1):
    for version in [1, 2]:
        for rnd in np.unique(dataRest[dataRest['version'] == version]['round']):
            temp = dataRest[(dataRest['session_no'] == session) &
                            (dataRest['version'] == version) &
                            (dataRest['round'] == rnd)]
            # Build the per-round lookups once instead of re-filtering `temp`
            # (and rebuilding the pid list) for every single neighbour.
            score_by_pid = {}
            neighbors_by_pid = {}
            for member, member_score, member_neighbors in zip(temp['pid'],
                                                              temp['scoreBeforeCoop'],
                                                              temp['neighborsList']):
                # setdefault keeps the first row per pid.
                score_by_pid.setdefault(member, member_score)
                neighbors_by_pid.setdefault(member, member_neighbors)
            for pid in temp['pid']:
                # Neighbours missing from this round are skipped.
                temp_lst = [score_by_pid[neighbor]
                            for neighbor in neighbors_by_pid[pid]
                            if neighbor in score_by_pid]
                # The player's own score is part of the neighbourhood.
                temp_lst.append(score_by_pid[pid])
                scores = np.array(temp_lst)
                scores = scores[~np.isnan(scores)]
                # At least two non-missing scores are required.
                if len(scores) > 1:
                    gini_local = ineqpy.gini(income = scores)
                else:
                    gini_local = np.nan
                giniLocal.loc[ix] = [session, version, rnd, pid, gini_local]
                ix += 1

time1 = time.time()
# Elapsed time in minutes.
print((time1-time0)/60)

In [None]:
# Row count and a preview of the filled local Gini table.
print(giniLocal.shape[0])
giniLocal.head()

In [None]:
# Attach the neighbourhood Gini index to the matching player/round rows.
dataRest5 = pd.merge(dataRest4, giniLocal, on = ["session_no", "version", "round", "pid"])
print(dataRest5.shape[0])
dataRest5.head()

### Re-arrange data such that choiceStep and rewiringStep rounds align

In [None]:
# Rewiring columns keyed by player and by "round minus one", so that they can
# be joined onto the choice-step row of the preceding round.
# .copy() gives an independent frame; adding a column to a bare slice of
# `data` triggers SettingWithCopyWarning.
dataRewiring = data[["session_no", "version", "round", "pid"] + rewiring_cols].copy()

# Vectorised equivalent of int(round - 1) per row; like the row-wise version
# it raises if `round` contains missing values.
dataRewiring["roundMinusOne"] = (dataRewiring["round"] - 1).astype('int64')

dataRewiring = dataRewiring.drop(columns = ['round'])

print(len(dataRewiring))
dataRewiring.head()

In [None]:
# Left join: every choice-step row is kept and receives the rewiring columns
# whose roundMinusOne equals its round (where such a row exists).
choice_keys = ["session_no", "version", "round", "pid"]
rewiring_keys = ["session_no", "version", "roundMinusOne", "pid"]
data_final_temp = pd.merge(dataRest5,
                           dataRewiring,
                           how = "left",
                           left_on = choice_keys,
                           right_on = rewiring_keys)

print(data_final_temp.shape[0])
data_final_temp.head()

### Add row-specific columns

In [None]:
# Start the timer for the row-wise apply in this cell.
time0 = time.time()

def add_condition_rs(row):
    """Add the columns that describe the version this row belongs to.

    Depending on row['version'] (1 or 2) the matching earned/equal pair
    (earned1/equal1 or earned2/equal2) is translated into 'earned', 'equal'
    and a two-letter 'condition' code ('ru', 're', 'eu', 'ee'), and the
    matching questionnaire scores are copied to 'f_score' and 'trust_score'
    (f_score2/trust_score2 for version 1, f_score3/trust_score3 for
    version 2). Pairs other than 0/1 combinations give NaN for the three
    condition columns; any other version gives NaN for all five columns.

    The row is modified in place and returned.
    """
    # Read every input first so a missing column fails regardless of branch.
    earned1 = row['earned1']
    equal1 = row['equal1']
    earned2 = row['earned2']
    equal2 = row['equal2']
    version = row['version']
    f_after_v1 = row['f_score2']
    f_after_v2 = row['f_score3']
    trust_after_v1 = row['trust_score2']
    trust_after_v2 = row['trust_score3']

    if version == 1:
        earned_raw, equal_raw = earned1, equal1
        f_score, trust_score = f_after_v1, trust_after_v1
    elif version == 2:
        earned_raw, equal_raw = earned2, equal2
        f_score, trust_score = f_after_v2, trust_after_v2
    else:
        # Unknown version: nothing can be derived.
        row['earned'] = np.nan
        row['equal'] = np.nan
        row['condition'] = np.nan
        row['f_score'] = np.nan
        row['trust_score'] = np.nan
        return row

    # (earned, equal) -> condition code, checked in the same order as the
    # four explicit comparisons this replaces.
    condition_codes = (
        (0, 0, 'ru'),
        (0, 1, 're'),
        (1, 0, 'eu'),
        (1, 1, 'ee'),
    )
    for earned_code, equal_code, label in condition_codes:
        if earned_raw == earned_code and equal_raw == equal_code:
            row['earned'] = earned_code
            row['equal'] = equal_code
            row['condition'] = label
            break
    else:
        # Missing or unexpected values in the earned/equal pair.
        row['earned'] = np.nan
        row['equal'] = np.nan
        row['condition'] = np.nan

    row['f_score'] = f_score
    row['trust_score'] = trust_score
    return row

# Row-wise pass that adds earned / equal / condition / f_score / trust_score.
data_final_temp2 = data_final_temp.apply(add_condition_rs, axis = 1)

time1 = time.time()
elapsed_minutes = (time1 - time0) / 60
print(elapsed_minutes)

print(data_final_temp2.shape[0])
data_final_temp2.head()

### Generate column that shows how player's score changed from word game to PG game

In [None]:
# One round-1 row per session / version / player, holding the two scores
# needed for the change-in-score column.
# Filter on round 1 *before* de-duplicating: de-duplicating first keeps each
# player's first row whatever its round, so a player whose first row is not
# round 1 would be lost here (and, through the later merge, from the final data).
round_one_rows = data_final_temp2[data_final_temp2['round'] == 1]
change_in_score = round_one_rows.drop_duplicates(subset = ["session_no", "version", "pid"],
                                                 keep = "first").reset_index(drop = True)
change_in_score = change_in_score[['session_no', 'version', 'pid', 'round', 'scoreBeforeCoop', 'score1']]
change_in_score.head()

In [None]:
def get_change_in_score(row):
    """Return scoreBeforeCoop minus score1 for round-1 rows, NaN otherwise."""
    if row['round'] != 1:
        return np.nan
    return row['scoreBeforeCoop'] - row['score1']
    
# Compute the change per player, then keep only the merge keys and the result.
score_change = change_in_score.apply(get_change_in_score, axis = 1)
change_in_score['change_in_score'] = score_change
kept_columns = ['session_no', 'version', 'pid', 'score1', 'change_in_score']
change_in_score = change_in_score[kept_columns]
change_in_score.head()

In [None]:
# Attach each player's change in score to all of their rows (default inner join).
data_final = pd.merge(data_final_temp2, change_in_score,
                      on = ["session_no", "version", "pid", "score1"])
print(data_final.shape[0])
data_final.head()

### Turn why_coop into a list

In [None]:
def turn_why_coop_to_list(row):
    """Turn the bracketed, comma-separated `why_coop` string into a list.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'why_coop' entry, e.g. "[a, b]" or NaN.

    Returns
    -------
    list of str or float
        The individual entries, or NaN when the value is missing.
    """
    why_coop = row['why_coop']
    # NaN is the only value that differs from itself.
    if why_coop != why_coop:
        return np.nan
    # Raw string: '\[' in a normal literal is an invalid escape sequence.
    return re.sub(r'(\[|\])', '', why_coop).split(", ")

# Keep the parsed answers next to the raw string column.
why_coop_lists = data_final.apply(turn_why_coop_to_list, axis = 1)
data_final['why_coop_list'] = why_coop_lists
data_final.head(25)

### Add variable for number of players in a given session/version/round

In [None]:
# Number of distinct players per session / version / round.
num_players = (
    data_final.groupby(['session_no', 'version', 'round'])['pid']
              .nunique()
              .reset_index(name = 'num_players')
)
num_players.head()

In [None]:
# Attach the player count to every row of the same session / version / round.
data_final = pd.merge(data_final, num_players,
                      how = "inner",
                      on = ["session_no", "version", "round"])
print(data_final.shape[0])
data_final.head()

### Re-order columns in a more logical way

In [None]:
# Variables placed first in the final dataset, grouped by theme.
main_vars = (
    # session design and identifiers
    ["session_no", "earned1", "equal1", "earned2", "equal2", "first_version", "second_version"]
    + ["version", "round", "pid"]
    + ["earned", "equal", "condition"]
    # choice step
    + ["coopChoice", "scoreBeforeCoop"]
    # rewiring step
    + ["addChoice", "scoreEgo_add", "scoreAlter_add", "coopChoiceEgo_add", "coopChoiceAlter_add"]
    + ["cutChoice", "scoreEgo_cut", "scoreAlter_cut", "coopChoiceEgo_cut", "coopChoiceAlter_cut"]
    # derived cooperation and inequality measures
    + ["avgCoopGlobal", "avgCoopLocal", "giniGlobal", "giniLocal"]
    # scores and questionnaire items
    + ["score1", "score2", "score3", "change_in_score"]
    + ["f_score2", "f_score3", "f_score"]
    + ["trust_score2", "trust_score3", "trust_score"]
    + ["more_fair", "prefer"]
    + ["why_coop_list", "why_coop_other"]
    + ["num_other"]
    + ["age", "gender", "race", "education", "income", "politics"]
    # network
    + ["neighborsList"]
    + ["num_players"]
)

# Tutorial columns: Tutorial100-Tutorial110, Tutorial200-Tutorial209, Tutorial300-Tutorial304.
tutorial_numbers = list(range(100, 111)) + list(range(200, 210)) + list(range(300, 305))
tutorial_vars = ["Tutorial" + str(number) for number in tutorial_numbers]

# Raw, intermediate and bookkeeping columns, placed after the main variables.
aux_vars = (
    ["coopChoiceTemp"]
    + ["coopChoiceEgo_addTemp", "coopChoiceAlter_addTemp", "coopChoiceEgo_cutTemp", "coopChoiceAlter_cutTemp"]
    + ["country", "ip_to_country"]
    + ["date", "hour"]
    + ["file_name"]
    + ["first_vs_second", "preferTemp"]
    + ["flag"]
    + ["neighbors", "neighborsListTemp"]
    + ["original_idx"]
    + ["pidTemp"]
    + ["roundMinusOne", "roundTemp"]
    + ["scoreAfterCoop"]
    + ["screenAnswer"]
    + ["why_coop"]
    + ["words"]
    + tutorial_vars
)

# Keep exactly the listed columns: main variables first, auxiliary ones after.
ordered_columns = main_vars + aux_vars
data_final = data_final[ordered_columns]

n_rows, n_cols = data_final.shape
print(n_rows)
print(n_cols)
data_final.head(25)

### Save final dataset (pickle and CSV) for later use

In [None]:
# Write the final dataset as a pickle and as a CSV into the "pkl" and "csv"
# sub-folders of the data directory.
pkl_dir = os.path.join(path_to_data, "pkl")
csv_dir = os.path.join(path_to_data, "csv")
# to_pickle / to_csv do not create missing folders, so make sure they exist.
os.makedirs(pkl_dir, exist_ok = True)
os.makedirs(csv_dir, exist_ok = True)
data_final.to_pickle(os.path.join(pkl_dir, "data_final.pkl"))
data_final.to_csv(os.path.join(csv_dir, "data_final.csv"))