This notebook preprocesses the MobLab dataset. The processed data is saved in the `data/MobLab/joint.csv` file, where the rows are players and the columns are games. Only the first-round records are kept. 

In [1]:
import ast
from collections import defaultdict

import pandas as pd

In [2]:
# Preview the joint table already stored on disk (rows: players, columns: games).
# This cell needs data/MobLab/joint.csv to exist before it is run.
df_joint = pd.read_csv(filepath_or_buffer='data/MobLab/joint.csv')
df_joint

Unnamed: 0,UserID,dictator,ultimatum_1,ultimatum_2,trust_1,trust_3,PG,bomb
0,131074,,,,,,,
1,131075,,,,,,,
2,131078,,,,,,,
3,131079,,,,,,,
4,131080,,,,,,,
...,...,...,...,...,...,...,...,...
68774,131064,,,,,,,
68775,262137,,,,,,3.0,
68776,393211,,,,,,,51.0
68777,131068,,,,,,,


In [3]:
# Number of recorded game entries: count every non-NaN cell, then subtract
# one per row to discount the UserID column (assumed to be filled on every row).
print(df_joint.count().sum() - df_joint.shape[0])

82057


## Users

In [None]:
# One raw export per game; together they define the set of players.
files = [
    'data/MobLab/dictator.csv',
    'data/MobLab/ultimatum_strategy.csv',
    'data/MobLab/trust_investment.csv',
    'data/MobLab/public_goods_linear_water.csv',
    'data/MobLab/bomb_risk.csv',
]

# Gather the UserIDs seen in each file; a player appearing in several games
# is listed several times here and collapsed by the set() further down.
users = []
for file in files:
    df = pd.read_csv(file)
    users += list(df['UserID'].unique())

# Start the joint table with one row per distinct player and an empty
# column for every game measure that the cells below fill in.
df_joint = pd.DataFrame(
    columns=[
        'UserID',
        'dictator',
        'ultimatum_1',
        'ultimatum_2',
        'trust_1',
        'trust_3',
        'PG',
        'bomb',
    ]
)
df_joint['UserID'] = list(set(users))
df_joint

## Games
### Dictator

In [None]:
# Dictator game: record each player's first valid round-1 move (as the
# 'first' role, with Total == 100) in the 'dictator' column of df_joint.
df = pd.read_csv('data/MobLab/dictator.csv')
df = df[
    (df['gameType'] == 'dictator')
    & (df['Role'] == 'first')
    & (df['Round'] == 1)
    & (df['Total'] == 100)
    & (df['move'] != 'None')  # missing moves are stored as the string 'None'
].copy()  # explicit copy so the column assignment below never hits a view

# Keep only moves inside the valid range (both ends inclusive).
binrange = (0, 100)
df['move'] = df['move'].astype(float)
df = df[df['move'].between(*binrange)]

# First remaining row per player, looked up once instead of rescanning both
# frames for every user (the per-user loop was quadratic in the row count).
first_move = df.drop_duplicates(subset='UserID').set_index('UserID')['move']
matched = df_joint['UserID'].isin(first_move.index)
df_joint.loc[matched, 'dictator'] = df_joint.loc[matched, 'UserID'].map(first_move)
df_joint

### Ultimatum

In [None]:
# Ultimatum (strategy method): each move holds a pair whose element 0 is the
# proposal and element 1 is the acceptance value. The first valid round-1
# pair per player goes into 'ultimatum_1' (propose) and 'ultimatum_2' (accept).
# Path casing fixed ('MobLab', not 'Moblab') to match every other cell; the
# old spelling fails on case-sensitive filesystems.
df = pd.read_csv('data/MobLab/ultimatum_strategy.csv')
df = df[
    (df['gameType'] == 'ultimatum_strategy')
    & (df['Role'] == 'player')
    & (df['Round'] == 1)
    & (df['Total'] == 100)
    & (df['move'] != 'None')  # missing moves are stored as the string 'None'
].copy()  # explicit copy so the column assignments below never hit a view

# Parse each move exactly once. ast.literal_eval only accepts Python
# literals, so text from the CSV can no longer execute code the way the
# previous eval() call could.
parsed_move = df['move'].apply(ast.literal_eval)
df['propose'] = parsed_move.apply(lambda pair: pair[0])
df['accept'] = parsed_move.apply(lambda pair: pair[1])

# Both numbers must lie in [0, 100].
df = df[df['propose'].between(0, 100) & df['accept'].between(0, 100)]
# df.head()

# First remaining row per player, looked up once instead of rescanning both
# frames for every user.
first_rows = df.drop_duplicates(subset='UserID').set_index('UserID')
matched = df_joint['UserID'].isin(first_rows.index)
matched_ids = df_joint.loc[matched, 'UserID']
df_joint.loc[matched, 'ultimatum_1'] = matched_ids.map(first_rows['propose'])
df_joint.loc[matched, 'ultimatum_2'] = matched_ids.map(first_rows['accept'])
df_joint

### Trust

In [None]:
# Trust game, round 1 only.
#   trust_1: move of the 'first' role, restricted to [0, 100].
#   trust_3: amount returned by the 'second' role when the investment
#            recorded in roundResult was exactly 50.
df = pd.read_csv('data/MobLab/trust_investment.csv')
df = df[
    (df['gameType'] == 'trust_investment')
    & (df['Round'] == 1)
    & (df['move'] != 'None')  # missing moves are stored as the string 'None'
].copy()  # explicit copy so the column assignment below never hits a view

binrange = (0, 100)
df['move'] = df['move'].astype(float)

# for Role == first, drop rows with move out of the binrange
df1 = df[df['Role'] == 'first']
df1 = df1[df1['move'].between(*binrange)]

# for Role == second, roundResult holds a pair: element 0 is used as the
# investment and element 1 as the amount returned. Parse it once with
# ast.literal_eval, which only accepts literals, instead of eval() on CSV text.
df2 = df[df['Role'] == 'second'].copy()
round_result = df2['roundResult'].apply(ast.literal_eval)
df2['inv'] = round_result.apply(lambda pair: pair[0])
df2['ret'] = round_result.apply(lambda pair: pair[1])
# A return must be non-negative and at most three times the investment.
df2 = df2[(df2['ret'] >= 0) & (df2['ret'] <= df2['inv'] * 3)].copy()
df2['move'] = df2['ret']

# First remaining row per player, looked up once instead of rescanning both
# frames for every user.
first_invest = df1.drop_duplicates(subset='UserID').set_index('UserID')['move']
matched = df_joint['UserID'].isin(first_invest.index)
df_joint.loc[matched, 'trust_1'] = df_joint.loc[matched, 'UserID'].map(first_invest)

# Only responses to an investment of 50 are kept; players without such a
# row are left untouched.
first_return = (
    df2[df2['inv'] == 50]
    .drop_duplicates(subset='UserID')
    .set_index('UserID')['move']
)
matched = df_joint['UserID'].isin(first_return.index)
df_joint.loc[matched, 'trust_3'] = df_joint.loc[matched, 'UserID'].map(first_return)
df_joint

### Public Goods

In [None]:
# Public Goods game: record each contributor's move (Total == 20, groups of
# 4) in the 'PG' column of df_joint.
# NOTE(review): this cell keeps Round == 3, whereas the notebook intro says
# only first-round records are kept -- confirm which one is intended.
df = pd.read_csv('data/MobLab/public_goods_linear_water.csv')
df = df[
    (df['Role'] == 'contributor')
    & (df['Round'] == 3)
    & (df['Total'] == 20)
    & (df['groupSize'] == 4)
    # `df['move'] != None` is True for every element (NaN included), so it
    # never filtered anything; notna() states the intended check.
    & df['move'].notna()
    & df['move'].between(0, 20)
    & (df['gameType'] == 'public_goods_linear_water')
]

# First remaining row per player, looked up once instead of rescanning both
# frames for every user.
first_move = df.drop_duplicates(subset='UserID').set_index('UserID')['move']
matched = df_joint['UserID'].isin(first_move.index)
df_joint.loc[matched, 'PG'] = df_joint.loc[matched, 'UserID'].map(first_move)
df_joint

### Bomb Risk

In [None]:
# Bomb-risk game: walk through each player's rounds in order, store the
# player's first move in the 'bomb' column of df_joint, and collect moves
# (plus a weight) keyed by the outcome history that preceded them.
df = pd.read_csv('data/MobLab/bomb_risk.csv')
df = df[df['Role'] == 'player']
df = df[df['gameType'] == 'bomb_risk']
# sort_values returns a new frame; the result was previously discarded, so
# the loop below ran over the file's original row order.
df = df.sort_values(by=['UserID', 'Round'])

# prefix: one character per earlier round of the current player,
# '1' if that round's result was 'SAFE' and '0' otherwise.
prefix_to_choices_human = defaultdict(list)
prefix_to_IPW = defaultdict(list)
prev_user = None
prev_move = None
prefix = ''
bad_user = False
for _, row in df.iterrows():
    # Reset the per-player state first. This used to come after the
    # bad_user check, so the flag could never be cleared for the next player.
    if row['UserID'] != prev_user:
        prev_user = row['UserID']
        prefix = ''
        bad_user = False
    if bad_user:
        continue

    move = row['move']
    # An out-of-range (or NaN) move discards the rest of this player's rows.
    # The original assigned to a misspelled `bad_users`, so the flag never took effect.
    if not (0 <= move <= 100):
        bad_user = True
        continue
    prefix_to_choices_human[prefix].append(move)

    if len(prefix) == 0:
        # First recorded round of this player: this is the value exported.
        prefix_to_IPW[prefix].append(1)
        df_joint.loc[df_joint['UserID'] == prev_user, 'bomb'] = move
    elif prefix[-1] == '1':
        # Previous round was SAFE: weight 1 / (100 - previous move), with the
        # previous move capped at 98 so the denominator stays positive.
        prev_move = min(prev_move, 98)
        prefix_to_IPW[prefix].append(1./(100 - prev_move))
    elif prefix[-1] == '0':
        # Previous round was not SAFE: weight 1 / previous move, with the
        # previous move floored at 1 to avoid dividing by zero.
        prev_move = max(prev_move, 1)
        prefix_to_IPW[prefix].append(1./(prev_move))
    else: assert False  # prefix only ever contains '0' and '1'

    prev_move = move

    prefix += '1' if row['roundResult'] == 'SAFE' else '0'

## Export

In [None]:
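# Export is currently disabled; uncomment the next line to write df_joint to data/MobLab/joint.csv.
# df_joint.to_csv('data/MobLab/joint.csv', index=False)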