In [1]:
import sys
sys.path.append('../implementation/')
import ast
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.special as sp
from tqdm import tqdm
import time
from util import flatten_list
import warnings
warnings.filterwarnings('ignore')
from zhou_analytic_focus import AnalyticFocusModel

In [4]:
# Load the underlying dataset and the recorded user-interaction sessions.
data_path = '../data/boardrooms/boardrooms_data.csv'
ui_data_path = '../data/boardrooms/boardrooms_combined_interactions.csv'
output_file_path = '../output/boardrooms/boardrooms_af.pkl'

underlying_data = pd.read_csv(data_path)
interaction_data = pd.read_csv(ui_data_path)
# Each session is stored in the CSV as the string form of a Python literal;
# parse it back into a Python object. Mapping over the single column avoids a
# row-wise DataFrame.apply and always yields a Series of parsed objects.
interaction_data['interaction_session'] = interaction_data['interaction_session'].map(ast.literal_eval)

# Values of k at which top-k next-point prediction is evaluated.
ks = [1, 5, 10, 20, 50, 100]
# Attribute groups passed to AnalyticFocusModel
# (presumably d = discrete, c = continuous -- confirm against the model).
d_attributes = ['industry']
c_attributes = ['mktcap', 'unrelated', 'female', 'age', 'tenure', 'medianpay']

# Keep only the columns the model is given.
underlying_data = underlying_data[d_attributes + flatten_list(c_attributes)].copy()

In [5]:
# Evaluate AnalyticFocusModel on every recorded session.
# For each session the model is updated one interaction at a time; after each
# update (except the last) we check whether the actual next interaction is
# among the k highest-scored points returned by model.predict(), for every k
# in `ks`. The per-session hit rate is stored as 'ncp-<k>'.
af_records = []  # one dict per session; turned into a DataFrame once at the end

for participant_index, row in interaction_data.iterrows():
    print(f'Processing user {row.user} task {row.task}')
    results = {'participant_id': row.user, 'task': row.task}
    # Use the row yielded by iterrows() directly: its index is a *label*, so
    # feeding it to .iloc is only correct for a default RangeIndex.
    session = row.interaction_session
    model = AnalyticFocusModel(underlying_data, c_attributes, d_attributes)
    hits = {k: 0 for k in ks}
    n_predictions = 0
    for i, interaction in enumerate(tqdm(session)):
        model.update(interaction)

        # The final interaction has no successor to predict.
        if i < len(session) - 1:
            probability_of_next_point = model.predict()
            next_point = session[i + 1]
            for k in ks:
                top_k = probability_of_next_point.nlargest(k).index.values
                hits[k] += int(next_point in top_k)
            n_predictions += 1

    for k in ks:
        # Sessions with fewer than two interactions yield no predictions;
        # record NaN rather than dividing by zero.
        results[f'ncp-{k}'] = hits[k] / n_predictions if n_predictions else np.nan

    af_records.append(results)

# Build the frame in one go; DataFrame.append was removed in pandas 2.0 and
# was quadratic when called inside a loop.
af_results = pd.DataFrame(af_records)
af_results.to_pickle(output_file_path)

Processing user 1 task 1


100%|██████████| 20/20 [00:00<00:00, 80.31it/s]


Processing user 4 task 1


100%|██████████| 14/14 [00:00<00:00, 77.50it/s]


Processing user 12 task 1


100%|██████████| 81/81 [00:01<00:00, 72.50it/s]


Processing user 16 task 1


100%|██████████| 16/16 [00:00<00:00, 55.75it/s]


Processing user 28 task 1


100%|██████████| 52/52 [00:00<00:00, 89.42it/s]


Processing user 34 task 1


100%|██████████| 7/7 [00:00<00:00, 61.32it/s]


Processing user 38 task 1


100%|██████████| 47/47 [00:00<00:00, 67.78it/s]


Processing user 39 task 1


100%|██████████| 4/4 [00:00<00:00, 70.08it/s]


Processing user 40 task 1


100%|██████████| 36/36 [00:00<00:00, 58.96it/s]


Processing user 44 task 1


100%|██████████| 16/16 [00:00<00:00, 93.39it/s]


Processing user 50 task 1


100%|██████████| 11/11 [00:00<00:00, 82.01it/s]


Processing user 57 task 1


100%|██████████| 57/57 [00:00<00:00, 70.56it/s]


Processing user 58 task 1


100%|██████████| 2/2 [00:00<00:00, 95.26it/s]


Processing user 59 task 1


100%|██████████| 26/26 [00:00<00:00, 78.66it/s]


Processing user 61 task 1


100%|██████████| 30/30 [00:00<00:00, 75.08it/s]


Processing user 63 task 1


100%|██████████| 12/12 [00:00<00:00, 66.06it/s]


Processing user 77 task 1


100%|██████████| 22/22 [00:00<00:00, 93.57it/s]


Processing user 83 task 1


100%|██████████| 35/35 [00:00<00:00, 80.48it/s]


Processing user 86 task 1


100%|██████████| 49/49 [00:00<00:00, 79.02it/s]


Processing user 91 task 1


100%|██████████| 13/13 [00:00<00:00, 69.92it/s]


Processing user 100 task 1


100%|██████████| 34/34 [00:00<00:00, 70.61it/s]


Processing user 106 task 1


100%|██████████| 21/21 [00:00<00:00, 68.10it/s]


Processing user 110 task 1


100%|██████████| 12/12 [00:00<00:00, 73.37it/s]


Processing user 117 task 1


100%|██████████| 42/42 [00:00<00:00, 74.87it/s]


Processing user 118 task 1


100%|██████████| 8/8 [00:00<00:00, 81.39it/s]


Processing user 119 task 1


100%|██████████| 25/25 [00:00<00:00, 93.18it/s]


Processing user 123 task 1


100%|██████████| 28/28 [00:00<00:00, 97.78it/s]


Processing user 130 task 1


100%|██████████| 25/25 [00:00<00:00, 95.60it/s]


Processing user 133 task 1


100%|██████████| 48/48 [00:00<00:00, 92.29it/s]


Processing user 141 task 1


100%|██████████| 79/79 [00:01<00:00, 78.82it/s]


Processing user 146 task 1


100%|██████████| 29/29 [00:00<00:00, 79.82it/s]


Processing user 157 task 1


100%|██████████| 5/5 [00:00<00:00, 90.25it/s]


Processing user 164 task 1


100%|██████████| 37/37 [00:00<00:00, 77.94it/s]


Processing user 165 task 1


100%|██████████| 49/49 [00:00<00:00, 69.97it/s]


Processing user 166 task 1


100%|██████████| 9/9 [00:00<00:00, 73.25it/s]


Processing user 177 task 1


100%|██████████| 30/30 [00:00<00:00, 57.67it/s]


Processing user 180 task 1


100%|██████████| 18/18 [00:00<00:00, 90.85it/s]


Processing user 181 task 1


100%|██████████| 2/2 [00:00<00:00, 116.31it/s]


Processing user 182 task 1


100%|██████████| 15/15 [00:00<00:00, 64.70it/s]


Processing user 183 task 1


100%|██████████| 20/20 [00:00<00:00, 67.68it/s]


Processing user 185 task 1


100%|██████████| 14/14 [00:00<00:00, 98.19it/s]
