In [3]:
import pandas as pd
import random
import ast
import numpy as np
import scipy.stats as stats 
from scipy.spatial import distance
import statsmodels.stats.multitest as multitest

import statsmodels.stats.api as sms
import seaborn as sns
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import plotly.express as px
import plotly.graph_objects as go
import os, glob
import sys
import pickle
import itertools
import concurrent.futures
from statsmodels.stats.multitest import multipletests
from sklearn.decomposition import PCA
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
import threading
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import queue

from tqdm import tqdm

plt.rcParams["font.family"] = "Times New Roman"

import warnings
warnings.filterwarnings("ignore")

# Read in the data

In [4]:
full_multi_task_data = pd.read_csv('all_multi_task_wave_data.txt')

In [5]:
full_multi_task_data = full_multi_task_data.rename(columns = {"task": "task_name"})

In [6]:
# set the score to the best score across repeated attempts, in cases where it saved multiple times
full_multi_task_data = full_multi_task_data.groupby('stageId').apply(lambda x: x.loc[x['score'].idxmax()]).reset_index(drop=True)

In [7]:
# task map
task_map = pd.read_csv('task_map.csv')
task_map = task_map.rename(columns = {"task": "task_name"})

In [8]:
task_name_mapping = {
    "Sudoku": "Sudoku",
    "Moral Reasoning": "Moral Reasoning (Disciplinary Action Case)",
    "Wolf Goat Cabbage": "Wolf, goat and cabbage transfer",
    "Guess the Correlation": "Guessing the correlation",
    "Writing Story": "Writing story",
    "Room Assignment": "Room assignment task",
    "Allocating Resources": "Allocating resources to programs",
    "Divergent Association": "Divergent Association Task",
    "Word Construction": "Word construction from a subset of letters",
    "Whac a Mole": "Whac-A-Mole",
    "Random Dot Motion": "Random dot motion",
    "Recall Association": "Recall association",
    "Recall Word Lists": "Recall word lists",
    "Typing": "Typing game",
    "Unscramble Words": "Unscramble words (anagrams)",
    "WildCam": "Wildcam Gorongosa (Zooniverse)",
    "Advertisement Writing": "Advertisement writing",
    "Putting Food Into Categories": "Putting food into categories"
}

task_map = task_map.rename(
    columns = {
        "Q1concept_behav": "Conceptual-Behavioral",
        "Q3type_1_planning": "Type 1 (Planning)",
        "Q4type_2_generate": "Type 2 (Generate)",
        "Q6type_5_cc": "Type 5 (Cognitive Conflict)",
        "Q7type_7_battle": "Type 7 (Battle)",
        "Q8type_8_performance": "Type 8 (Performance)",
        "Q9divisible_unitary": "Divisible-Unitary",
        "Q10maximizing": "Maximizing",
        "Q11optimizing": "Optimizing",
        "Q13outcome_multip": "Outcome Multiplicity",
        "Q14sol_scheme_mul": "Solution Scheme Multiplicity",
        "Q15dec_verifiability": "Decision Verifiability",
        "Q16shared_knowledge": "Shared Knowledge",
        "Q17within_sys_sol": "Within-System Solution",
        "Q18ans_recog": "Answer Recognizability",
        "Q19time_solvability": "Time Solvability",
        "Q20type_3_type_4": "Type 3 and Type 4 (Objective Correctness)",
        "Q22confl_tradeoffs": "Conflicting Tradeoffs",
        "Q23ss_out_uncert": "Solution Scheme Outcome Uncertainty",
        "Q24eureka_question": "Eureka Question",
        "Q2intel_manip_1" : "Intellectual-Manipulative",
        "Q21intellective_judg_1" : "Intellective-Judgmental",
        "Q5creativity_input_1" : "Creativity Input",
        "Q25_type6_mixed_motive" : "Type 6 (Mixed-Motive)"
    }
)

In [9]:
full_multi_task_data.loc[:, "task_name"] = full_multi_task_data["task_name"].replace(task_name_mapping)

In [10]:
task_cols_to_use = task_map.drop(["task_name", "Type 6 (Mixed-Motive)"], axis = 1).columns

In [11]:
# merge the multi-task data with the task map
full_multi_task_data = pd.merge(left = full_multi_task_data, right = task_map, on = "task_name", how = "left")

In [12]:
communication_features = pd.read_csv("full_multi_task_messages_conversation_level.csv")
communication_features = communication_features.rename(columns={"conversation_num": "stageId"})
communication_features.columns
communication_features = communication_features.drop(columns = ['speaker_nickname', 'message',
       'timestamp', 'message_original', 'message_lower_with_punc'], axis = 1)

In [13]:
COMMS_DVS = ["turn_taking_index", "gini_coefficient_sum_num_messages", "sum_num_messages", "average_positive_bert", "team_burstiness"]

# Final Cleaned Datasets

## Task Sets: (1) Wave 1 Only; (2) All 20 Tasks
- `team_multi_task_wave1`
- `team_multi_task_full`

## Communication Features
We examine 5 communication features:
- Turn-Taking
- Gini Coefficient
- Total Number of Messages
- Positivity
- Burstiness

When analyzed in the context of the two different task sets, we get:

- `team_multi_task_comms_wave1`
- `team_multi_task_comms_full`

In [14]:
team_multi_task_full = full_multi_task_data[full_multi_task_data["playerCount"]>1]

In [15]:
team_multi_task_wave1 = team_multi_task_full[team_multi_task_full["wave"]==1]

In [16]:
team_multi_task_comms_full = pd.merge(communication_features, team_multi_task_full, on = "stageId", how = "inner")

In [17]:
team_multi_task_comms_wave1 = pd.merge(communication_features, team_multi_task_wave1, on = "stageId", how = "inner")

## Additional Preprocessing
- Standardize all task columns
- Standardize all dependent variables

Note that we should _separately_ preprocess and standardize the interactions / composition variables when we add them in at the regression stage!

In [18]:
cols_to_use = ["score"] + list(task_cols_to_use)
cols_to_use_with_comms = COMMS_DVS + ["score"] + list(task_cols_to_use)

In [19]:
team_multi_task_full[cols_to_use] = StandardScaler().fit_transform(team_multi_task_full[cols_to_use])
team_multi_task_wave1[cols_to_use] = StandardScaler().fit_transform(team_multi_task_wave1[cols_to_use])
team_multi_task_comms_full[cols_to_use_with_comms] = StandardScaler().fit_transform(team_multi_task_comms_full[cols_to_use_with_comms])
team_multi_task_comms_wave1[cols_to_use_with_comms] = StandardScaler().fit_transform(team_multi_task_comms_wave1[cols_to_use_with_comms])

# Define Functions for Calculating Q^2

In [20]:
def reshape_x_y(x, y):
    if(isinstance(x, pd.Series)):
        x = np.asarray(x).reshape(-1, 1)
    else:
        x = np.asarray(x)
    
    y = np.asarray(y).reshape(-1, 1)
    return(x, y)

In [21]:
def q2_task_holdout_helper(x_train, x_test, y_train, y_test, estimator):
    
    # some reshaping
    x_train_array, y_train_array = reshape_x_y(x_train, y_train)
    x_test_array, y_test_array = reshape_x_y(x_test, y_test)

    # print("Training data: ", pd.DataFrame(x_train_array).head())
    # print("Testing data: ", pd.DataFrame(x_test_array).head())

    # Fit the model and get the error
    fitted_model = estimator.fit(X=x_train_array, y=y_train_array.ravel())
    
    # save prediction error
    prediction = fitted_model.predict(x_test_array)

    # flatten all arrays
    y_test_array = np.asarray(y_test_array).flatten()
    prediction = np.asarray(prediction).flatten()

    # print("y test array", y_test_array)
    # print("prediction", prediction)

    squared_model_prediction_error = (y_test_array - prediction) ** 2

    # save total error for this fold
    squared_average_prediction_error = (y_test_array - np.mean(y_train_array)) ** 2

    return squared_model_prediction_error, squared_average_prediction_error

In [22]:
"""
This is the version of q^2 that holds out EVERYTHING associated with a given task

It trains on all task instances from the "seen" classes, and it tests on task instances of held-out (unseen) classes.

NOTE: this version of the function assumes that x and y are passed in with a column called "task_name"
"""

def get_q2(y, x, estimator = Lasso(), num_task_holdouts = 1):

    squared_model_prediction_errors = []
    squared_average_prediction_errors = []

    num_total_tasks = x["task_name"].nunique()

    # randomly hold out `num_task_holdouts`
    all_possible_task_combos = list(itertools.combinations((x["task_name"].unique()), num_total_tasks - num_task_holdouts))
    
    for sample in all_possible_task_combos:

        # print("Sample:", sample)
        # print("Held out:", x[~x["task_name"].isin(sample)]["task_name"].unique())

        x_train_tasks = x[x["task_name"].isin(sample)].drop("task_name", axis = 1)
        x_test_tasks = x[~x["task_name"].isin(sample)].drop("task_name", axis = 1)

        y_train_tasks = y[y["task_name"].isin(sample)].drop("task_name", axis = 1)
        y_test_tasks = y[~y["task_name"].isin(sample)].drop("task_name", axis = 1)

        # get evaluation score by training on the training tasks and evaluating on the holdout tasks
        squared_model_prediction_error, squared_average_prediction_error = q2_task_holdout_helper(x_train_tasks, x_test_tasks, y_train_tasks, y_test_tasks, estimator)
        
        squared_model_prediction_errors.append(squared_model_prediction_error)
        squared_average_prediction_errors.append(squared_average_prediction_error)

    squared_model_prediction_error = np.asarray(squared_model_prediction_error).flatten()
    squared_average_prediction_error = np.asarray(squared_average_prediction_error).flatten()

    return 1 - (np.sum(squared_model_prediction_error) / np.sum(squared_average_prediction_error))


# Set up "Us versus McGrath"

In [23]:
mcgrath_colnames = [col for col in task_map.columns if "Type" in col]
# remove type 6, as it is not relevant for our data
mcgrath_colnames.remove('Type 6 (Mixed-Motive)')

In [24]:
def get_mcgrath_categorical(df):
    mcgrath_categorical_buckets = {}
    
    # first, convert everything into one of mcgrath's categories
    for i in range(len(df)):
        task_vec_mcgrath = df[mcgrath_colnames].iloc[i][1:]
        task_name = df.iloc[i]["task_name"]
        task_type = task_vec_mcgrath.idxmax()
        type_val = task_vec_mcgrath[task_type]
        if task_type == "Type 3 and Type 4 (Objective Correctness)":
            task_type = "Type 3 (Intellective)" if type_val > 0.5 else "Type 4 (Decision-Making)"

        mcgrath_categorical_buckets[task_name] = task_type
        
    mcgrath_df = pd.DataFrame({
        "task_name": mcgrath_categorical_buckets.keys(),
        "mcgrath_category": mcgrath_categorical_buckets.values()
    })

    mcgrath_df_categorical = pd.concat([mcgrath_df["task_name"], pd.get_dummies(mcgrath_df["mcgrath_category"], dtype= int).add_suffix('_cat')], axis = 1)

    mcgrath_categorical = list(mcgrath_df_categorical.columns)
    mcgrath_categorical.remove("task_name")
    
    # after calculating the categories, return the dataframe
    return (df.merge(mcgrath_df_categorical, on = "task_name"), mcgrath_categorical)

In [25]:
# OUR FOUR DATASETS ARE:
# team_multi_task_full, team_multi_task_wave1
# team_multi_task_comms_full, team_multi_task_comms_wave1

def get_mcgrath_comparisons(datasets, dvs):
    
    comparison_dict_list = [{} for i in range(len(datasets))]
    
    for i, dataset in enumerate(datasets):
        comparison_dict = comparison_dict_list[i]
        
        dataset, mcgrath_categorical = get_mcgrath_categorical(dataset)

        for dv in dvs:
            comparison_dict[dv] = {}
            comparison_dict[dv]["mcgrath_continuous"] = get_q2(dataset[[dv, "task_name"]], dataset[mcgrath_colnames + ["playerCount", "Low", "Medium", "task_name"]], estimator = LinearRegression())
            comparison_dict[dv]["mcgrath_categorical"] = get_q2(dataset[[dv, "task_name"]], dataset[mcgrath_categorical + ["playerCount", "Low", "Medium", "task_name"]], estimator = LinearRegression())
            comparison_dict[dv]["all_features"] = get_q2(dataset[[dv, "task_name"]], dataset[list(task_cols_to_use) + ["playerCount", "Low", "Medium", "task_name"]], estimator = LinearRegression())
            
    return comparison_dict_list

Our dictionaries of looking at McGrath versus the rest:
1. `mcgrath_comparisons_full`
2. `mcgrath_comparisons_wave1`
3. `mcgrath_comparisons_conv_full`
4. `mcgrath_comparisons_conv_wave1`

In [26]:
mcgrath_comparisons_full, mcgrath_comparisons_wave1 = get_mcgrath_comparisons([team_multi_task_full, team_multi_task_wave1], ["score"])

In [27]:
mcgrath_comparisons_conv_full, mcgrath_comparisons_conv_wave1 = get_mcgrath_comparisons([team_multi_task_comms_full, team_multi_task_comms_wave1], ["score"])

In [28]:
# example of looking at the dictionary
mcgrath_comparisons_full

{'score': {'mcgrath_continuous': 0.12797385310435472,
  'mcgrath_categorical': 0.20664078198140312,
  'all_features': -7.386896991380263e+23}}

# Exhaustive Search Procedure

In [29]:
def process_combination(task_col_combo, dataset, dv, filename):
    return get_q2(
                dataset[[dv, "task_name"]],
                dataset[list(task_col_combo) + ["playerCount", "Low", "Medium", "task_name"]],
                estimator = LinearRegression() ## get_q2 defaults to LASSO, so let's run this with OLS
    )

"""
function: parallel_q2
---
This is the function that (in parallel) gets the q^2 values of each
of the possible combinations / ways of selecting task columns.

@dataset: the dataset that we are using for the prediction
@dv: the name of the dependent variable. We expect this to be a column in the dataset.
@filename: the name of the output file (as we finish processing the combinations, the results will be written to this file).
@column_choice_combinations: the list of all possible column choice combinations.
"""
def parallel_q2(dataset, dv, filename, column_choice_combinations, results, lock):
    num_threads = multiprocessing.cpu_count()  # Get as many processes as CPU's
    assert len(results) == 0  # Assert that we start out with no results
    
    with tqdm(total=len(column_choice_combinations)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
            def wrapper(task_col_combo):
                try:
                    q2 = process_combination(task_col_combo, dataset, dv, filename)                    
                    if isinstance(q2, (int, float)):  # Ensure q2 is a valid number
                        result_to_append = (str(task_col_combo), q2)
                        assert(len(result_to_append) == 2) # assert that there are exactly 2 elements
                        
                        with lock: # do this with a lock
                            results.append(result_to_append)
                            output_filename = str(len(task_col_combo)) + "_" + dv + "_" + filename                    
                            df_results = pd.DataFrame(results, columns=["selected_task_cols", "q2"]).drop_duplicates()
                            df_results.to_csv(output_filename, index=False)
                except Exception as e:
                    print(results)
                    print(f"DataFrame shape: {df_results.shape}")
                    print(f"Error processing {task_col_combo}: {e}")

            # Map the process function to each combination
            futures = [executor.submit(wrapper, combo) for combo in column_choice_combinations]

            for future in concurrent.futures.as_completed(futures):
                pbar.update()

### Call the Exhaustive Search Procedure

In [30]:
def call_parallel_q2(dataset, dv):
    
    # start with the number of mcgrath columns
    num_mcgrath_columns = len(mcgrath_colnames)

    for num_cols in range(num_mcgrath_columns, len(set(dataset["task_name"]))+1):
        print("Running exhaustive search for " + str(num_cols) + " columns...")
        
        column_choice_combinations = list(itertools.combinations(task_cols_to_use, num_cols))
        results = []
        lock = threading.Lock()

        # call the parallel q2
        parallel_q2(dataset = dataset , dv = dv, filename = "q2_OLS_from_diff_task_cols.csv", column_choice_combinations = column_choice_combinations, results = results, lock = lock)

In [None]:
call_parallel_q2(team_multi_task_wave1, "score")

Running exhaustive search for 6 columns...


 10%|█         | 10281/100947 [31:46<5:51:58,  4.29it/s]