In [None]:
import sys
sys.path.append('../implementation')
import numpy as np
import pandas as pd
import ast
from tqdm import tqdm
from util import lognormpdf
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as sp
from wall_bias import Wall
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the STL Crimes dots (indexed by their 'id' column) and the combined user interaction logs.
underlying_data = pd.read_csv('../data/stl_crimes/dots.csv').set_index('id', drop=True)

interaction_data = pd.read_csv('../data/stl_crimes/stl_combined_interactions.csv')
# Both session columns are stored in the CSV as Python-literal strings;
# ast.literal_eval turns each cell back into the Python object it spells out.
interaction_data['interaction_session'] = interaction_data['interaction_session'].map(ast.literal_eval)
interaction_data['interaction_type_session'] = interaction_data['interaction_type_session'].map(ast.literal_eval)

In [None]:
# Replay a single interaction session (chosen by position) through a fresh Wall model.
interaction_index = 0

user = interaction_data.iloc[interaction_index]
print(f'Processing user {user.user} task {user.task}')
wall = Wall(underlying_data, [['x', 'y']], ['type'])

# Feed the session to the model one interaction at a time, each as a one-row frame
# whose index label is the interaction's step number.
# NOTE(review): rows are looked up positionally (.iloc) although underlying_data is
# indexed by 'id' -- confirm the session entries are row positions, not ids.
for step, point in enumerate(tqdm(user.interaction_session)):
    observed = pd.DataFrame(data=underlying_data.iloc[point].to_dict(), index=[step])
    wall.update(observed)

bias = wall.get_attribute_bias()
interact = wall.get_interaction_session()
    

In [None]:
# Display the current `bias` value (the result of wall.get_attribute_bias()) as the cell output.
bias

In [None]:
# Plot every column of the bias result as a line, titled with the task of the selected session.
session_task = interaction_data.iloc[interaction_index].task
bias_frame = pd.DataFrame(bias)
bias_frame.plot(title=f'{session_task}', alpha=0.5, lw=4)

In [None]:
# Not necessary to run if we already have results file for Wall
# Running Wall through all user interaction sessions and saving results in file.
#
# One record (dict) is collected per session and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row is quadratic.
result_records = []

for _, row in interaction_data.iterrows():
    print(f'Processing user {row.user} task {row.task}')
    results = {'participant_id': row.user, 'task': row.task}
    # NOTE(review): the single-session cell builds Wall with [['x', 'y']] while this
    # call passes ['x', 'y']; the 'bias-x___y' key used below needs a combined x/y
    # column in the bias frame -- confirm which form Wall expects.
    wall = Wall(underlying_data, ['x', 'y'], ['type'])

    # Use the row yielded by iterrows() directly instead of re-indexing the frame by
    # position with the row label, which only agrees for a default RangeIndex.
    # NOTE(review): rows are looked up positionally (.iloc) although underlying_data is
    # indexed by 'id' -- confirm the session entries are row positions, not ids.
    for i, interaction in enumerate(tqdm(row.interaction_session)):
        interaction_d = pd.DataFrame(data=underlying_data.iloc[interaction].to_dict(), index=[i])
        wall.update(interaction_d)

    # Store each bias column as one array per session, keyed 'bias-<column>'.
    bias = wall.get_attribute_bias()
    for col in bias.columns:
        results[f'bias-{col}'] = bias[col].to_numpy()
    # The mixed-task metric is the element-wise product of the location and type bias.
    results['bias-mixed'] = results['bias-x___y'] * results['bias-type']

    result_records.append(results)

stl_map_results = pd.DataFrame(result_records)
stl_map_results.to_pickle('../output/stl_map_results_wall.pkl')

In [None]:
# Aggregate Wall's bias over participants: one panel per task showing the mean bias
# after each observed interaction with a +/- 2 standard-error band.

# The batch cell that builds `stl_map_results` is optional; when it was skipped in this
# kernel, fall back to the results file it writes.
if 'stl_map_results' not in globals():
    stl_map_results = pd.read_pickle('../output/stl_map_results_wall.pkl')

# rcParams are applied before the axes are created: title and axis-label font sizes are
# fixed when an Axes is constructed, so updating them afterwards has no effect on it.
plt.rcParams.update({'axes.titlesize': 15, 'axes.labelsize': 15, 'xtick.labelsize': 12, 'ytick.labelsize': 12})
fig, axs = plt.subplots(1, 3, sharey=True, figsize=(4*6.4, 4.8))
for ax in axs:
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_color('black')
    ax.spines['bottom'].set_color('black')
    ax.set(xlabel='Interactions Observed', ylabel='Avg. Bias')
    ax.set_ylim((0, 1.05))

# Which bias column is evaluated for each task (dict order is the panel order).
bias_metric_per_task_wall = {'geo-based': 'bias-x___y', 'type-based': 'bias-type', 'mixed': 'bias-mixed'}
for ai, (t, bias_column) in enumerate(bias_metric_per_task_wall.items()):
    task_rows = stl_map_results[stl_map_results.task == t]
    # One column per session, one row per interaction step; sessions shorter than the
    # longest one are padded with NaN, which mean/std/count below skip.
    bias_over_time_wall = pd.DataFrame(
        {k: pd.Series(curve) for k, curve in enumerate(task_rows[bias_column])}
    )
    sems_wall = bias_over_time_wall.std(axis=1) / np.sqrt(bias_over_time_wall.count(axis=1))
    mean_wall = bias_over_time_wall.mean(axis=1)
    mean_wall.plot(ax=axs[ai], title=f'Aggregate Bias Detection for {t} Task', label='Wall', color='#1F77B4')
    axs[ai].fill_between(np.arange(len(mean_wall)), mean_wall - 2*sems_wall, mean_wall + 2*sems_wall,
                         color='#d95f02', alpha=0.3, zorder=100)

# A single legend on the first panel is enough; every panel draws the same series.
axs[0].legend()