# Results on decision agreement and decision change patterns

In [26]:
from base_code import *
from scipy.stats import friedmanchisquare, wilcoxon
from statannot import add_stat_annotation

# Respondent-level table; rows are keyed by the 'Respondent ID' column.
valid_data = pd.read_csv(
    '../neurosurgeon35_data/neurosurgeon35.csv',
    index_col='Respondent ID',
)

# Remaining tables are read with pandas' default integer index, in the same
# order as before.
xai25, mriwise, dr_wise_acc, result_long = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../computational_data/xai25.csv',
        '../computational_data/mri_wise_acc.csv',
        '../computational_data/dr_wise_acc.csv',
        '../computational_data/result_long.csv',
    )
)

# 1. Data prep

In [25]:
# Build `result_error`: one row per doctor response, annotated with
#   - agreement with the AI prediction in each condition (Agreement_bl / _ai / _xai),
#   - whether the doctor's answer changed between conditions (change_DRAI / change_DRXAI),
#   - right/wrong marks against the ground truth for the doctor alone (DrWR) and the AI (AIWR),
#   - the respondent's position ('Are you a'), baseline accuracy ('DR Acc'),
#     and the per-MRI baseline doctor accuracy ('MRI Difficulty').


def _right_wrong(answers, truth):
    """Mark each answer "R" if it equals `truth`, else "W"; missing answers stay missing."""
    marks = (answers == truth).map({True: "R", False: "W"})
    return marks.where(answers.notnull(), answers)


label = 'drAlone'
position = valid_data.filter(regex='Are you a')

# Do not forget to exclude NaN values in doctors' response!
# .copy() makes this an independent frame, so the column assignments below do
# not write into a slice of `result_long` (avoids SettingWithCopyWarning).
result_error = result_long[result_long['drXAI'].notnull()].copy()

# Agreement between the AI prediction and the doctor's answer in each condition.
for agree_col, answer_col in (('Agreement_bl', 'drAlone'),
                              ('Agreement_ai', 'drAI'),
                              ('Agreement_xai', 'drXAI')):
    same_as_ai = result_error['AIpred'] == result_error[answer_col]
    result_error[agree_col] = same_as_ai.map({True: "Agree", False: "Disagree"})

# Did the answer change from doctor-alone to doctor+AI, and from doctor+AI to doctor+XAI?
result_error['change_DRAI'] = (result_error['drAlone'] != result_error['drAI'])
result_error['change_DRXAI'] = (result_error['drXAI'] != result_error['drAI'])

# Right/wrong against ground truth. A missing AI prediction now stays missing
# instead of being filled with the doctor's own answer.
result_error['DrWR'] = _right_wrong(result_error[label], result_error['gt'])
result_error['AIWR'] = _right_wrong(result_error['AIpred'], result_error['gt'])

# Attach the respondent's position.
# NOTE(review): all merges below are outer joins, so a respondent or MRI with no
# remaining response would appear as an otherwise-empty row — confirm intended.
result_error['DR ID'] = result_error['Respondent ID']
result_error = pd.merge(result_error.set_index('Respondent ID'), position,
                        how="outer", left_index=True, right_index=True).reset_index()

# dr baseline accuracy
DR_ACC = dr_wise_acc[['Respondent ID', 'DR']].set_index('Respondent ID')
result_error = pd.merge(result_error.set_index('DR ID'), DR_ACC,
                        how="outer", left_index=True, right_index=True).reset_index()
result_error = result_error.rename(columns={'index': 'DR ID', 'DR': 'DR Acc'})

# data difficulty as init dr acc for each MRI
result_error = pd.merge(result_error.set_index('dataID'),
                        mriwise[['dataID', 'DR_Acc']].set_index('dataID'),
                        how="outer", left_index=True, right_index=True).reset_index()
result_error = result_error.rename(columns={'DR_Acc': 'MRI Difficulty'})
# result_error.to_csv('../computational_data/result_error.csv')

In [3]:
# Display the merged response-level table built in the data-prep step.
result_error

Unnamed: 0.1,dataID,DR ID,Respondent ID,Unnamed: 0,drAlone,drAI,drXAI,will_check_xai,xai_qual,gt,...,Agreement_bl,Agreement_ai,Agreement_xai,change_DRAI,change_DRXAI,DrWR,AIWR,Are you a,DR Acc,MRI Difficulty
0,BraTS20_Training_053,1,1,17,1.0,1.0,1.0,0.0,6,1,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.68,1.000000
1,BraTS20_Training_053,3,3,67,1.0,1.0,1.0,0.0,2,1,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.84,1.000000
2,BraTS20_Training_053,4,4,92,1.0,1.0,1.0,1.0,9,1,...,Agree,Agree,Agree,False,False,R,R,Resident Physician,0.80,1.000000
3,BraTS20_Training_053,5,5,117,1.0,1.0,1.0,,8,1,...,Agree,Agree,Agree,False,False,R,R,Resident Physician,0.88,1.000000
4,BraTS20_Training_053,6,6,142,1.0,1.0,1.0,0.0,6,1,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.76,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,BraTS20_Training_325,28,28,687,0.0,0.0,0.0,1.0,4,0,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.88,0.870968
755,BraTS20_Training_325,30,30,737,0.0,0.0,0.0,0.0,1,0,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.88,0.870968
756,BraTS20_Training_325,31,31,762,0.0,0.0,0.0,1.0,1,0,...,Agree,Agree,Agree,False,False,R,R,Resident Physician,0.80,0.870968
757,BraTS20_Training_325,33,33,812,0.0,0.0,0.0,0.0,2,0,...,Agree,Agree,Agree,False,False,R,R,Attending Physician,0.88,0.870968


# 2. Decision error pattern

In [4]:
# Doctor+AI condition: cross-tabulate the doctor's right/wrong ("R"/"W")
# outcome against the AI's right/wrong outcome.
label = 'drAI'


def _mark_dr_ai(row):
    """Return "R"/"W" for the answer in column `label`; a missing answer is returned as is."""
    answer = row[label]
    if pd.isnull(answer):
        return answer
    return "R" if row['gt'] == answer else "W"


result_error['DrAIWR'] = result_error.apply(_mark_dr_ai, axis=1)
grouped = result_error.groupby(['AIWR', 'DrAIWR']).count()
report = dict(grouped["Respondent ID"])
n_counted = sum(report.values())
# Fraction of counted responses in each (AI outcome, doctor+AI outcome) cell.
print({cell: count / n_counted for cell, count in report.items()})
grouped

{('R', 'R'): 0.8063241106719368, ('R', 'W'): 0.0764163372859025, ('W', 'R'): 0.05533596837944664, ('W', 'W'): 0.061923583662714096}


Unnamed: 0_level_0,Unnamed: 1_level_0,dataID,DR ID,Respondent ID,Unnamed: 0,drAlone,drAI,drXAI,will_check_xai,xai_qual,gt,AIpred,Agreement_bl,Agreement_ai,Agreement_xai,change_DRAI,change_DRXAI,DrWR,Are you a,DR Acc,MRI Difficulty
AIWR,DrAIWR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
R,R,612,612,612,612,612,612,612,604,612,612,612,612,612,612,612,612,612,612,612,612
R,W,58,58,58,58,58,58,58,55,58,58,58,58,58,58,58,58,58,58,58,58
W,R,42,42,42,42,42,42,42,41,42,42,42,42,42,42,42,42,42,42,42,42
W,W,47,47,47,47,47,47,47,46,47,47,47,47,47,47,47,47,47,47,47,47


In [5]:
# Number of right ("R") and wrong ("W") doctor answers in the doctor+AI condition.
result_error['DrAIWR'].value_counts()

R    654
W    105
Name: DrAIWR, dtype: int64

In [6]:
# Doctor+XAI condition: cross-tabulate the doctor's right/wrong ("R"/"W")
# outcome against the AI's right/wrong outcome.
label = 'drXAI'
xai_answers = result_error[label]
xai_marks = (result_error['gt'] == xai_answers).map({True: "R", False: "W"})
# Missing answers are carried through unchanged rather than marked "W".
result_error['DrXAIWR'] = xai_marks.where(xai_answers.notnull(), xai_answers)

grouped = result_error.groupby(['AIWR', 'DrXAIWR']).count()
report = dict(grouped["Respondent ID"])
n_counted = sum(report.values())
# Fraction of counted responses in each (AI outcome, doctor+XAI outcome) cell.
print({cell: count / n_counted for cell, count in report.items()})
grouped

{('R', 'R'): 0.8115942028985508, ('R', 'W'): 0.07114624505928854, ('W', 'R'): 0.05665349143610013, ('W', 'W'): 0.06060606060606061}


Unnamed: 0_level_0,Unnamed: 1_level_0,dataID,DR ID,Respondent ID,Unnamed: 0,drAlone,drAI,drXAI,will_check_xai,xai_qual,gt,...,Agreement_bl,Agreement_ai,Agreement_xai,change_DRAI,change_DRXAI,DrWR,Are you a,DR Acc,MRI Difficulty,DrAIWR
AIWR,DrXAIWR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
R,R,616,616,616,616,616,616,616,608,616,616,...,616,616,616,616,616,616,616,616,616,616
R,W,54,54,54,54,54,54,54,51,54,54,...,54,54,54,54,54,54,54,54,54,54
W,R,43,43,43,43,43,43,43,41,43,43,...,43,43,43,43,43,43,43,43,43,43
W,W,46,46,46,46,46,46,46,46,46,46,...,46,46,46,46,46,46,46,46,46,46


In [7]:
# Per-column non-null counts for every combination of AI outcome,
# doctor+AI outcome and doctor+XAI outcome.
result_error.groupby(['AIWR','DrAIWR','DrXAIWR']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dataID,DR ID,Respondent ID,Unnamed: 0,drAlone,drAI,drXAI,will_check_xai,xai_qual,gt,AIpred,Agreement_bl,Agreement_ai,Agreement_xai,change_DRAI,change_DRXAI,DrWR,Are you a,DR Acc,MRI Difficulty
AIWR,DrAIWR,DrXAIWR,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
R,R,R,610,610,610,610,610,610,610,602,610,610,610,610,610,610,610,610,610,610,610,610
R,R,W,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
R,W,R,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
R,W,W,52,52,52,52,52,52,52,49,52,52,52,52,52,52,52,52,52,52,52,52
W,R,R,40,40,40,40,40,40,40,39,40,40,40,40,40,40,40,40,40,40,40,40
W,R,W,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
W,W,R,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3
W,W,W,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44


# 3. Visualize the decision agreement and decision change

In [14]:
import holoviews as hv
from holoviews import opts, dim

def create_shankey(result_error, unit = 'number'):
    """Build a two-stage Sankey diagram of right/wrong decision transitions.

    Every row of ``result_error`` contributes two links: one from the
    doctor-alone outcome to the doctor+AI outcome, and one from the doctor+AI
    outcome to the doctor+XAI outcome. Each node label also carries the AI's
    own outcome for that row. "R"/"W" marks are shown as "✓"/"✕".

    Parameters
    ----------
    result_error : pandas.DataFrame
        Must contain the columns ``DrWR``, ``DrAIWR``, ``DrXAIWR`` and ``AIWR``.
    unit : str, default 'number'
        ``'number'`` sizes the links by raw counts. Any other value sizes them
        by the ``Percentage`` column (share of input rows, in %).

    Returns
    -------
    tuple
        ``(hv.Sankey, pandas.DataFrame)`` — the plot and the aggregated link
        table with columns ``source``, ``dest``, ``number``, ``Percentage``.
    """
    outcome_cols = ['DrWR', 'DrAIWR', 'DrXAIWR', 'AIWR']
    result_symbol = result_error[outcome_cols].replace({"R": "✓", "W": "✕"})

    # Collect all links first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every iteration.
    links = []
    for dr, dr_ai, dr_xai, ai in result_symbol.itertuples(index=False, name=None):
        source = 'DR {}, AI {}'.format(dr, ai)
        target1 = 'DR+AI {}, AI {}'.format(dr_ai, ai)
        target2 = 'DR+XAI {}, AI {}'.format(dr_xai, ai)
        links.append({"source": source, "dest": target1, "number": 1})
        links.append({"source": target1, "dest": target2, "number": 1})

    sankey = pd.DataFrame(links, columns=["source", "dest", "number"])
    sankey = sankey.groupby(["source", "dest"]).count().reset_index()
    # Each input row produced two links, so half the link total is the row count.
    sankey['Percentage'] = sankey['number'] / (sankey['number'].sum() / 2) * 100
    sankey['Percentage'] = sankey['Percentage'].round(1)

    hv.extension('bokeh')
    if unit == 'number':
        value_dim = hv.Dimension('number')
    else:
        value_dim = hv.Dimension('Percentage', unit='%')

    sankey1 = hv.Sankey(sankey, kdims=["source", "dest"], vdims=value_dim)
    return sankey1, sankey

In [24]:
# All respondents, links sized by percentage of responses.
# create_shankey uses percentages for any `unit` other than 'number';
# the value is now spelled correctly.
sankey1, sankey = create_shankey(result_error, unit='Percentage')
sankey1.opts(width=650, height=400, edge_line_width=1, edge_color='dest',
             node_alpha=1.0, edge_alpha=0.8, node_width=40, node_sort=True,
             cmap='Blues', label_position='left')

In [23]:
# All respondents, links sized by raw response counts.
sankey1, sankey = create_shankey(result_error, unit='number')
all_count_style = dict(
    width=650,
    height=400,
    edge_line_width=1,
    edge_color='dest',
    node_alpha=1.0,
    edge_alpha=0.8,
    node_width=40,
    node_sort=True,
    cmap='Blues',
    label_position='left',
)
sankey1.opts(**all_count_style)

In [16]:
# Aggregated link table (source, dest, number, Percentage) returned by create_shankey.
sankey

Unnamed: 0,source,dest,number,Percentage
0,"DR ✓, AI ✓","DR+AI ✓, AI ✓",576,75.9
1,"DR ✓, AI ✕","DR+AI ✓, AI ✕",42,5.5
2,"DR ✓, AI ✕","DR+AI ✕, AI ✕",8,1.1
3,"DR ✕, AI ✓","DR+AI ✓, AI ✓",36,4.7
4,"DR ✕, AI ✓","DR+AI ✕, AI ✓",58,7.6
5,"DR ✕, AI ✕","DR+AI ✕, AI ✕",39,5.1
6,"DR+AI ✓, AI ✓","DR+XAI ✓, AI ✓",610,80.4
7,"DR+AI ✓, AI ✓","DR+XAI ✕, AI ✓",2,0.3
8,"DR+AI ✓, AI ✕","DR+XAI ✓, AI ✕",40,5.3
9,"DR+AI ✓, AI ✕","DR+XAI ✕, AI ✕",2,0.3


# 4. Fine-grained decision agreement and decision change for attendings and resident+fellow

In [22]:
# Split responses by respondent position: attendings vs. everyone else.
attending = result_error[result_error['Are you a']=='Attending Physician']
resident = result_error[result_error['Are you a']!='Attending Physician']

# Attendings, links sized by percentage of responses.
# create_shankey uses percentages for any `unit` other than 'number';
# the value is now spelled correctly.
sankey2, sankey_df2 = create_shankey(attending, unit='Percentage')
sankey2.opts(width=610, height=400, edge_line_width=1, edge_color='dest',
             node_alpha=1.0, edge_alpha=0.8, node_width=40, node_sort=True,
             cmap='Blues', label_position='left')

In [None]:
# Attendings, links sized by raw response counts (create_shankey's default unit).
sankey2, sankey_df2 = create_shankey(attending)
attending_count_style = dict(
    width=650,
    height=400,
    edge_line_width=1,
    edge_color='dest',
    node_alpha=1.0,
    edge_alpha=0.8,
    node_width=40,
    node_sort=True,
    cmap='Blues',
    label_position='left',
)
sankey2.opts(**attending_count_style)

In [None]:
# resident+fellow, links sized by percentage of responses.
# create_shankey uses percentages for any `unit` other than 'number';
# the value is now spelled correctly.
sankey2, sankey_df2 = create_shankey(resident, unit='Percentage')
sankey2.opts(width=610, height=400, edge_line_width=1, edge_color='dest',
             node_alpha=1.0, edge_alpha=0.8, node_width=40, node_sort=True,
             cmap='Blues', label_position='left')

In [None]:
# resident+fellow, links sized by raw response counts (create_shankey's default unit).
sankey2, sankey_df2 = create_shankey(resident)
resident_count_style = dict(
    width=650,
    height=400,
    edge_line_width=1,
    edge_color='dest',
    node_alpha=1.0,
    edge_alpha=0.8,
    node_width=40,
    node_sort=True,
    cmap='Blues',
    label_position='left',
)
sankey2.opts(**resident_count_style)

# 5. Decision agreement percentage

In [18]:
# Decision agreement in 3 conditions
def report_agree(result_error, agree_label='Agreement_bl'):
    """Print how often the doctor's answer agrees with the AI prediction.

    Prints the raw value counts of ``result_error[agree_label]``, then one
    summary line of the form ``<label>: <pct>`` followed by a backslash-escaped
    percent sign and the number of "Agree" rows in parentheses.

    Parameters
    ----------
    result_error : pandas.DataFrame
        Table holding the agreement column.
    agree_label : str, default 'Agreement_bl'
        Column whose values are "Agree" / "Disagree".

    Returns
    -------
    None
    """
    vc = result_error[agree_label].value_counts()
    print(agree_label, vc)
    # .get(): a subset may contain only one of the two outcomes, in which case
    # the other label is absent from the value counts.
    n_agree = int(vc.get('Agree', 0))
    n_disagree = int(vc.get('Disagree', 0))
    n_total = n_agree + n_disagree
    pct_agree = 100 * n_agree / n_total if n_total else float('nan')
    # '\\%' emits a literal backslash before the percent sign, as before, but
    # without relying on an invalid escape sequence.
    print('{}: {:.1f}\\% ({})'.format(agree_label, pct_agree, n_agree))

In [19]:
# Agreement with the AI in the three conditions, all respondents.
all_agreement_columns = ('Agreement_bl', 'Agreement_ai', 'Agreement_xai')
for agree_label in all_agreement_columns:
    report_agree(result_error, agree_label=agree_label)

Agreement_bl Agree       615
Disagree    144
Name: Agreement_bl, dtype: int64
Agreement_bl: 81.0\% (615)
Agreement_ai Agree       659
Disagree    100
Name: Agreement_ai, dtype: int64
Agreement_ai: 86.8\% (659)
Agreement_xai Agree       662
Disagree     97
Name: Agreement_xai, dtype: int64
Agreement_xai: 87.2\% (662)


In [20]:
# Agreement with the AI in the three conditions, attendings only.
attending_agreement_columns = ('Agreement_bl', 'Agreement_ai', 'Agreement_xai')
for agree_label in attending_agreement_columns:
    report_agree(attending, agree_label=agree_label)

Agreement_bl Agree       210
Disagree     44
Name: Agreement_bl, dtype: int64
Agreement_bl: 82.7\% (210)
Agreement_ai Agree       217
Disagree     37
Name: Agreement_ai, dtype: int64
Agreement_ai: 85.4\% (217)
Agreement_xai Agree       217
Disagree     37
Name: Agreement_xai, dtype: int64
Agreement_xai: 85.4\% (217)


In [21]:
# Agreement with the AI in the three conditions, non-attending respondents.
resident_agreement_columns = ('Agreement_bl', 'Agreement_ai', 'Agreement_xai')
for agree_label in resident_agreement_columns:
    report_agree(resident, agree_label=agree_label)

Agreement_bl Agree       405
Disagree    100
Name: Agreement_bl, dtype: int64
Agreement_bl: 80.2\% (405)
Agreement_ai Agree       442
Disagree     63
Name: Agreement_ai, dtype: int64
Agreement_ai: 87.5\% (442)
Agreement_xai Agree       445
Disagree     60
Name: Agreement_xai, dtype: int64
Agreement_xai: 88.1\% (445)
