## Results Analysis

In [76]:
import pandas as pd
from collections import defaultdict, Counter
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import csv
import pprint
import scipy.stats
from scipy.stats import chi2_contingency

In [222]:
results_df = pd.read_csv('./user_study_it3.csv', header=[0])

_Completion Time_

In [223]:
duration = results_df['Duration (in seconds)'].drop([0, 1, 2]).astype(np.int64) 
duration.describe()

count     470.000000
mean      551.644681
std       444.100427
min        77.000000
25%       289.000000
50%       415.500000
75%       637.000000
max      3279.000000
Name: Duration (in seconds), dtype: float64

In [224]:
def reject_outliers(data, m=2):
    return data[abs(data - np.mean(data)) < m * np.std(data)]

time_arr = np.asarray(duration)
santized = reject_outliers(time_arr)
print(np.mean(santized))

475.4575892857143


In [225]:
480.33564814814815 / 60

8.00559413580247

_Background Survey_

In [226]:
bckgnd_tags = ['Q74', 'Q76', 'Q78', 'Q82', 'Q25']
bckgnd_qs = ['age', 'gender', 'education','residence', 'mturk_id']

In [227]:
ct = 0
bckgnd_rows = []
for idx, row in results_df.iterrows():
    if ct < 3: 
            ct += 1
            continue 
    bckgnd_dict = {'idx':str(idx)}
    for qid in bckgnd_tags:
        qidx = bckgnd_tags.index(qid)
        bckgnd_dict[bckgnd_qs[qidx]] = row[qid]
    bckgnd_rows.append(bckgnd_dict)

In [228]:
bckgnd_df = pd.DataFrame.from_dict(bckgnd_rows)
bckgnd_df.head()

Unnamed: 0,age,education,gender,idx,mturk_id,residence
0,52,Some college - no degree,male,3,A3N7R7P9HP2YB6,"North America (United States, Canada)"
1,25,Bachelors/4 year degree,female,4,A299J4PKHAEU9H,Asia & Middle East
2,36,Bachelors/4 year degree,male,5,A2CCK1JKX48R0H,"North America (United States, Canada)"
3,36,Associates/2 year degree,female,6,AT116456PANMW,"North America (United States, Canada)"
4,50,"Graduate degree - Masters, PhD, professional, ...",female,7,A2UHF7UL7G0Y78,"North America (United States, Canada)"


Age

In [229]:
list_ = []
for el in np.array(bckgnd_df['age'].dropna().astype(int)):
    if el > 1000: 
        list_.append(2019-el)
    else: 
         list_.append(el)
print(np.mean(list_))

38.76545842217484


Residence

In [230]:
print(bckgnd_df['residence'].value_counts())

North America (United States, Canada)    380
Asia & Middle East                        85
Africa                                     2
South America                              1
Australia                                  1
Europe                                     1
Name: residence, dtype: int64


In [232]:
# North America %
380 / (380 + 85 + 2 + 1 + 1)

0.8102345415778252

Gender

In [233]:
print(bckgnd_df['gender'].value_counts())

male                    244
female                  224
I prefer not to tell      2
Name: gender, dtype: int64


In [234]:
# Female 
224 / (244 + 224 + 2)

0.4765957446808511

In [235]:
(244 + 224 + 2)

470

Education

In [236]:
print(bckgnd_df['education'].value_counts())

Bachelors/4 year degree                                         217
Some college - no degree                                         74
Associates/2 year degree                                         66
Graduate degree - Masters, PhD, professional, medicine, etc.     63
High school graduate                                             46
Some high school                                                  3
Name: education, dtype: int64


In [237]:
# Bachelors 
217 / (217 + 74 + 66 + 63 + 46 + 3)

0.4626865671641791

In [238]:
# No Degree
(74 + 46 + 3) / (217 + 74 + 66 + 63 + 46 + 3)

0.2622601279317697

### Main Event - The results

Create map structure

In [239]:
map_df = pd.read_csv('./image_map.csv', header=0)
map_df['usid'] = map_df['usid'].astype(str)
map_df['pid'] = map_df['pid'].astype(str)
map_df.head()

Unnamed: 0,usid,sType,pType,pid
0,0,EU,pre,10145
1,1,EU,pre,10176
2,2,EU,pre,10301
3,3,EU,pre,1035
4,4,EU,pre,10453


In [240]:
map_img_info = {}
for index, row in map_df.iterrows():
    map_img_info[row.get('usid')] = {
        'set': row.get('sType'),
        'period': row.get('pType'),
        'pid': row.get('pid'),
    }
    continue

Create result-holding structure

In [241]:
results = {}
for img_id in range(800):
    results[str(img_id)] = {
        's1':[], 
        's2':[],
        's3':[]
    }
print(len(results.keys()))

800


Map Response Strings to Numbers

In [242]:
resp_dict = {
    'Strongly disagree':1,
    'Somewhat disagree': 2,
    'Neither agree nor disagree':3,
    'Somewhat agree':4,
    'Strongly agree': 5,
}

Iterate results and populate structure

In [243]:
anchor_high_rows = []
anchor_high_insts = 0
for idx, row in results_df.iterrows(): 
    if idx in [0,1,2]:                # Subheaders and test row
        continue
    if row.get('Progress') != '100':  # Incomplete
        continue
    # Anchor Filter
    are_anchors_valid = True
    buff = []
    for qid in range(1,23):
        if qid in [6,14]:             # Anchors
            for subqid in range(1,4):
                score = row.get('Q' + str(qid) + '_' + str(subqid))
                if resp_dict.get(score) > 3: 
                    are_anchors_valid = False
                    if idx not in anchor_high_rows: 
                        anchor_high_rows.append(idx)
                    anchor_high_insts += 1
            continue
        img_url = row.get('img_url_q' + str(qid))
        img_id = img_url.split('/')[-1].replace('.png', '')
        for subqid in range(1,4):
            score = row.get('Q' + str(qid) + '_' + str(subqid))
            buff.append(
                {'img_id':img_id, 'subqid': 's' + str(subqid), 'score':resp_dict.get(score)}
            ) # buffer til anchor filter
            # if no anchor filter: results[img_id]['s' + str(subqid)].append(resp_dict.get(score))
            continue
        continue
    # Anchor filter
    if are_anchors_valid:
        for inst in buff:
            curr_arr = results.get(inst.get('img_id')).get(inst.get('subqid'))
            curr_arr.append(inst.get('score'))
            continue
    continue

In [244]:
anchor_high_insts

725

In [245]:
len(anchor_high_rows)

187

Remove before and after instances of images where one of the two instances has < 5 evaluations 

In [246]:
bad_pids = {'EU':[], 'Global':[]}
for img_id in results:
    eval_ct = len(results.get(img_id).get('s1'))
    info_dict = map_img_info.get(img_id)
    pid = info_dict.get('pid')
    dataset = info_dict.get('set')
    if eval_ct < 5: 
        if pid not in bad_pids.get(dataset): 
            bad_pids[dataset].append(pid)
    continue

In [247]:
print(len(bad_pids.get('EU')), len(bad_pids.get('Global')))

61 64


In [248]:
for img_id in map_img_info:
    info_dict = map_img_info.get(img_id)
    pid = info_dict.get('pid')
    dataset = info_dict.get('set')
    if pid in bad_pids.get(dataset):
        del results[img_id]
    continue

In [249]:
expected_rms = 2*(len(bad_pids.get('EU')) + len(bad_pids.get('Global')))
print('Expected Length:', 800 - expected_rms)
print('Actual Length:', len(results))

Expected Length: 550
Actual Length: 550


_Aggregate Analysis 1 - Average Rating And Support_

In [250]:
agg_items = defaultdict(list)
for subqid in range(1,4):
    ids = []
    for img_id in results:
        period = map_img_info.get(img_id).get('period')
        dataset = map_img_info.get(img_id).get('set')
        ids.append(img_id)
        results_arr = np.array(results[img_id]['s'+str(subqid)], dtype=np.int64)
        if len(results_arr) == 0: 
            print('NO RESULTS:', img_id)
            continue
        av_rating = np.mean(results_arr)
        support = len(results_arr)
        agg_items['rating'].append(av_rating)
        agg_items['period'].append(period)
        agg_items['sub_qid'].append(subqid)
        agg_items['support'].append(support)
        agg_items['set'].append(dataset)
    print('Total Images:', len(ids))
print('Final Agg Results 1')
agg_df_1 = pd.DataFrame.from_dict(agg_items)
agg_df_1 = agg_df_1.groupby(['set','sub_qid', 'period'], as_index=False)['rating', 'support'].mean()
print(agg_df_1)

Total Images: 550
Total Images: 550
Total Images: 550
Final Agg Results 1
       set  sub_qid period    rating   support
0       EU        1   post  3.273377  7.798561
1       EU        1    pre  3.184248  7.841727
2       EU        2   post  3.548938  7.798561
3       EU        2    pre  3.472585  7.841727
4       EU        3   post  3.256495  7.798561
5       EU        3    pre  3.185670  7.841727
6   Global        1   post  3.156443  7.720588
7   Global        1    pre  3.107514  7.911765
8   Global        2   post  3.450854  7.720588
9   Global        2    pre  3.409502  7.911765
10  Global        3   post  3.072185  7.720588
11  Global        3    pre  3.082015  7.911765


In [251]:
(7.798561 + 7.841727 + 7.911765 + 7.720588) / 4

7.81816025

_Aggregate Analysis 2 - Score Distributions & Statistical Tests of Distributions_

Reprint response dict

In [252]:
resp_dict = {
    'Strongly disagree':1,
    'Somewhat disagree': 2,
    'Neither agree nor disagree':3,
    'Somewhat agree':4,
    'Strongly agree': 5,
}

Results structure

In [253]:
agg_2_dict = {
    'EU':{
        's1': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
        's2': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
        's3': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
    },
    'Global':{
        's1': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
        's2': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
        's3': {
            'pre':[0,0,0,0,0],
            'post':[0,0,0,0,0],
        },
    }
}

In [254]:
for img_id in results:
    dataset = map_img_info.get(img_id).get('set')
    period  = map_img_info.get(img_id).get('period')
    for subqid in results.get(img_id):
        for score in results.get(img_id).get(subqid):
            agg_2_dict.get(dataset).get(subqid).get(period)[score-1] += 1
            continue
        continue
    continue

In [281]:
for dataset in agg_2_dict: 
    for subqid in ['s1', 's2', 's3']:
        for period in ['pre', 'post']:
            data_arr = agg_2_dict.get(dataset).get(subqid).get(period)
            arr_sum = sum(data_arr)
            ct = 1
            checksum = 0
#             for el in data_arr: 
#                 print(dataset, period, subqid, ct, el/arr_sum)
#                 ct += 1
#                 checksum += el/arr_sum
#                 continue
            neg = (data_arr[0] + data_arr[1]) / arr_sum
            neut = data_arr[2] / arr_sum
            pos = (data_arr[3] + data_arr[4]) / arr_sum
            print(dataset, subqid, period)
            print('D:', neg)
            print('N:', neut)
            print('A:', pos)
            print('CHECKSUM:', neg + neut + pos)
            continue
        continue
    continue

EU s1 pre
D: 0.3247706422018349
N: 0.1981651376146789
A: 0.47706422018348627
CHECKSUM: 1.0
EU s1 post
D: 0.2822878228782288
N: 0.2066420664206642
A: 0.511070110701107
CHECKSUM: 1.0
EU s2 pre
D: 0.24770642201834864
N: 0.16055045871559634
A: 0.591743119266055
CHECKSUM: 1.0
EU s2 post
D: 0.20940959409594095
N: 0.16605166051660517
A: 0.6245387453874539
CHECKSUM: 1.0
EU s3 pre
D: 0.29541284403669726
N: 0.26422018348623855
A: 0.44036697247706424
CHECKSUM: 1.0
EU s3 post
D: 0.25461254612546125
N: 0.28044280442804426
A: 0.46494464944649444
CHECKSUM: 1.0
Global s1 pre
D: 0.3503717472118959
N: 0.20353159851301114
A: 0.44609665427509293
CHECKSUM: 1.0
Global s1 post
D: 0.3361904761904762
N: 0.2
A: 0.4638095238095238
CHECKSUM: 1.0
Global s2 pre
D: 0.2611524163568773
N: 0.18401486988847585
A: 0.5548327137546468
CHECKSUM: 1.0
Global s2 post
D: 0.25523809523809526
N: 0.1619047619047619
A: 0.5828571428571429
CHECKSUM: 1.0
Global s3 pre
D: 0.3252788104089219
N: 0.27230483271375466
A: 0.4024163568773234


Test Distributions

In [256]:
agg_3_dict = {
    'EU':{},
    'Global':{}
}
for dataset in agg_2_dict:
    for subqid in ['s1', 's2', 's3']:
        agg_3_dict.get(dataset)[subqid] = np.array([agg_2_dict.get(dataset).get(subqid).get('post'), agg_2_dict.get(dataset).get(subqid).get('pre')])
pprint.pprint(agg_3_dict)

{'EU': {'s1': array([[ 94, 212, 224, 402, 152],
       [107, 247, 216, 357, 163]]),
        's2': array([[ 72, 155, 180, 453, 224],
       [ 80, 190, 175, 414, 231]]),
        's3': array([[ 92, 184, 304, 360, 144],
       [109, 213, 288, 319, 161]])},
 'Global': {'s1': array([[ 98, 255, 210, 351, 136],
       [112, 265, 219, 362, 118]]),
            's2': array([[ 79, 189, 170, 406, 206],
       [ 87, 194, 198, 404, 193]]),
            's3': array([[108, 229, 307, 287, 119],
       [114, 236, 293, 318, 115]])}}


In [257]:
chi_tests = {
    'EU':{}, 
    'Global':{}
}
for dataset in agg_3_dict:
    for subqid in agg_3_dict.get(dataset):
        chi_tests[dataset][subqid] = chi2_contingency(observed=agg_3_dict.get(dataset).get(subqid))

In [258]:
pprint.pprint(chi_tests)

{'EU': {'s1': (6.690698680472139,
               0.15316443682412756,
               4,
               array([[100.22263109, 228.86660534, 219.39282429, 378.4526219 ,
        157.06531739],
       [100.77736891, 230.13339466, 220.60717571, 380.5473781 ,
        157.93468261]])),
        's2': (5.8877028806304175,
               0.20769312037398546,
               4,
               array([[ 75.79024839, 172.02391904, 177.0101196 , 432.30358786,
        226.87212511],
       [ 76.20975161, 172.97608096, 177.9898804 , 434.69641214,
        228.12787489]])),
        's3': (7.395368821634824,
               0.11641257672488715,
               4,
               array([[100.22263109, 197.95216191, 295.18307268, 338.56301748,
        152.07911684],
       [100.77736891, 199.04783809, 296.81692732, 340.43698252,
        152.92088316]]))},
 'Global': {'s1': (2.4421454716728754,
                   0.6550246291343599,
                   4,
                   array([[103.7158984 , 256.82031985, 211

Test EU v. Global Distributions for same conditions

In [259]:
agg_4_dict = {
    's1':{}, 
    's2':{},
    's3':{}
}
for subqid in ['s1', 's2', 's3']:
    for period in ['pre', 'post']:
        agg_4_dict[subqid][period] = np.array([agg_2_dict.get('EU').get(subqid).get(period), agg_2_dict.get('Global').get(subqid).get(period)])
pprint.pprint(agg_4_dict)

{'s1': {'post': array([[ 94, 212, 224, 402, 152],
       [ 98, 255, 210, 351, 136]]),
        'pre': array([[107, 247, 216, 357, 163],
       [112, 265, 219, 362, 118]])},
 's2': {'post': array([[ 72, 155, 180, 453, 224],
       [ 79, 189, 170, 406, 206]]),
        'pre': array([[ 80, 190, 175, 414, 231],
       [ 87, 194, 198, 404, 193]])},
 's3': {'post': array([[ 92, 184, 304, 360, 144],
       [108, 229, 307, 287, 119]]),
        'pre': array([[109, 213, 288, 319, 161],
       [114, 236, 293, 318, 115]])}}


In [260]:
chi_tests = {
    's1':{},
    's2':{},
    's3':{},
}
for subqid in agg_4_dict:
    for period in agg_4_dict.get(subqid):
        chi_tests[subqid][period] = chi2_contingency(observed=agg_4_dict.get(subqid).get(period))

In [261]:
pprint.pprint(chi_tests)

{'s1': {'post': (8.297733790705031,
                 0.08126090712813874,
                 4,
                 array([[ 97.52952202, 237.22024367, 220.45735708, 382.49859419,
        146.29428304],
       [ 94.47047798, 229.77975633, 213.54264292, 370.50140581,
        141.70571696]])),
        'pre': (7.9186750534673465,
                0.09460306933246748,
                4,
                array([[110.20775623, 257.65466297, 218.90581717, 361.82363804,
        141.40812558],
       [108.79224377, 254.34533703, 216.09418283, 357.17636196,
        139.59187442]]))},
 's2': {'post': (6.7557751674350435,
                 0.14937113031254753,
                 4,
                 array([[ 76.70290534, 174.74039363, 177.78819119, 436.34301781,
        218.42549203],
       [ 74.29709466, 169.25960637, 172.21180881, 422.65698219,
        211.57450797]])),
        'pre': (5.190947651262577,
                0.26826015496127764,
                4,
                array([[ 84.03970452, 193.2409

Test Aggregated Decisions

In [269]:
agg_5_dict = {
    'EU':{},
    'Global':{}
}
for dataset in agg_2_dict:
    for subqid in ['s1', 's2', 's3']:
        pre_dec_arr = agg_2_dict.get(dataset).get(subqid).get('pre')
        pre_agg_dec = [
            pre_dec_arr[0] + pre_dec_arr[1],
            pre_dec_arr[3] + pre_dec_arr[4]
        ]
        
        post_dec_arr = agg_2_dict.get(dataset).get(subqid).get('post')
        post_agg_dec = [
            post_dec_arr[0] + post_dec_arr[1],
            post_dec_arr[3] + post_dec_arr[4]
        ]
        agg_5_dict.get(dataset)[subqid] = np.array([post_agg_dec, pre_agg_dec])
        continue
    continue
pprint.pprint(agg_5_dict)

{'EU': {'s1': array([[306, 554],
       [354, 520]]),
        's2': array([[227, 677],
       [270, 645]]),
        's3': array([[276, 504],
       [322, 480]])},
 'Global': {'s1': array([[353, 487],
       [377, 480]]),
            's2': array([[268, 612],
       [281, 597]]),
            's3': array([[337, 406],
       [350, 433]])}}


In [270]:
chi_tests = {
    'EU':{}, 
    'Global':{}
}
for dataset in agg_5_dict:
    for subqid in agg_5_dict.get(dataset):
        chi_tests[dataset][subqid] = chi2_contingency(observed=agg_5_dict.get(dataset).get(subqid))

In [271]:
pprint.pprint(chi_tests)

{'EU': {'s1': (4.248179652847265,
               0.03929242621470384,
               1,
               array([[327.33564014, 532.66435986],
       [332.66435986, 541.33564014]])),
        's2': (4.209858557049462,
               0.04018968812221459,
               1,
               array([[246.99725124, 657.00274876],
       [250.00274876, 664.99725124]])),
        's3': (3.618647213259608,
               0.05713531398918344,
               1,
               array([[294.84197219, 485.15802781],
       [303.15802781, 498.84197219]]))},
 'Global': {'s1': (0.5916449196428794,
                   0.44178374251290076,
                   1,
                   array([[361.34354744, 478.65645256],
       [368.65645256, 488.34354744]])),
            's2': (0.4221377679086642,
                   0.5158722625930325,
                   1,
                   array([[274.81228669, 605.18771331],
       [274.18771331, 603.81228669]])),
            's3': (0.0425559433474613,
                   0.836563

In [276]:
agg_6_dict = {
    's1':{}, 
    's2':{},
    's3':{}
}
for subqid in ['s1', 's2', 's3']:
    eu_agg_dec_post = agg_5_dict.get('EU').get(subqid)[0]
    eu_agg_dec_pre = agg_5_dict.get('EU').get(subqid)[1]
    g_agg_dec_post = agg_5_dict.get('Global').get(subqid)[0]
    g_agg_dec_pre = agg_5_dict.get('Global').get(subqid)[1]
    agg_6_dict[subqid]['pre'] = np.array([eu_agg_dec_pre, g_agg_dec_pre])
    agg_6_dict[subqid]['post'] = np.array([eu_agg_dec_post, g_agg_dec_post])
    continue
pprint.pprint(agg_6_dict)

{'s1': {'post': array([[306, 554],
       [353, 487]]),
        'pre': array([[354, 520],
       [377, 480]])},
 's2': {'post': array([[227, 677],
       [268, 612]]),
        'pre': array([[270, 645],
       [281, 597]])},
 's3': {'post': array([[276, 504],
       [337, 406]]),
        'pre': array([[322, 480],
       [350, 433]])}}


In [277]:
chi_tests = {
    's1':{},
    's2':{},
    's3':{},
}
for subqid in agg_6_dict:
    for period in agg_6_dict.get(subqid):
        chi_tests[subqid][period] = chi2_contingency(observed=agg_6_dict.get(subqid).get(period))

In [278]:
pprint.pprint(chi_tests)

{'s1': {'post': (7.161060717887675,
                 0.007450312626715736,
                 1,
                 array([[333.37647059, 526.62352941],
       [325.62352941, 514.37647059]])),
        'pre': (2.016345663303952,
                0.1556132512183144,
                1,
                array([[369.08954362, 504.91045638],
       [361.91045638, 495.08954362]]))},
 's2': {'post': (6.088212225921172,
                 0.013608671688541395,
                 1,
                 array([[250.82959641, 653.17040359],
       [244.17040359, 635.82959641]])),
        'pre': (1.197056065432094,
                0.27391087084904886,
                1,
                array([[281.18516453, 633.81483547],
       [269.81483547, 608.18516453]]))},
 's3': {'post': (15.322477008975383,
                 9.063170337355346e-05,
                 1,
                 array([[313.9461589, 466.0538411],
       [299.0538411, 443.9461589]])),
        'pre': (3.1751516100386383,
                0.074766272817