# Map Training Analysis
*written by Viviane Kakerbeck*

#### Import Dependencies

In [None]:
from __future__ import print_function

import itertools
import os

import ezodf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ptitprince as pt
import scipy
import scipy.io as spio
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from matplotlib.patches import Arrow, Circle
from PIL import Image
from scipy import stats
from scipy.spatial import distance
from statsmodels.compat import lzip
from statsmodels.formula.api import ols
from statsmodels.stats.anova import AnovaRM

#### Define Paths
These two fields need to be set for each user individually. Set mapPath to your path leading to your map training data and taskPath to lead to your alignment task results.

In [None]:
# Locations used on other machines (kept for reference, inactive):
#mapPath = "/Users/ingen/Dropbox/VR alignment/bachelor_master_Arbeiten/Laura/scripts/viewed_data/"
#taskPath = "/Users/ingen/Dropbox/VR alignment/bachelor_master_Arbeiten/Laura/scripts/over_all_subjects/trials_mat/"
#mapPath = "/Users/ingen/Dropbox/Project Seahaven/Tracking/MapResults/"
#taskPath = "/Users/ingen/Dropbox/Project Seahaven/Tracking/TaskPerformance/"
# Active locations. mapPath must end with "/" because file names are appended
# to it by plain string concatenation when the .ods files are opened.
mapPath = "C:/Users/vivia/Dropbox/Project Seahaven/Tracking/MapResults/"
taskPath = "C:/Users/vivia/Dropbox/Project Seahaven/Tracking/TaskPerformance/"

#### File Dependencies:

For this script you need the following files:
* #.ods from Map training (location specified by mapPath)
* AlignmentVR_SubjNo_#.mat from Task (location specified by taskPath)

In the same folder as this script should be:

* complete_list_houses.txt (for house angles)
* HouseList.txt (for house coordinates)
* Seahaven alingment project.csv -> download from google spreadsheet with list of all recordings & discard marks

## Map Training

#### Function to Check Usability of a Subject (Discarded? Right Condition?)

In [None]:
def checkUsability(SNum, Rep=False, overview=None):
    """Decide whether a subject's recording may be used in the analysis.

    Parameters
    ----------
    SNum : int
        Subject number, matched against the 'Subject#' column.
    Rep : bool, optional
        False (default): check usability for the single-measurement analysis.
        True: check usability for the repeated-measurement analysis.
    overview : pandas.DataFrame, optional
        Recording overview with the columns 'Subject#', 'Discarded',
        'Measurement#' and 'Repeated'. When omitted it is read from
        "./Seahaven alingment project.csv".

    Returns
    -------
    bool
        When Rep is false: True if the subject is listed, not discarded and
        its 'Measurement#' is not greater than 1.
    tuple (bool, object)
        When Rep is true: (True, <value of 'Repeated'>) if the subject is
        listed, not discarded and has a 'Repeated' entry; otherwise
        (False, " ").
    """
    if overview is None:
        overview = pd.read_csv("./Seahaven alingment project.csv")
    rows = overview.loc[overview['Subject#'] == SNum]
    # Same "not usable" answer for every rejection, shaped to match Rep.
    rejected = (False, " ") if Rep else False

    if rows.empty:
        return rejected
    # Use the first matching row; Series.bool() (used previously) is
    # deprecated and raises when a subject is listed more than once.
    if rows['Discarded'].iloc[0] == 'yes':
        return rejected
    if Rep:
        repeated = rows['Repeated'].values[0]
        if pd.isnull(repeated):
            return False, " "
        return True, repeated
    if rows['Measurement#'].iloc[0] > 1:
        # Repeated measurement of a subject: excluded from the main analysis.
        return False
    return True

#### Load All Map Training Data Into a DataFrame - For .ods Files
Only run once! If you already have ClickStatsAll.csv saved just load this file.

In [None]:
# Build one table with a row per house and a column per usable subject,
# holding the number of mouse clicks on that house during map training.

# House numbers are the part of each line before the first "_".
with open("./complete_list_houses.txt","r") as g:
    allHouses = [str(int(line.split('_',1)[0])) for line in g]
AllDf = pd.DataFrame(allHouses,columns=['House'])

allFiles = os.listdir(mapPath)
for e in allFiles:
    if not e.endswith(".ods"):
        continue
    # One normalised subject label is used both for the column header and for
    # the records, so file names with leading zeros cannot create two columns.
    subject = str(int(e.split('.',1)[0]))
    if not checkUsability(int(subject)):
        continue

    sheet = ezodf.opendoc(mapPath+e).sheets[0]
    start = None      # row index of the 'Mouse Click Stats:' header
    records = []
    for i, row in enumerate(sheet.rows()):
        previous = None
        first_in_row = True
        for cell in row:
            if cell.value=='Mouse Click Stats:':#only get mouse click stats, not hovering
                start = i
                records = []
            elif (start is not None and i > start+1
                  and cell.value is not None and not first_in_row):
                # A non-empty cell that is not the first in its row is a click
                # count; the cell before it holds the house name ("<number>_...").
                records.append({'House': str(int(previous.split('_',1)[0])),
                                subject: int(cell.value)})
            previous = cell.value
            first_in_row = False

    if start is None:
        # Without the header there are no click statistics to merge; merging
        # anyway would reuse the previous subject's table or add a fake
        # all-zero column.
        print(e+" contains no 'Mouse Click Stats:' section - skipped.")
        continue
    Subjectdf = pd.DataFrame(records,columns=['House',subject])
    AllDf = AllDf.merge(Subjectdf,on='House',sort=True,how='outer')

AllDf = AllDf.fillna(int(0))
AllDf = AllDf.set_index('House')
# The outer merges can duplicate house rows; keep the first occurrence.
AllDf = AllDf[~AllDf.index.duplicated(keep='first')]

#### Save Table in CSV Format

In [None]:
# Create the output folder on first use so the export cannot fail on a fresh checkout.
if not os.path.isdir('Results'):
    os.makedirs('Results')
AllDf.to_csv('Results/ClickStatsAll.csv')

### Take a Look at Map Training Data

#### Load Table

In [None]:
# Reload the click table from disk; 'House' becomes the row index, every
# remaining column is one subject.
AllDf = pd.read_csv('Results/ClickStatsAll.csv').set_index('House')
len(AllDf.columns)# number of subject columns; the original analysis was run on 64 subjects

#### Excerpt from Data Table

In [None]:
# Show the first rows (houses) of the click table.
AllDf.head()

#### Overall Statistics

In [None]:
# Column-wise summary statistics, i.e. one set of statistics per subject.
AllDf.describe()

#### Data Distributions

In [None]:
# Histogram of the mean number of clicks per house (mean taken across the
# subject columns), with a vertical line at the overall mean.
house_means = AllDf.mean(axis=1)
overall_mean = np.mean(house_means)
plt.figure(figsize=(15,5))
# NOTE(review): distplot is deprecated in recent seaborn releases; kept because
# the installed seaborn version is not visible here.
sns.distplot(house_means,norm_hist=False,kde=False,color='royalblue')# if you don't want pdf, set norm_hist=False,kde=False
plt.plot([overall_mean, overall_mean], [0, 41], linewidth=2)
plt.legend(['mean: '+str(overall_mean)[:4],'distribution'],fontsize=15)
plt.title("Distribution of Mean Number of Clicks on one House over Subjects",fontsize=20)
plt.xlabel('Mean Number of Clicks on one House',fontsize=15)
# Each histogram entry is one house (row of AllDf), so the bars count houses.
plt.ylabel('House Count',fontsize=15)
plt.show()
#plt.savefig('Results/MeanClickDistNoTitle.png')

In [None]:
# Density of the total number of clicks per house (summed across the subject
# columns), with a vertical line at the mean total.
house_totals = AllDf.sum(axis=1)
mean_total = np.mean(house_totals)
plt.figure(figsize=(15,5))
sns.distplot(house_totals,color='royalblue')
plt.plot([mean_total, mean_total], [0, 0.0037], linewidth=2)
plt.legend(['mean: '+str(mean_total)[:4],'distribution'],fontsize=15)
plt.title("Distribution of Overall Number of Clicks on one House over Subjects",fontsize=20)
# The plotted quantity is the sum over subjects, not the mean.
plt.xlabel('Overall Number of Clicks on one House',fontsize=15)
plt.ylabel('Probability Density',fontsize=15)
plt.show()
#plt.savefig('Results/HouseClickDistNoTitle.png')

This means an average of 271/64 ≈ 4.23 clicks per subject on each house.

In [None]:
# Per subject: how many houses received at least one click.
houses_seen = (AllDf > 0).astype(int).sum(axis=0)
mean_seen = np.mean(houses_seen)

plt.figure(figsize=(15,5))
sns.distplot(houses_seen, norm_hist=False, kde=False, color='royalblue')
# Vertical marker at the mean number of houses seen.
plt.plot([mean_seen, mean_seen], [0, 17], linewidth=2)
plt.yticks(np.arange(0, 21, step=5))
plt.legend(['mean: ' + str(mean_seen)[:4], 'distribution'], fontsize=15)
plt.title("Distribution of Number of Houses That Were Looked at", fontsize=20)
plt.xlabel('Number of Houses That Were Looked at by a Subject', fontsize=15)
plt.ylabel('Subject Count', fontsize=15)
plt.show()
#plt.savefig('Results/ClickedDistAbsCountNoTitle.png')

#### Plot Whole Click Distribution -> Any (Ir)regularities?

In [None]:
# Heat map of the complete click table: one row per house, one column per subject.
plt.figure(figsize=(15,35))
sns.heatmap(AllDf)
plt.title('Number of Clicks on Each House by Each Subject',fontsize=20)
plt.ylabel('House Number',fontsize=15)
plt.xlabel('Subject Number',fontsize=15)
#plt.show()
# The figure is written to disk instead of being shown.
plt.savefig('Results/ClickHeatmap.png')

## Task Performance

#### Load Data of Task Performance (.mat Files) into DataFrame

In [None]:
def mat_to_py(AlignmentPath,number):
    '''
    Load one subject's task results from its .mat file into a numpy array.

    The file AlignmentPath + "/AlignmentVR_SubjNo_" + number + ".mat" is read
    and its 'Output' struct is walked task by task ("Absolute", "Relative",
    "Pointing") and, within each task, timing by timing ("Trial_3s",
    "Trial_Inf"). The six blocks therefore come out in this order:

    conditions = ["Absolute - 3s ","Absolute - inf","Relative - 3s ","Relative - inf","Pointing 3s   ","Pointing - inf"]

    Every trial is returned as the list of its stored values plus one extra
    trailing entry: whether the last stored value equals the third-from-last
    one (i.e. whether the trial was answered correctly).

    number must be a string, because it is concatenated into the file name.
    '''
    mat_contents = spio.loadmat(AlignmentPath+"/AlignmentVR_SubjNo_"+number+".mat")
    output = mat_contents['Output'][0][0]

    type_array = []
    for cond_1 in ("Absolute", "Relative", "Pointing"):
        for cond_2 in ("Trial_3s", "Trial_Inf"):
            trials = output[cond_1][cond_2][0][0]
            trials_array = []
            for entry in trials:
                # Unwrap the nested MATLAB cells down to the plain values.
                value_array = [field[0][0] for field in entry[0]]
                # Append the correctness flag (True or False).
                value_array.append(value_array[-1] == value_array[-3])
                trials_array.append(value_array)
            type_array.append(trials_array)

    return np.array(type_array)

In [None]:
# Collect, for every subject column of AllDf, the trial outcomes and the
# houses shown in each of the six task conditions.
conditions = ["Absolute - 3s ","Absolute - inf","Relative - 3s ","Relative - inf","Pointing 3s   ","Pointing - inf"]
vp_nums = list(AllDf)
AllResults = np.zeros((6,len(vp_nums),36))#AllResults[condition][subjectNum][Trial]
AllHouses = np.zeros((6,len(vp_nums),36))
LeastClickHouse = np.zeros((6,len(vp_nums),36))
# NOTE: subjects that are skipped below keep all-zero rows in these arrays.
for i,e in enumerate(vp_nums):
    try:
        m = mat_to_py(taskPath,e)
    except IOError:
        # Only a missing/unreadable file is reported as "Not in folder".
        print(str(e)+" Not in folder")
        continue
    try:
        for c in range(6):
            condperf = []
            house = []
            lchouse = []
            for t in range(36):
                condperf.append(int(m[c][t][-1]))
                house.append(str(m[c][t][0]))
                if c<2:#absolute condition -> only one house, take this one
                    lchouse.append(str(m[c][t][0]))
                else:#relative or pointing condition -> pick the house with fewer clicks (second house on ties)
                    if AllDf.loc[int(m[c][t][0])][e]<AllDf.loc[int(m[c][t][1])][e]:
                        lchouse.append(str(m[c][t][0]))
                    else:
                        lchouse.append(str(m[c][t][1]))

            AllResults[c][i] = condperf
            AllHouses[c][i] = house
            LeastClickHouse[c][i] = lchouse
    except Exception as err:
        # Still best-effort per subject, but the real cause is shown instead
        # of being reported as a missing file.
        print(str(e)+" could not be processed: "+repr(err))

### Create Performance Matrix and Save as .csv File

In [None]:
# Fraction of correct trials per condition (rows) and subject (columns).
performances = np.array([np.mean(AllResults[cond], axis=1) for cond in range(6)])
vpN = pd.DataFrame({'vp_number': vp_nums})
# Transpose to one row per subject, label the columns with the condition
# names and index the rows by subject number.
p = pd.DataFrame(performances.T, columns=conditions)
p = vpN.join(p).set_index('vp_number')
#p.to_csv('Results/MapPerformances.csv')#comment in to save file

In [None]:
# Summary statistics of the subjects' performance, one column per condition.
p.describe()

### Put Data into DataFrame

In [None]:
# Reshape the wide performance table (one column per condition) into long
# format: one row per subject x task x time condition.
TaskList = ['Absolute','Absolute','Relative','Relative','Pointing','Pointing']
CondList = ['3s','inf','3s','inf','3s','inf']
# Rows are collected first and the frame is built once: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
performance_rows = [
    {'Task':TaskList[i],'Condition':CondList[i],'Performance':p.loc[sj][c],'Subject':sj}
    for sj in list(p.index)
    for i,c in enumerate(conditions)
]
AllPerformances = pd.DataFrame(performance_rows,columns=['Task','Condition','Performance','Subject'])

### Visualize Overall Statistics

In [None]:
# Box plot of performance: tasks on the x axis, time condition as hue.
#group tasks
#color by time condition
fig,ax = plt.subplots(figsize=(10,7))
# Dotted horizontal reference line at performance 0.5.
plt.plot([-5,10],[0.5,0.5],':',color='black', linewidth=5)
sns.boxplot(data=AllPerformances,hue='Condition',x='Task',y='Performance', palette=["red", "royalblue"],linewidth=2.5)
ax.set_xticklabels(['Absolute','Relative','Pointing'],fontsize=15)
ax.set_ylim((0,1))
plt.legend(fontsize=20,loc=4)
#plt.title('Performance of Subjects in the Tasks',fontsize=25)
plt.ylabel('Performance (%)',fontsize=20)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5,dtype=int),fontsize=15)
plt.xlabel("Task",fontsize=20)
plt.show()
#plt.savefig('Results/TaskPerformancesGrouped.png', bbox_inches='tight')

In [None]:
# Raincloud plot of performance per task, split by time condition.
#Plotting adapted from https://peerj.com/preprints/27137v1/
ax = pt.RainCloud(data=AllPerformances,hue='Condition',x='Task',y='Performance', palette=["red", "royalblue"],bw = 0.2,
                 width_viol = .5, figsize = (10,7),pointplot = False, alpha = .85, dodge = True, move = 0.2)

ax.set_xticklabels(['Absolute','Relative','Pointing'],fontsize=15)
#ax.legend(['3s','inf'],fontsize=20,loc=1)

plt.title('Performance of Subjects in the Tasks',fontsize=25)
plt.ylabel('Performance (%)',fontsize=20)
plt.xlabel("Task",fontsize=20)
# Integer percentage labels ("25" rather than "25.0"), as in the other performance plots.
plt.yticks(np.linspace(0.25,0.75,3),np.linspace(25,75,3,dtype=int),fontsize=15)
plt.show()
#plt.savefig('Results/TaskPerformancesRainCloud.png', bbox_inches='tight')

In [None]:
# Raincloud plot of performance per task, restricted to the 'inf' time condition.
ax = pt.RainCloud(data=AllPerformances[AllPerformances['Condition']=='inf'],x='Task',y='Performance', palette=["royalblue"],bw = 0.2,
                 width_viol = .5, figsize = (10,7),pointplot = False, alpha = .85, dodge = True, move = 0.2)
# Dotted horizontal reference line at performance 0.5.
plt.plot([-5,10],[0.5,0.5],':',color='black', linewidth=3)
ax.set_xticklabels(['Absolute','Relative','Pointing'],fontsize=15)
#ax.set_ylim((0,1))
#ax.legend(['3s','inf'],fontsize=20)

#plt.title('Performance of Subjects in the Tasks - Infinite',fontsize=25)
plt.ylabel('Performance (%)',fontsize=20)
# Data are fractions; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0.25,0.75,3),np.linspace(25,75,3,dtype=int),fontsize=15)
plt.xlabel("Task",fontsize=20)
plt.show()
#plt.savefig('Results/TaskPerformancesRainCloud_Infinite_NoTitle.png', bbox_inches='tight')

## Repeated Measure ANOVA for Tasks and Conditions

In [None]:
# Repeated-measures ANOVA on Performance with the within-subject factors
# Task and Condition; Subject identifies the repeated unit.
anovarm = AnovaRM(AllPerformances,'Performance','Subject',within=['Task','Condition'])
fit = anovarm.fit()
fit.summary()

## Factorial ANOVA (One Way Repeated Measure) on Infinite Conditions

In [None]:
# Keep only the 'inf' time condition, then run a repeated-measures ANOVA
# with Task as the single within-subject factor.
infPerformances = AllPerformances[AllPerformances['Condition']=='inf']
anovarm = AnovaRM(infPerformances,'Performance','Subject',within=['Task'])
fit = anovarm.fit()
fit.summary()

## Post-Hoc Paired T-Test on Infinite Conditions

In [None]:
# Paired t-test for every pair of tasks, using the 'inf' condition only.
# NOTE(review): no multiple-comparison correction is applied to the three tests.
for first_task, second_task in itertools.combinations(['Absolute', 'Relative', 'Pointing'], 2):
    first_perf = infPerformances[infPerformances['Task']==first_task]['Performance']
    second_perf = infPerformances[infPerformances['Task']==second_task]['Performance']
    print(first_task+" - "+second_task+": "+str(stats.ttest_rel(first_perf, second_perf)))

#### Performance Different from chance?

In [None]:
# One-sample t-test of all performance values against 0.5.
# `newDF` is not defined anywhere in this notebook; AllPerformances is the
# long-format table that holds the 'Performance' column used here.
stats.ttest_1samp(AllPerformances['Performance'].astype(float), 0.5)

#### Difference in Performance Between Inf and 3 Sec Condition?

In [None]:
# Compare performance between the 'inf' and '3s' time conditions.
# `newDF` is not defined anywhere in this notebook; AllPerformances holds the
# 'Performance' and 'Condition' columns used here.
# NOTE(review): both conditions come from the same subjects, so a paired test
# (stats.ttest_rel) may be more appropriate - confirm before reporting.
perf_long = AllPerformances.astype({'Performance': float})
stats.ttest_ind(perf_long['Performance'][perf_long['Condition']=='inf'], perf_long['Performance'][perf_long['Condition']=='3s'])

#### Fit Linear Regression Model

In [None]:
# Ordinary least squares: Performance explained by Condition and Task.
# `newDF` is not defined anywhere in this notebook; AllPerformances holds the
# columns named in the formula. Performance is cast to float so that it is
# treated as a numeric response.
perf_model = ols("Performance ~ Condition + Task", data=AllPerformances.astype({'Performance': float})).fit()

In [None]:
# Print the summary table of the fitted model.
print(perf_model.summary())

# Performance in Relation to Clicks

### Create DataFrame

In [None]:
# One row per trial of the 'inf' conditions (indices 1, 3, 5): the subject's
# number of clicks on the trial's least-clicked house and the trial outcome.
conds = [1,3,5]
# Rows are collected in a list and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and a set is not a valid (ordered) column spec.
click_rows = []
for c in conds:#range(6):
    for i,s in enumerate(vp_nums):
        for t in range(36):
            house = LeastClickHouse[c][i][t]#AllHouses[c][i][t]
            numviews = AllDf.loc[int(house)][s]
            click_rows.append({'numClicks':numviews,'Performance':AllResults[c][i][t],'Subject':float(s)})
AllClickPerf2 = pd.DataFrame(click_rows,columns=['numClicks','Performance','Subject'])

### One Point for Each Subject-NumClick Combination Averaged Over Tasks and Trials
Same procedure as explained in Laura's Bachelor's thesis.

In [None]:
# Mean performance for every (Subject, numClicks) combination.
grouped2 = AllClickPerf2.groupby(['Subject','numClicks'], as_index=False)['Performance'].mean()

In [None]:
# Store the grouped table so the steps above need not be repeated.
grouped2.to_csv('Results/SubjectClickPerfSorted.csv')

In [None]:
# Reload the grouped table; the row index written by to_csv comes back as an
# extra unnamed column because no index column is specified here.
grouped2 = pd.read_csv('Results/SubjectClickPerfSorted.csv')

In [None]:
# Linear fit of performance against number of clicks; x_estimator=np.mean
# shows one averaged point per distinct numClicks value.
plot = sns.lmplot(x='numClicks',y='Performance',data = grouped2,height=7,aspect=2,scatter_kws={"s": 30},x_jitter=.03,order=1,x_estimator=np.mean,fit_reg=True)
plt.title('Performance in Relation to Number of Clicks - Infinite - Averaged over Tasks and Trials for Each Number of Clicks',fontsize=20)
plt.xlabel('Number of Clicks',fontsize=15)
plt.ylabel('Average Performance in %',fontsize=15)
plt.subplots_adjust(top=0.9)
plt.show()
#plt.savefig('Results/ClickPerfInf_NumCAvg.png', bbox_inches='tight')

### Plot Using Log(Clicks)

In [None]:
# Replace the click counts by log(clicks + 1); the +1 keeps zero clicks defined.
# NOTE: the column is overwritten in place, so running this cell twice applies
# the transform twice.
grouped2['numClicks'] = np.log(grouped2['numClicks']+1)

In [None]:
# Linear fit of performance against log(number of clicks).
# lmplot is a figure-level function that creates its own figure (sized via
# height/aspect), so no plt.figure() call precedes it; one would only leave an
# empty extra figure behind.
sns.lmplot(x='numClicks',y='Performance',data = grouped2[grouped2['numClicks']<100], height=7,aspect=2,palette=["royalblue"],x_jitter=.09,lowess=False)
#plt.title('Performance in Relation to Number of Clicks - Infinite - Averaged over Tasks and Trials for Each Number of Clicks',fontsize=20)
plt.xlabel('Log(Number of Clicks)',fontsize=25)
plt.ylabel('Performance (%)',fontsize=25)
#plt.xticks(np.linspace(0,60,7),fontsize=20)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5),fontsize=20)
plt.subplots_adjust(top=0.9)
plt.show()
#plt.savefig('Results/ClickPerf_TTAvg_NoTitle_All.png', bbox_inches='tight')

### Linear Regression Model Based on Performance ~ Number of Clicks

In [None]:
# Ordinary least squares: Performance explained by numClicks.
# NOTE: grouped2 is used in whatever state earlier cells left it, i.e. with
# log-transformed numClicks if the log cell has been run.
clickperf_model = ols("Performance ~ numClicks", data=grouped2).fit()
print(clickperf_model.summary())

#### Pearson Correlation:
(correlation coefficient, p-value)

In [None]:
# Pearson correlation between performance and numClicks; the result holds the
# correlation coefficient and the p-value.
scipy.stats.pearsonr(grouped2['Performance'], grouped2['numClicks'])

### Weighted Linear Regression
Weighted by number of trials in one data point

In [None]:
# Mean performance and number of trials per (Subject, numClicks) combination.
groupedWeighted = AllClickPerf2.groupby(['Subject','numClicks'], as_index=False).agg(['mean', 'count'])
groupedWeighted.reset_index(inplace=True)
# Flatten the aggregated frame into a plain table via records.
weighted2 = pd.DataFrame(groupedWeighted.to_records())
# NOTE(review): the rename assumes exactly five columns in this order; the
# layout produced by agg/reset_index may differ between pandas versions - verify.
weighted2.columns = ['Ix','Subject','numClicks','Performance','Count']
weighted2.head()

In [None]:
from statsmodels.formula.api import wls
# Weighted least squares: Performance explained by numClicks.
# statsmodels expects weights proportional to the inverse variance of each
# observation. Every data point is a mean over `Count` trials, whose variance
# shrinks with 1/Count, so the weight is Count itself. The previous 1/Count
# down-weighted exactly the points backed by the most trials.
# NOTE(review): hard-coded regression-line values elsewhere in the notebook
# were taken from an earlier fit and must be re-checked against this one.
WLS = wls("Performance ~ numClicks", data=weighted2,weights=np.array(weighted2['Count'],dtype=float)).fit()
WLS.summary()

### Plot it:

In [None]:
# Reload the grouped table and log-transform the click counts, then draw the
# scatter with seaborn's linear fit plus a manually drawn second line.
grouped2 = pd.read_csv('Results/SubjectClickPerfSorted.csv')
grouped2['numClicks'] = np.log(grouped2['numClicks']+1)
sns.lmplot(x='numClicks',y='Performance',data = grouped2[grouped2['numClicks']<100], height=7,aspect=1.4,palette=["royalblue"],x_jitter=.03,lowess=False)
#plt.title('Performance in Relation to Number of Clicks - Infinite - Averaged over Tasks and Trials for Each Number of Clicks',fontsize=20)
# NOTE(review): the orange line uses hard-coded end points (0.4958 at x=0,
# 0.4958+0.018 at x=4), presumably copied from a weighted-regression summary;
# they are not updated when the model is refitted - verify.
plt.plot([0, 4], [0.4958, 0.4958+0.018], linewidth=3,color='orange',linestyle='-')
plt.xlabel('Log(Number of Clicks)',fontsize=25)
plt.ylabel('Performance (%)',fontsize=25)
plt.xticks(np.linspace(0,4,5),fontsize=20)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5),fontsize=20)
plt.legend(['Linear Regression','Weighted Linear Regression'],fontsize=15)
plt.xlim([0,4])
plt.subplots_adjust(top=0.9)
plt.show()
#plt.savefig('Results/ClickPerf_TTAvg_NoTitle_All.png', bbox_inches='tight')

## Spatial Coverage of Seahaven

In [None]:
# Load the map image and the house coordinates, and sum the clicks per house.
SeahavenMap = Image.open('map5.png')
# Each line of HouseList.txt has the form "<house>:<x>;<y>".
coord_rows = []
with open("HouseList.txt","r") as coordinates:
    for co in coordinates:
        if not co.strip():
            continue  # tolerate blank lines, e.g. at the end of the file
        x = float(co.split(':',1)[1].split(';',1)[0])
        y = float(co.split(';',1)[1])
        house = str(co.split(':',1)[0])
        coord_rows.append({'House':house,'x':x,'y':y})
# Built in one step with an explicit column list: DataFrame.append was removed
# in pandas 2.0 and a set is not a valid (ordered) column specification.
coords = pd.DataFrame(coord_rows,columns=['House','x','y'])
coords = coords.set_index('House').sort_index()
# Total number of clicks on each house, summed over all subjects.
overallClicks = np.sum(AllDf,axis=1)

In [None]:
# Load the map image and the house coordinates, and count per house how many
# subjects clicked it at least once.
SeahavenMap = Image.open('map5.png')
# Each line of HouseList.txt has the form "<house>:<x>;<y>".
coord_rows = []
with open("HouseList.txt","r") as coordinates:
    for co in coordinates:
        if not co.strip():
            continue  # tolerate blank lines, e.g. at the end of the file
        x = float(co.split(':',1)[1].split(';',1)[0])
        y = float(co.split(';',1)[1])
        house = str(co.split(':',1)[0])
        coord_rows.append({'House':house,'x':x,'y':y})
# Built in one step with an explicit column list: DataFrame.append was removed
# in pandas 2.0 and a set is not a valid (ordered) column specification.
coords = pd.DataFrame(coord_rows,columns=['House','x','y'])
coords = coords.set_index('House').sort_index()
# Number of subjects with at least one click on each house.
SJNumClicks = np.sum(AllDf>0,axis=1)

#### Color Houses by Amount of Clicks (Light Green-Few, Dark Green-Many, Black-Not Included)

In [None]:
# Draw every house on the map, coloured by its total number of clicks
# ('Greens' colormap, min-max normalised); houses without click data are black.
fig = plt.figure(figsize=(15,15))
SeahavenMap = SeahavenMap.resize((450,500))
ax = plt.subplot2grid((10, 10), (0, 0), colspan=9,rowspan=10)
plt.imshow(SeahavenMap,aspect = 'equal')
# plt.get_cmap replaces plt.cm.get_cmap, which newer matplotlib no longer provides.
cmap = plt.get_cmap('Greens')
a=np.outer(np.arange(0,1,0.01),np.ones(3))
# The extremes do not change inside the loop, so compute them once.
min_clicks = min(overallClicks)
max_clicks = max(overallClicks)
for i in list(coords.index.values):
    # House coordinates are shifted by fixed offsets and swapped (y, x) to
    # land on the resized image.
    centre = (coords['y'].loc[i]-535,coords['x'].loc[i]-180)
    try:
        clicks = overallClicks.loc[int(i)]
    except (KeyError, ValueError, TypeError):
        # House has no entry in the click table: mark it black.
        ax.add_patch(Circle(centre, radius=5, color=(0,0,0)))
        continue
    rgba = cmap((clicks-min_clicks)/(max_clicks-min_clicks))
    ax.add_patch(Circle(centre, radius=5, color=(rgba)))

#plt.title('Overall Number of Clicks During Map Training',fontsize=20)
# Hand-made colour bar in the last grid column; ticks are labelled with the
# average number of clicks per subject.
ax2 = plt.subplot2grid((10, 10), (0, 9),rowspan=10)
plt.imshow(a,aspect='auto',cmap='Greens',origin="lower")
ax2.get_xaxis().set_ticks([])
ax2.get_yaxis().set_ticks(np.linspace(0,99,10))
ax2.get_yaxis().set_ticklabels(np.around(np.linspace(min_clicks/len(AllDf.columns),max_clicks/len(AllDf.columns),10),2))
ax2.yaxis.tick_right()
ax2.set_ylabel("Average Number of Clicks on House",rotation=270, fontsize=15, labelpad=20)
ax2.yaxis.set_label_position("right")
plt.show()
#plt.savefig('Results/MapClicks.png', bbox_inches='tight')

In [None]:
import math
# Draw every house on the map, coloured by the number of subjects that clicked
# it at least once ('Greens' colormap, min-max normalised); houses without
# click data are left out.
fig = plt.figure(figsize=(15,15))
SeahavenMap = SeahavenMap.resize((450,500))
ax = plt.subplot2grid((10, 10), (0, 0), colspan=9,rowspan=10)
plt.imshow(SeahavenMap,aspect = 'equal')
# plt.get_cmap replaces plt.cm.get_cmap, which newer matplotlib no longer provides.
cmap = plt.get_cmap('Greens')
a=np.outer(np.arange(0,1,0.01),np.ones(3))
# The extremes do not change inside the loop, so compute them once.
min_subjects = min(SJNumClicks)
max_subjects = max(SJNumClicks)
for i in list(coords.index.values):
    try:
        clicks = SJNumClicks.loc[int(i)]
    except (KeyError, ValueError, TypeError):
        # House has no entry in the click table: skip it.
        continue
    rgba = cmap((clicks-min_subjects)/(max_subjects-min_subjects))
    # House coordinates are shifted by fixed offsets and swapped (y, x) to
    # land on the resized image.
    ax.add_patch(Circle((coords['y'].loc[i]-535,coords['x'].loc[i]-180), radius=5, color=(rgba)))

#plt.title('Overall Number of Subjects Looking at Respective House During Map Training',fontsize=20)
# Hand-made colour bar in the last grid column; ticks are labelled with the
# percentage of subjects.
ax2 = plt.subplot2grid((10, 10), (0, 9),rowspan=10)
plt.imshow(a,aspect='auto',cmap='Greens',origin="lower")
ax2.get_xaxis().set_ticks([])
ax2.get_yaxis().set_ticks(np.linspace(0,99,10))
ax2.get_yaxis().set_ticklabels(np.linspace((min_subjects/len(AllDf.columns))*100,(max_subjects/len(AllDf.columns))*100,10,dtype=int))
ax2.yaxis.tick_right()
ax2.set_ylabel("Percentage of Subjects That Have Seen This House",rotation=270, fontsize=15, labelpad=20)
ax2.yaxis.set_label_position("right")
plt.show()
#plt.savefig('Results/MapSujClicks.png', bbox_inches='tight')

## Angular Differences

In [None]:
# Bin every trial by the angular difference between its two orientations and
# record the outcome, both as a long table (degreeDF) and as per-condition
# counters (degree_30 ... degree_180).

# House -> angle lookup; each line has the form "<house>_<angle>".
angles = {}
with open("complete_list_houses.txt","r") as f2:
    for line in f2:
        # NOTE(review): the split on the letter 'n' is kept from the original;
        # it was probably meant to strip the trailing newline ('\n') - confirm.
        house = int(line.split('_',1)[0].split('n',1)[0])
        angle = int(line.split('_',1)[1].split('n',1)[0])
        angles[house] = angle

vp_nums = list(AllDf)
bin_edges = [30, 60, 90, 120, 150]
# Per bin a (6, 2) array: [condition][0] = number of trials,
# [condition][1] = number of correct trials.
bin_counts = dict((edge, np.zeros((6,2))) for edge in bin_edges + [180])
degrees = []
degree_rows = []
for i,e in enumerate(vp_nums):
    m = mat_to_py(taskPath,e)
    for cond in range(6):
        for trial in range(36):
            if cond < 2 or cond >3: # absolute and pointing: angles are stored in the trial
                degree = abs(int(m[cond][trial][-5])-int(m[cond][trial][-6]))
            else: # relative: look up the angles of the two houses
                degree = abs(angles[m[cond][trial][-5]]-angles[m[cond][trial][-6]])
            degrees.append(degree)
            # "degree <= b or degree >= 360 - b" is the same as
            # "min(degree, 360 - degree) <= b": fold onto 0..180 and take the
            # first bin edge that is large enough, else 180.
            folded = min(degree, 360-degree)
            ang_bin = next((edge for edge in bin_edges if folded <= edge), 180)
            correct = m[cond][trial][-1]
            degree_rows.append({'Subject':e,'Condition':cond,'AngularDiff':ang_bin,'Performance':float(correct)})
            bin_counts[ang_bin][cond][0] += 1
            if correct:
                bin_counts[ang_bin][cond][1] += 1

# Built in one step with an explicit column list: DataFrame.append was removed
# in pandas 2.0 and a set is not a valid (ordered) column specification.
degreeDF = pd.DataFrame(degree_rows,columns=['Subject','Condition','AngularDiff','Performance'])
degree_30 = bin_counts[30]
degree_60 = bin_counts[60]
degree_90 = bin_counts[90]
degree_120 = bin_counts[120]
degree_150 = bin_counts[150]
degree_180 = bin_counts[180]
allDegs = [degree_30,degree_60,degree_90,degree_120,degree_150,degree_180]

### Plot as Distribution:
One dot = average performance of one participant over all trials with this orientation

Plot like num click above

In [None]:
# `groupeddegree` is not created by any cell of this notebook. If it does not
# exist yet, derive it from degreeDF as the mean performance per subject,
# condition and angular-difference bin.
# NOTE(review): this definition is reconstructed from the variable name - confirm.
try:
    groupeddegree
except NameError:
    groupeddegree = degreeDF.astype({'Performance': float}).groupby(
        ['Subject','Condition','AngularDiff'], as_index=False)['Performance'].mean()
# Keep only the 'inf' conditions (indices 1, 3 and 5).
groupeddegreeInf = groupeddegree[groupeddegree['Condition'].isin([1,3,5])]

In [None]:
# Mean performance for every (Subject, AngularDiff) combination.
groupeddegreeAllInf = groupeddegreeInf.groupby(['Subject','AngularDiff'], as_index=False)['Performance'].mean()

In [None]:
# Show the first rows of the grouped table.
groupeddegreeAllInf.head()

In [None]:
# Saved into Results/, the location from which the file is loaded again below.
groupeddegreeAllInf.to_csv('Results/DegreePerformanceInf.csv')#Average performance for each subject - angular difference combination
#over infinite task conditions

In [None]:
# Reload the grouped table from disk.
groupeddegreeAllInf = pd.read_csv('Results/DegreePerformanceInf.csv')

### Now Plot:

In [None]:
# Scatter of performance against angular difference with a second-order
# polynomial fit (order=2).
#plt.figure(figsize=(10,7))
sns.lmplot(x='AngularDiff',y='Performance',data = groupeddegreeAllInf, height=7,aspect=1.4,palette=["royalblue"],x_jitter=3,order=2)
#plt.title('Performance in Relation to Angular Difference - Infinite \n Averaged over Tasks and Trials with x Angular Difference for Each Subject',fontsize=20)
plt.xlabel('Angular Difference',fontsize=25)
plt.ylabel('Performance (%)',fontsize=25)
plt.xticks(np.linspace(0,180,7),fontsize=20)
plt.xlim(20,190)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5,dtype=int),fontsize=20)
plt.subplots_adjust(top=0.9)
plt.show()
#plt.savefig('Results/AngDiffPerfPoly.png', bbox_inches='tight')

### Plot as Box Plot

In [None]:
# Raincloud plot of performance per angular-difference bin, with point plot.
ax = pt.RainCloud(data=groupeddegreeAllInf,x='AngularDiff',y='Performance', palette=["royalblue"],bw = 0.0,
                 width_viol = .0, figsize = (10,7),pointplot=True,alpha = 1, dodge = True, move = 0.0)

ax.set_xticklabels(np.linspace(30,180,6,dtype=int),fontsize=15)
#plt.title('Average Performance of Subjects Dependent on Angular Difference of Houses',fontsize=25)
plt.ylabel('Performance (%)',fontsize=20)
plt.xlabel("Angular Difference",fontsize=20)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5,dtype=int),fontsize=15)
#plt.plot([-0.5, 9.5], [0.5291, 0.5291], linewidth=3,color='black',linestyle=':')
# Dotted horizontal reference line at performance 0.5.
plt.plot([-0.5, 9.5], [0.5, 0.5], linewidth=3,color='black',linestyle=':')
# NOTE(review): poly_2 is only fitted in a later cell, so this cell fails in a
# fresh top-to-bottom run. The scatter also uses the raw AngularDiff values as
# x positions, which presumably do not match the categorical axis - verify.
plt.scatter(groupeddegreeAllInf['AngularDiff'],poly_2.predict(groupeddegreeAllInf['AngularDiff']), linewidth=3)
plt.show()
#plt.savefig('Results/AngDiffPerfRainCloud_NoTitle.png', bbox_inches='tight')

Black line = median, Red line = Mean

### One Way ANOVA

In [None]:
# Repeated-measures ANOVA on Performance with AngularDiff as the single
# within-subject factor; Subject identifies the repeated unit.
anovarm = AnovaRM(groupeddegreeAllInf,'Performance','Subject',within=['AngularDiff'])
fit = anovarm.fit()
fit.summary()

In [None]:
# Second-order polynomial regression of Performance on AngularDiff
# (intercept, linear and squared term).
poly_2 = smf.ols(formula='Performance ~ 1 + AngularDiff + I(AngularDiff  **2)', data=groupeddegreeAllInf).fit()
poly_2.summary()

In [None]:
# Plot the polynomial model's predictions against the angular difference.
# NOTE(review): predict() is handed a bare Series although the model was fitted
# from a formula; confirm this is accepted by the installed statsmodels version.
plt.scatter(groupeddegreeAllInf['AngularDiff'],poly_2.predict(groupeddegreeAllInf['AngularDiff']), linewidth=3)
plt.show()

### Linear Regression Model for Angular Differences

In [None]:
# Ordinary least squares: Performance explained linearly by AngularDiff.
angdiffperf_model = ols("Performance ~ AngularDiff", data=groupeddegreeAllInf).fit()
print(angdiffperf_model.summary())

## Performance in Relation to Distance Between Houses

### No Binning, Average Over Subjects for Each House Combination
Only run next 3 cells once, then just load the .csv file.

In [None]:
# Load the house coordinates, indexed by the house number as a string without
# leading zeros (str(int(...))).
# Each line of HouseList.txt has the form "<house>:<x>;<y>".
coord_rows = []
with open("HouseList.txt","r") as coordinates:
    for co in coordinates:
        if not co.strip():
            continue  # tolerate blank lines, e.g. at the end of the file
        x = float(co.split(':',1)[1].split(';',1)[0])
        y = float(co.split(';',1)[1])
        house = str(int(co.split(':',1)[0]))
        coord_rows.append({'House':house,'x':x,'y':y})
# Built in one step with an explicit column list: DataFrame.append was removed
# in pandas 2.0 and a set is not a valid (ordered) column specification.
coords = pd.DataFrame(coord_rows,columns=['House','x','y'])
coords = coords.set_index('House').sort_index()

In [None]:
vp_nums = list(AllDf)
# The first subject's task file serves as the reference for the house order.
m1 = mat_to_py(taskPath, vp_nums[0])
# Per condition, the sorted list of house numbers: taken from column 0 for the
# first two (absolute) conditions and from column 1 for all others.
houseOrder = [
    list(np.sort([trial[1 if c > 1 else 0] for trial in np.array(m1[c])]))
    for c in range(6)
]

In [None]:
# One row per trial of the relative and pointing conditions: Euclidean
# distance between the two houses, the trial outcome, and the position of the
# second house in that condition's sorted house list.
conditions = ["Absolute - 3s ","Absolute - inf","Relative - 3s ","Relative - inf","Pointing 3s   ","Pointing - inf"]
tasks = ["Relative","Relative","Pointing","Pointing"]
Conds = ["3s","inf","3s","inf"]

# Rows are collected in a list and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and a set is not a valid (ordered) column spec.
dist_rows = []
for i,e in enumerate(vp_nums):
    try:
        m = mat_to_py(taskPath,e)
    except IOError:
        # Only a missing/unreadable file is reported as "Not in folder".
        print(str(e)+" Not in folder")
        continue
    try:
        for c in range(4):
            for t in range(36):
                # c + 2 skips the two absolute conditions.
                h1 = (coords['x'].loc[str(m[c+2][t][0])],coords['y'].loc[str(m[c+2][t][0])])
                h2 = (coords['x'].loc[str(m[c+2][t][1])],coords['y'].loc[str(m[c+2][t][1])])
                dist = distance.euclidean(h1, h2)
                hC = houseOrder[c+2].index(m[c+2][t][1])
                dist_rows.append({'Subject':e,'Task':tasks[c],'Condition':Conds[c],
                                  'Distance':dist,'Performance':float(m[c+2][t][-1]),'HouseCombination':hC})
    except Exception as err:
        # Still best-effort per subject, but the real cause is shown instead
        # of being reported as a missing file.
        print(str(e)+" could not be processed: "+repr(err))
DistPerfDF = pd.DataFrame(dist_rows,columns=['Subject','Task','Condition','Distance','Performance','HouseCombination'])

In [None]:
# Store the per-trial distance/performance table.
DistPerfDF.to_csv("Results/DistancePerformanceAll.csv")

In [None]:
# Reload the per-trial distance/performance table from disk.
DistPerfDF = pd.read_csv("Results/DistancePerformanceAll.csv")

In [None]:
# Mean performance and distance per house combination, task and condition.
# Several columns must be selected with a list; selecting them with a bare
# tuple after groupby is no longer supported by pandas.
group = DistPerfDF.groupby(['HouseCombination','Task','Condition'], as_index=False)[['Performance','Distance']].mean()

In [None]:
# Show the first rows of the grouped table.
group.head()

In [None]:
# Store the grouped table.
group.to_csv("Results/DistPerfGroupedMean.csv")

In [None]:
# Reload the grouped table from disk.
group = pd.read_csv("Results/DistPerfGroupedMean.csv")

### Plotting:

In [None]:
# Regression plot of performance against house distance, 'inf' condition only,
# with a 95% confidence band (ci=95).
plt.figure(figsize=(10,7))
sns.regplot(x="Distance", y="Performance", data=group[group['Condition']=="inf"],color='royalblue',ci=95)
#plt.title("Task Performance - Distance Between Houses in Seahaven \n One Point = Task,Condition,House Combination Averaged Over Subjects",fontsize=20)
plt.xlabel("Distance (Unity Units)",fontsize=20)
plt.ylabel("Performance (%)",fontsize=20)
# Data are fractions in [0, 1]; the ticks are relabelled as percentages.
plt.yticks(np.linspace(0,1,5),np.linspace(0,100,5,dtype=int),fontsize=15)
plt.xticks(np.linspace(0,400,9),fontsize=15)
plt.ylim(0.2,0.8)
plt.xlim(0,380)
#plt.savefig('Results/DistPerfRegression_NoTitle.png', bbox_inches='tight')
plt.show()

In [None]:
# Ordinary least squares: mean performance explained by house distance,
# fitted on the "inf" condition rows only (same subset as the plot above).
inf_means = group[group['Condition']=="inf"]
distperf_model = ols("Performance ~ Distance", data=inf_means).fit()
print(distperf_model.summary())

## Repeated Measurements

In [None]:
allFiles = os.listdir(mapPath)
# Display labels for the six task/time conditions (index = condition code).
conditions = ["Absolute - 3s ","Absolute - inf","Relative - 3s ","Relative - inf","Pointing 3s   ","Pointing - inf"]

# House identifiers: the integer before the first '_' on each line, as a string
# without leading zeros. The file is closed as soon as it has been read.
with open("./complete_list_houses.txt","r") as g:
    allHouses = [str(int(line.split('_',1)[0])) for line in g]

# Collect one record per trial and build the frame once at the end
# (DataFrame.append in a loop is quadratic and no longer exists in pandas 2.x).
perf_rows = []
for e in allFiles:
    if not e.endswith(".ods"):
        continue
    subject_file_id = e.split('.',1)[0]
    usable,code = checkUsability(int(subject_file_id),Rep=True)
    if not usable:
        continue
    #print(ord(str(code)[1])-97)
    m = mat_to_py(taskPath,subject_file_id)
    code_str = str(code)
    # First character of the code: 1-based measurement number (stored 0-based).
    # Second character: lowercase subject letter, mapped to 'a' -> 0, 'b' -> 1, ...
    measurement = int(code_str[0])-1
    subject = ord(code_str[1])-97
    for c in range(6):
        for t in range(36):
            # The last entry of each trial is stored as the performance value.
            perf_rows.append({'Subject': subject,'Measurement':measurement,
                              'Condition':c,'Performance':int(m[c][t][-1])})

performances = pd.DataFrame(perf_rows, columns=['Subject','Measurement','Condition','Performance'])

In [None]:
# Show the distinct subject indices that made it into the table.
np.unique(performances['Subject'])# List of subjects

In [None]:
# Cache the per-trial repeated-measures table (the row index is written as the first column).
performances.to_csv("Results/RepeatedMPerformances.csv")

In [None]:
# Reload the cached per-trial repeated-measures table.
performances = pd.read_csv("Results/RepeatedMPerformances.csv")

#### Plot Performances Averaged over 14 Repeated Measure Subjects

In [None]:
# Bar chart of mean performance per condition, one bar per measurement.
# NOTE(review): seaborn 0.9 renamed factorplot/size to catplot/height -- confirm
# the installed seaborn version still provides this call.
performances['Performance'] = performances['Performance'].astype(float)
ax = sns.factorplot(
    x="Condition", y="Performance", hue="Measurement", data=performances,
    size=5, kind="bar", palette="Blues", aspect=2, legend_out = False,
)
ax.set_xticklabels(conditions, fontsize=12)
ax.set_yticklabels(fontsize=12)
ax.set_xlabels('Condition', fontsize=15)
ax.set_ylabels('Performance', fontsize=15)
l = plt.legend(title="Measurement", fontsize=15)
# Relabel the first three legend entries with 1-based measurement numbers.
for entry_idx, entry_label in enumerate(('1', '2', '3')):
    l.get_texts()[entry_idx].set_text(entry_label)
plt.setp(l.get_title(), fontsize=15)
ax.fig.suptitle('Average Performance in Each Task for Three Measurements', fontsize=15)
plt.show()
#plt.savefig('Results/RepMeasPerf.png', bbox_inches='tight')

In [None]:
# Average the trials so that each (measurement, subject, condition) cell holds
# a single mean performance value, then preview the result.
repgroup = performances.groupby(['Measurement','Subject','Condition'], as_index=False)['Performance'].mean()
repgroup.head()

In [None]:
# Cache the per-cell means (the row index is written as the first column).
repgroup.to_csv("Results/RepeatedMPerformanceGrouped.csv")

In [None]:
# Reload the cached per-cell means.
repgroup = pd.read_csv("Results/RepeatedMPerformanceGrouped.csv")

In [None]:
# Box plot of per-subject mean performance for each condition, split by measurement.
# Two-line tick labels so all six conditions fit under the axis.
conditions = ["Absolute \n 3s ","Absolute \n inf","Relative \n 3s ","Relative \n inf","Pointing \n 3s   ","Pointing \n inf"]
plt.figure(figsize=(10,7))
measurement_palette = sns.xkcd_palette(['lightblue','blue','denim blue'])
ax = sns.boxplot(
    x="Condition", y="Performance", hue="Measurement", data=repgroup,
    palette=measurement_palette,
)
ax.set_xticklabels(conditions, fontsize=15, rotation=0)
l = plt.legend(title="Measurement", fontsize=15, loc=4)
# Relabel the first three legend entries with 1-based measurement numbers.
for entry_idx, entry_label in enumerate(('1', '2', '3')):
    l.get_texts()[entry_idx].set_text(entry_label)
plt.setp(l.get_title(), fontsize=15)
# Dotted horizontal reference line at 0.5 (presumably chance level -- confirm).
plt.plot([-0.5, 9.5], [0.5, 0.5], linewidth=3, color='black', linestyle=':')
plt.xlabel("Condition", fontsize=20)
plt.ylabel("Performance (%)", fontsize=20)
# Performance is stored as a fraction in [0, 1]; label the ticks in percent.
fraction_ticks = np.linspace(0,1,5)
percent_labels = np.linspace(0,100,5,dtype=int)
plt.yticks(fraction_ticks, percent_labels, fontsize=15)
#plt.title('Average Performance in Each Task for Three Measurements',fontsize=25)
#ax.fig.suptitle('Average Performance in Each Task for Three Measurements',fontsize=15)
plt.show()
#plt.savefig('Results/RepMeasPerfBox_NoTitle.png', bbox_inches='tight')

In [None]:
# Split the numeric condition code (0-5) into its two factors, task and time
# limit, so both can be used as separate within-subject factors.
tasks = ['Absolute','Absolute','Relative','Relative','Pointing','Pointing']
times = ['3s','inf','3s','inf','3s','inf']

# Build the frame in one step from every row of `repgroup`. This replaces a
# hard-coded 252-row loop (which breaks as soon as the number of subjects
# changes) and the per-row DataFrame.append, which pandas 2.x no longer has.
# int() makes the list lookup work even if the codes were loaded as floats.
condition_codes = [int(code) for code in repgroup['Condition']]
TaskTimeDF = pd.DataFrame(
    {
        'Subject': repgroup['Subject'].values,
        'Measurement': repgroup['Measurement'].values,
        'Task': [tasks[code] for code in condition_codes],
        'Time': [times[code] for code in condition_codes],
        'Performance': repgroup['Performance'].values,
    },
    columns=['Subject','Measurement','Task','Time','Performance'],
)

In [None]:
# Preview the first rows of the task/time table.
TaskTimeDF.head()

In [None]:
# Save the task/time table (the row index is written as the first column).
# NOTE(review): the file name spells "Tine" (presumably "Time"); left unchanged
# because other scripts may read this exact path.
TaskTimeDF.to_csv("Results/RepeatedTaskTinePerformance.csv")

### Repeated Measure ANOVA Within Task, Time and Measurement

In [None]:
# Repeated-measures ANOVA on Performance with Subject as the subject identifier
# and Task, Time and Measurement as within-subject factors.
anovarm = AnovaRM(TaskTimeDF,'Performance','Subject',within=['Task','Time','Measurement'])
fit = anovarm.fit()
# Last expression of the cell, so the notebook displays the ANOVA table.
fit.summary()

## FRS Results

In [None]:
# Load the FRS spreadsheet; the cells below read its columns 'ScaleMean',
# 'AbsInf', 'RelInf' and 'PointInf'.
frs = pd.read_excel('FRS_MAP_64_final_sk copy.xlsx')

In [None]:
# Preview the first rows of the FRS table.
frs.head()

In [None]:
# Long-format copy of the FRS table: for each of the first 65 rows of `frs`,
# one record per task holding that task's performance and the row's scale mean.
# Records are collected in a list and the frame is built once, because
# DataFrame.append no longer exists in pandas 2.x; the column list (instead of
# a set) keeps the column order deterministic.
frs_task_columns = [('AbsInf','Absolute Inf'),('RelInf','Relative Inf'),('PointInf','Pointing Inf')]
frs_records = [
    {'Performance':frs[column][i],'Scale':frs['ScaleMean'][i],'Task':task_label}
    for i in range(65)
    for column, task_label in frs_task_columns
]
frsDF = pd.DataFrame(frs_records, columns=['Performance','Scale','Task'])

In [None]:
# Overlay three scatter plots with linear fits (no confidence band) on one
# axis: each task's performance column against the FRS scale mean.
fig, ax = plt.subplots(figsize=(10,7))
xlim = [1,7]
ax.set_xlim(xlim)
task_series = (
    ('AbsInf', 'royalblue'),
    ('RelInf', 'blue'),
    ('PointInf', 'darkblue'),
)
for performance_column, line_color in task_series:
    sns.regplot(x='ScaleMean', y=performance_column, data=frs, ci=None, ax=ax, color=line_color)
plt.xlabel("Spatial Ability Score", fontsize=20)
plt.ylabel("Performance (%)", fontsize=20)
percent_ticks = np.linspace(20,80,4)
plt.yticks(percent_ticks, np.linspace(20,80,4,dtype=int), fontsize=15)
plt.xticks(fontsize=15)
# Legend entries follow the plotting order above.
plt.legend(['Absolute','Relative','Pointing'], fontsize=15, loc=4)
ax.set_ylim([20,80])
plt.show()
#plt.savefig('Results/FRSPoiInfRegression_New.png', bbox_inches='tight')
#plt.savefig('Results/FRSPoiInfRegression_New.png', bbox_inches='tight')

### Linear Regression - Pointing Infinite

In [None]:
# Ordinary least squares on all rows of `frs`: PointInf explained by ScaleMean.
frsPoiperf_model = ols("PointInf ~ ScaleMean", data=frs).fit()
print(frsPoiperf_model.summary())

In [None]:
# Pearson correlation between ScaleMean and PointInf over the first 64 rows.
# NOTE(review): the regression above is fitted on all rows of `frs` and the
# frsDF loop reads 65 rows -- confirm that 64 is the intended number of subjects.
scipy.stats.pearsonr(frs['ScaleMean'][:64],frs['PointInf'][:64])

### Linear Regression - Absolute Infinite

In [None]:
# Ordinary least squares on all rows of `frs`: AbsInf explained by ScaleMean.
# The variable name is reused from the pointing model and is overwritten here.
frsPoiperf_model = ols("AbsInf ~ ScaleMean", data=frs).fit()
print(frsPoiperf_model.summary())

In [None]:
# Pearson correlation between ScaleMean and AbsInf over the first 64 rows.
# NOTE(review): the regression above is fitted on all rows of `frs` -- confirm
# that 64 is the intended number of subjects.
scipy.stats.pearsonr(frs['ScaleMean'][:64],frs['AbsInf'][:64])

### Linear Regression - Relative Infinite

In [None]:
# Ordinary least squares on all rows of `frs`: RelInf explained by ScaleMean.
# The variable name is reused from the pointing model and is overwritten here.
frsPoiperf_model = ols("RelInf ~ ScaleMean", data=frs).fit()
print(frsPoiperf_model.summary())

In [None]:
# Pearson correlation between ScaleMean and RelInf over the first 64 rows.
# NOTE(review): the regression above is fitted on all rows of `frs` -- confirm
# that 64 is the intended number of subjects.
scipy.stats.pearsonr(frs['ScaleMean'][:64],frs['RelInf'][:64])