# Analyze Eye Gaze Data

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import pickle
from statsmodels.robust.scale import mad

import matplotlib.pyplot as plt

In [None]:
# Read the subject metadata table and show the group label of the first
# row whose 'Subject' value is 3.
a = pd.read_csv('adhd_eye_movement_data/user_info.csv')
a.loc[a['Subject'] == 3, 'Group'].iloc[0]

In [None]:
def load_raw_data(subject_num):
    """Load the raw recording of one subject, indexed by integer time.

    The subject is looked up in ``adhd_eye_movement_data/user_info.csv``:

    * listed twice  -> treated as an off-ADHD/on-ADHD subject; the
      ``off_ADHD`` recording is loaded;
    * listed once   -> the recording matching its ``Group`` value
      (``'off-ADHD'`` or ``'Ctrl'``) is loaded.

    Args:
        subject_num: Subject identifier as stored in the ``Subject`` column.

    Returns:
        A DataFrame whose index is the ``Time`` column cast to ``int`` (rows
        without a ``Time`` value are dropped), or ``-1`` when no recording
        can be selected: the subject is not listed, is listed more than
        twice, or is listed once with a group other than the two above.
    """
    data_dir = 'adhd_eye_movement_data/'
    raw_data_dir = data_dir + 'raw_data/'
    user_info_df = pd.read_csv(data_dir + 'user_info.csv')

    # Select the subject's rows directly; an unknown subject simply yields
    # zero rows instead of a KeyError from a value_counts() lookup.
    subject_rows = user_info_df[user_info_df['Subject'] == subject_num]

    if len(subject_rows) == 2:
        # off-ADHD/on-ADHD subject: use the off-ADHD recording.
        file_suffix = 'off_ADHD'
    elif len(subject_rows) == 1:
        group = subject_rows['Group'].iloc[0]
        # Maps the group label to the file-name suffix; any other label
        # (including a missing one) gives None.
        file_suffix = {'off-ADHD': 'off_ADHD', 'Ctrl': 'Ctrl'}.get(group)
    else:
        file_suffix = None

    if file_suffix is None:
        return -1

    data = pd.read_csv(
        "{}/subject_{}_{}.csv".format(raw_data_dir, subject_num, file_suffix)
    )

    # Samples without a timestamp cannot be placed on the time axis.
    data = data.dropna(subset=['Time']).set_index('Time')
    data.index = data.index.astype(int)

    return data

def extract_trials(raw_data, *, end_event=7, start_events=(1, 2),
                   nominal_offset=4750, search_radius=500, trial_len=8000):
    """Cut a time-indexed recording into one array per trial.

    Every row whose ``Events`` value equals ``end_event`` anchors one trial.
    The trial start defaults to ``nominal_offset`` index units before that
    anchor. If one of ``start_events`` occurs within ``search_radius`` of the
    default start, the position of that event is used instead (codes are
    tried in the given order, so earlier codes take priority).

    Args:
        raw_data: DataFrame with an ``Events`` column, indexed by time labels
            that support integer arithmetic.
        end_event: Event code that anchors a trial.
        start_events: Event codes that mark a trial start, in priority order.
        nominal_offset: Distance from the anchor back to the default start.
        search_radius: Half-width of the window searched for a start event.
        trial_len: Length of the label range taken per trial.

    Returns:
        List of 2-D NumPy arrays, one per anchor row, each holding the rows
        of ``raw_data`` with labels ``start .. start + trial_len - 1``
        (label-based, inclusive; the row count equals ``trial_len`` only
        when every label in that range is present).

    Raises:
        AssertionError: If a start-event code occurs more than once inside
            the search window of a trial.
    """
    events = raw_data['Events']
    anchors = raw_data.index[(events == end_event).to_numpy()]

    trials = []
    for idx in anchors:
        # Fallback start when no explicit start marker is found nearby.
        start_idx = idx - nominal_offset

        window = events.loc[start_idx - search_radius:start_idx + search_radius]
        for code in start_events:
            hits = window.index[(window == code).to_numpy()]
            if len(hits):
                assert len(hits) == 1, (
                    "expected exactly one start event {} near {}, found {}"
                    .format(code, idx, len(hits))
                )
                start_idx = hits[0]
                break

        trials.append(raw_data.loc[start_idx:start_idx + trial_len - 1].to_numpy())

    return trials

In [None]:
# Load the raw recording for subject 16 and display it as the cell output.
test = load_raw_data(16)
test

In [None]:
# Split the subject-16 recording into per-trial arrays and display them.
test_trials = extract_trials(test)
test_trials

In [None]:
# Inspect column 3 of the first trial's array.
test_trials[0][:, 3]

# Data Preprocessing

## Heuristic Spike Filter + Signal Loss + Noise Reduction

In [None]:
from eye_gaze_analysis import *

# Classifier instance reused by the following cells.
clf = EyegazeClassifier()

# Columns 1 and 2 of the first trial are passed as the x and y gaze traces.
# NOTE(review): presumably these are the two gaze position columns - confirm
# against the column order of the raw CSV.
data = {'x': test_trials[0][:, 1], 'y': test_trials[0][:, 2]}

In [None]:
# Preprocess the first trial, run the classifier on it, and display the result.
events = clf(clf.preproc(data))
events

In [None]:
# Plot the preprocessed gaze trace together with the detected events.
clf.show_gaze(pp=clf.preproc(data), events=events)

In [None]:
# Apply the heuristic spike filter to the x trace and plot the result.
# NOTE(review): raw_x is not defined anywhere in the visible notebook, and
# heuristic_spike_filter is presumably supplied by the star import from
# eye_gaze_analysis - confirm both before running this cell.
filtered_x = heuristic_spike_filter(raw_x)
plt.plot(filtered_x)

In [None]:
# Apply the heuristic spike filter to the y trace and plot the result.
# NOTE(review): raw_y is not defined anywhere in the visible notebook, and
# heuristic_spike_filter is presumably supplied by the star import from
# eye_gaze_analysis - confirm both before running this cell.
filtered_y = heuristic_spike_filter(raw_y)
plt.plot(filtered_y)

In [None]:
def euclidean_dist(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Each point is an indexable pair ``(x, y)``; only elements 0 and 1 are
    read.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_sum = np.square(delta_x) + np.square(delta_y)
    return np.sqrt(squared_sum)

def ptp_velocity(point1, point2, t1, t2):
    """Return the point-to-point velocity between two timed 2-D points.

    ``point1`` and ``point2`` are ``(x, y)`` pairs sampled at times ``t1``
    and ``t2``; the result is their Euclidean distance divided by
    ``t2 - t1``.
    """
    distance = euclidean_dist(point1, point2)
    elapsed = t2 - t1
    return distance / elapsed

def ptp_velocities(data, x_col='Position_1', y_col='Position_2'):
    """Return the point-to-point velocities between consecutive samples.

    For every pair of neighbouring rows the Euclidean distance between their
    ``(x, y)`` positions is divided by the difference of their index values,
    which are used as the sample times.

    Args:
        data: DataFrame whose index holds the sample times and which has the
            two position columns named below.
        x_col: Name of the column with the x position.
        y_col: Name of the column with the y position.

    Returns:
        A list of ``len(data) - 1`` floats (empty when fewer than two rows
        are given). Neighbouring rows with identical times produce ``inf``
        or ``nan`` rather than raising.
    """
    if len(data) < 2:
        return []

    times = data.index.to_numpy(dtype=float)
    xs = data[x_col].to_numpy(dtype=float)
    ys = data[y_col].to_numpy(dtype=float)

    # Vectorised form of distance / elapsed time for each neighbouring pair.
    distances = np.sqrt(np.square(np.diff(xs)) + np.square(np.diff(ys)))
    return (distances / np.diff(times)).tolist()

# Quick sanity checks: points are passed as (x, y) pairs, times separately.
# Distance between (5, 0) and (100, 0) is 95; over 5 time units that is 19.
print(euclidean_dist((5, 0), (100, 0)))
print(ptp_velocity((5, 0), (100, 0), 5, 10))

# Statistical Tests

In [None]:
def _classify_trial(trial):
    """Run a fresh EyegazeClassifier on one trial array and return its output."""
    clf = EyegazeClassifier()
    # Columns 1 and 2 of the trial array are used as the x and y gaze traces.
    subject_data = {'x': trial[:, 1], 'y': trial[:, 2]}
    return clf(clf.preproc(subject_data))


def _classify_all_trials(n):
    """Load subject ``n`` once and classify every extracted trial, in order."""
    return [_classify_trial(trial) for trial in extract_trials(load_raw_data(n))]


def classifySubjectN(n, trial_num):
    """Classify a single trial of one subject.

    Args:
        n: Subject number.
        trial_num: Position of the trial in the list returned by
            ``extract_trials``.

    Returns:
        The EyegazeClassifier call output on the preprocessed data of the
        requested trial.
    """
    subject_trials = extract_trials(load_raw_data(n))
    return _classify_trial(subject_trials[trial_num])


def congregateSubjectDataN(n):
    """Classify every trial of one subject.

    The recording is loaded and split once, and all extracted trials are
    processed, so no fixed trial count is assumed.

    Args:
        n: Subject number.

    Returns:
        List with the classifier output of each trial, in trial order.
    """
    return _classify_all_trials(n)


def congregateSpecific(n, field):
    """Collect, per trial, the first classifier entry labelled ``field``.

    Trials whose output contains no entry with that label are skipped, the
    same way ``congregateSpecificMeasurement`` skips them; the result can
    therefore be shorter than the number of trials.

    Args:
        n: Subject number.
        field: Value to match against each entry's ``'label'`` key.

    Returns:
        List of the first matching entry of every trial that has one.
    """
    res = []
    for trial_events in _classify_all_trials(n):
        matching = [d for d in trial_events if d['label'] == field]
        if matching:
            res.append(matching[0])
    return res


def congregateSpecificMeasurement(n, field, measurement):
    """Collect one measurement from the first ``field`` entry of each trial.

    Trials that produce no entry labelled ``field`` (for example when
    'SACC' is not available) are skipped.

    Args:
        n: Subject number.
        field: Value to match against each entry's ``'label'`` key.
        measurement: Key of the value to read from the matching entry.

    Returns:
        List of the requested values, one per trial that has a matching
        entry.
    """
    return [entry[measurement] for entry in congregateSpecific(n, field)]

In [None]:
# Collect the 'avg_vel' value of the first 'SACC' entry of each trial of subject 2.
congregateSpecificMeasurement(2, 'SACC', 'avg_vel')

# Scanpath Analysis

In [None]:
# Scanpath of the loaded recording: Position_1 plotted against Position_2.
plt.plot(test['Position_1'], test['Position_2'])

In [None]:
# Scanpath for subject 45; the data is fetched once and reused for both axes
# instead of being loaded a second time for the y values.
# NOTE(review): fetch_data is not defined in the visible notebook -
# presumably it comes from the star import or is an older name for
# load_raw_data; confirm before running.
subject_45 = fetch_data(45)
plt.plot(subject_45['Position_1'], subject_45['Position_2'])

In [None]:
# Mask out every row that carries no event code. DataFrame.where needs a
# boolean condition, so the numeric Events column is converted explicitly.
# NOTE(review): assumes a missing or zero Events value means "no event" - confirm.
test.where(test['Events'].fillna(0).astype(bool))