# Analyze Eye Gaze Data

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import pickle
from statsmodels.robust.scale import mad

import matplotlib.pyplot as plt

In [21]:
# Load the per-subject metadata table and show the Group recorded for subject 3.
a = pd.read_csv('adhd_eye_movement_data/user_info.csv')
# Select by boolean mask / label, then take the first match positionally.
# (Passing an index *label* to .iloc only works while labels equal positions.)
a.loc[a['Subject'] == 3, 'Group'].iloc[0]

'off-ADHD'

In [22]:
def load_raw_data(subject_num):
    """Load one subject's raw recording as a DataFrame indexed by integer time.

    The subject's rows in ``adhd_eye_movement_data/user_info.csv`` decide which
    file under ``adhd_eye_movement_data/raw_data/`` is read:

    * two rows (off-ADHD/on-ADHD subject): ``subject_<n>_off_ADHD.csv``;
    * one row with Group ``'off-ADHD'``: ``subject_<n>_off_ADHD.csv``;
    * one row with Group ``'Ctrl'``: ``subject_<n>_Ctrl.csv``.

    Args:
        subject_num: Value matched against the ``Subject`` column.

    Returns:
        A DataFrame whose index is the ``Time`` column cast to ``int`` (rows
        without a ``Time`` value are dropped), or ``-1`` when no recording can
        be selected: the subject is not listed, is listed more than twice, or
        has a single row whose Group is neither ``'off-ADHD'`` nor ``'Ctrl'``.
    """
    data_dir = 'adhd_eye_movement_data/'
    raw_data_dir = data_dir + 'raw_data/'
    user_info_df = pd.read_csv(data_dir + 'user_info.csv')

    # Boolean selection instead of value_counts()[subject_num]: an unknown
    # subject yields zero rows here rather than raising KeyError.
    subject_rows = user_info_df[user_info_df['Subject'] == subject_num]
    n_rows = len(subject_rows)

    if n_rows == 2:
        # If off-ADHD/on-ADHD subject: only the off-ADHD recording is loaded.
        file_suffix = 'off_ADHD'
    elif n_rows == 1:
        group = subject_rows.iloc[0]['Group']
        file_suffix = {'off-ADHD': 'off_ADHD', 'Ctrl': 'Ctrl'}.get(group)
        if file_suffix is None:
            # No file naming rule is known for this group.
            return -1
    else:
        return -1

    data = pd.read_csv(raw_data_dir + 'subject_{}_{}.csv'.format(subject_num, file_suffix))

    # Samples without a timestamp cannot be placed on the time index.
    data = data.dropna(subset=['Time']).set_index('Time')
    data.index = data.index.astype(int)

    return data

def extract_trials(raw_data, anchor_event=7, start_events=(1, 2),
                   search_window=(5250, 4250), default_offset=4750,
                   trial_length=8000):
    """Cut a time-indexed recording into one array per anchor event.

    For every row whose ``Events`` value equals ``anchor_event``, the trial
    start is located by looking for a start marker in the index-label window
    ``[t - search_window[0], t - search_window[1]]`` (``t`` being the anchor's
    index label). Markers are tried in the order given by ``start_events``;
    the first one present wins. If none is present the start falls back to
    ``t - default_offset``. The trial is the label slice
    ``[start, start + trial_length - 1]``.

    NOTE(review): what event codes 7, 1 and 2 mean is not visible in this
    notebook; confirm against the dataset documentation.

    Args:
        raw_data: DataFrame with an ``Events`` column and an integer index
            that supports label slicing (presumably the output of
            ``load_raw_data``).
        anchor_event: Event code that identifies a trial.
        start_events: Event codes accepted as trial-start markers, in
            priority order.
        search_window: ``(furthest, nearest)`` distances before the anchor
            bounding the marker search, in index units.
        default_offset: Distance before the anchor used when no marker is
            found.
        trial_length: Span of index labels covered by each trial.

    Returns:
        List of 2-D NumPy arrays, one per anchor event, in index order. Arrays
        can be shorter than ``trial_length`` rows when the recording has gaps
        or ends early.

    Raises:
        AssertionError: If a start marker occurs more than once inside the
            search window.
    """
    events = raw_data['Events']
    furthest, nearest = search_window

    trials = []
    for anchor_time in raw_data.index[events == anchor_event]:
        window_events = events.loc[anchor_time - furthest:anchor_time - nearest]

        start_time = anchor_time - default_offset
        for marker in start_events:
            marker_times = window_events.index[window_events == marker]
            if len(marker_times) > 0:
                assert len(marker_times) == 1, (
                    'start marker {} occurs {} times before anchor at {}'.format(
                        marker, len(marker_times), anchor_time))
                start_time = marker_times[0]
                break

        trials.append(raw_data.loc[start_time:start_time + trial_length - 1].to_numpy())

    return trials

In [26]:
# Load the raw recording for subject 16 and display the resulting frame.
test = load_raw_data(16)
test

Unnamed: 0_level_0,Diameter,Position_1,Position_2,Events
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2585947,10998.0,926.8,512.0,0
2585948,11002.0,926.6,511.5,0
2585949,11004.0,926.7,511.7,0
2585950,11007.0,926.6,512.2,0
2585951,11006.0,926.5,512.6,0
...,...,...,...,...
4946164,6659.0,942.7,629.7,0
4946165,6656.0,942.8,628.8,0
4946166,6653.0,942.8,629.1,0
4946167,6654.0,942.8,629.4,0


In [77]:
# Number of trials cut from `test` (extract_trials yields one per row whose Events value is 7).
len(extract_trials(test))

160

# Data Preprocessing

## Heuristic Spike Filter + Signal Loss + Noise Reduction

In [6]:
# Take Position_1 / Position_2 of the first element of `a` as raw arrays and
# pack them under the keys 'x' and 'y' for clf.preproc.
# NOTE(review): the only visible binding of `a` is the user_info DataFrame, on
# which `a[0]` is a column lookup. This cell presumably relied on `a` being a
# sequence of per-trial DataFrames from a cell that is no longer present.
# Confirm and rebuild it explicitly so the notebook survives Restart & Run All.
x_vals = a[0]['Position_1'].values
y_vals = a[0]['Position_2'].values

data = {'x': x_vals, 'y': y_vals}

In [7]:
# Instantiate the gaze-event classifier with its default arguments.
# NOTE(review): EyegazeClassifier is not imported in any visible cell; add the
# import to the top import cell once its source module is confirmed.
clf = EyegazeClassifier()

In [8]:
# Preprocess the x/y samples, classify them, and display the detected events
# (the recorded output is a list of dicts with labels such as PURS, FIXA and SACC).
events = clf(clf.preproc(data))
events

[{'id': None,
  'label': 'PURS',
  'start_time': 0.122,
  'end_time': 0.206,
  'start_x': 900.0103051747063,
  'start_y': 728.9139761167664,
  'end_x': 885.6726227333086,
  'end_y': 800.753162317563,
  'amp': 0.8141851946325218,
  'peak_vel': 82.98032114439351,
  'med_vel': 5.536163289037253,
  'avg_vel': 13.716671700005493},
 {'id': None,
  'label': 'FIXA',
  'start_time': 0.206,
  'end_time': 0.281,
  'start_x': 885.6756302521055,
  'start_y': 800.7371959310084,
  'end_x': 882.435603715175,
  'end_y': 801.4207872622777,
  'amp': 0.033113545669710955,
  'peak_vel': 8.240750409460293,
  'med_vel': 3.3567486737932786,
  'avg_vel': 3.3402581924208326},
 {'id': 5,
  'label': 'SACC',
  'start_time': 0.281,
  'end_time': 0.297,
  'start_x': 882.2191950464444,
  'start_y': 802.477664750115,
  'end_x': 876.7396727111947,
  'end_y': 835.6215833701947,
  'amp': 0.33593816492279766,
  'peak_vel': 32.359101139381124,
  'med_vel': 23.29201315798732,
  'avg_vel': 21.842108177454538},
 {'id': None,


In [None]:
# Draw the classified events.
# NOTE(review): the recorded output of this cell is "NameError: name 'col' is
# not defined". `col` is not used in this cell, so the error presumably comes
# from inside draw_fixations. Fix it there before relying on this cell.
clf.draw_fixations(events)

NameError: name 'col' is not defined

In [None]:
# Plot the preprocessed gaze together with the classified events.
# Note that preproc is run again on `data` here rather than reusing an earlier result.
clf.show_gaze(pp=clf.preproc(data), events=events)

In [None]:
# Re-display the recording held in `test`.
test

In [None]:
# Plot the raw Position_1 samples of the first element of `a` against sample number.
# NOTE(review): the only visible binding of `a` is the user_info DataFrame, so
# `a[0]` presumably depends on state from a removed cell. Confirm.
raw_x = a[0]['Position_1'].values
plt.plot(raw_x)

In [None]:
# Plot the raw Position_2 samples of the first element of `a` against sample number.
# NOTE(review): the only visible binding of `a` is the user_info DataFrame, so
# `a[0]` presumably depends on state from a removed cell. Confirm.
raw_y = a[0]['Position_2'].values
plt.plot(raw_y)

In [None]:
# Apply the spike filter to the raw Position_1 trace and plot the result.
# NOTE(review): heuristic_spike_filter is not defined or imported in any
# visible cell. Confirm where it lives.
filtered_x = heuristic_spike_filter(raw_x)
plt.plot(filtered_x)

In [None]:
# Apply the spike filter to the raw Position_2 trace and plot the result.
# NOTE(review): heuristic_spike_filter is not defined or imported in any
# visible cell. Confirm where it lives.
filtered_y = heuristic_spike_filter(raw_y)
plt.plot(filtered_y)

In [None]:
def euclidean_dist(point1, point2):
    """Return the straight-line distance between two 2-D points.

    Args:
        point1: Indexable whose items 0 and 1 are the x and y coordinates.
        point2: Same layout as ``point1``.

    Returns:
        ``sqrt(dx**2 + dy**2)`` as computed by NumPy.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(np.square(delta_x) + np.square(delta_y))

def ptp_velocity(point1, point2, t1, t2):
    """Return the point-to-point velocity between two timestamped points.

    Args:
        point1: ``(x1, y1)`` position at time ``t1``.
        point2: ``(x2, y2)`` position at time ``t2``.
        t1: Timestamp of ``point1``.
        t2: Timestamp of ``point2``.

    Returns:
        Euclidean distance between the points divided by ``t2 - t1``. There is
        no guard for ``t1 == t2``.
    """
    distance = euclidean_dist(point1, point2)
    elapsed = t2 - t1
    return distance / elapsed

# Point-to-point velocities for a given interval
# Input: df 
def ptp_velocities(data):
    """Return the speed between each pair of consecutive samples in ``data``.

    Positions are read from the ``Position_1`` / ``Position_2`` columns used
    throughout this notebook, and the index labels are used as timestamps
    (``load_raw_data`` puts ``Time`` on the index).

    Args:
        data: DataFrame with ``Position_1`` and ``Position_2`` columns and a
            numeric index.

    Returns:
        List of ``len(data) - 1`` floats, where entry ``i`` is the Euclidean
        distance from row ``i`` to row ``i + 1`` divided by the difference of
        their index labels. The list is empty for fewer than two rows.
        Repeated index labels produce a division by zero (inf/nan under NumPy
        rules).
    """
    x = data['Position_1'].to_numpy(dtype=float)
    y = data['Position_2'].to_numpy(dtype=float)
    t = data.index.to_numpy(dtype=float)

    # Consecutive-sample differences replace the row-by-row loop, whose guard
    # compared an index label with the row count.
    step_dist = np.sqrt(np.square(np.diff(x)) + np.square(np.diff(y)))
    return (step_dist / np.diff(t)).tolist()

# Sanity checks: (5, 0) and (100, 0) are 95 apart; covering that between
# t=5 and t=10 gives 19 per time unit. Points are passed as (x, y) tuples,
# matching the function signatures.
print(euclidean_dist((5, 0), (100, 0)))
print(ptp_velocity((5, 0), (100, 0), 5, 10))

# Scanpath Analysis

In [None]:
# Scanpath: Position_2 plotted against Position_1 for the recording held in `test`.
plt.plot(test['Position_1'], test['Position_2'])

In [None]:
# Scanpath for the recording returned by fetch_data(45). It is fetched once
# and reused for both axes instead of being loaded twice.
# NOTE(review): fetch_data is not defined in any visible cell; presumably an
# earlier name for load_raw_data. Confirm.
subject_45 = fetch_data(45)
plt.plot(subject_45['Position_1'], subject_45['Position_2'])

In [None]:
# Keep values only on rows that carry a non-zero event code (other rows become NaN).
# DataFrame.where needs a boolean condition, so compare explicitly instead of
# passing the integer Events column.
test.where(test['Events'] != 0)