# ERP analysis tutorial



In [None]:
import os

import numpy as np
import pandas as pd

from mne import read_evokeds
from mne import grand_average
from mne.viz import plot_compare_evokeds

from meeg_tools.time_frequency import get_erp_peak_measures, get_erp_measures_from_cross_condition_data

from matplotlib import pyplot as plt

# Never truncate DataFrames when they are displayed: show every column and every row
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

%matplotlib qt

## Read the data

In [None]:
# Change this path below to the folder where the pipeline saved the .fif.gz ERP files!
erp_data_path = 'C:/Users/User/Documents/TRIPLET/TRIPLET'

# Scan the folder once and keep only the day-1 FIF files (matched case-insensitively).
# Any other file in the folder whose name happens to contain "day1" (a CSV export,
# a figure, ...) cannot be parsed by read_evokeds and would abort the whole cell.
erp_file_names = sorted(f for f in os.listdir(erp_data_path)
                        if 'day1' in f.lower()
                        and f.lower().endswith(('.fif', '.fif.gz')))

# Per-condition subsets of the (already sorted) list of all ERP files
erp_H_file_names = [f for f in erp_file_names if 'day1_h' in f.lower()]
erp_L_file_names = [f for f in erp_file_names if 'day1_l' in f.lower()]


# Read the data. read_evokeds returns a list of Evoked objects per file; only the
# first entry of each file is used. Every list is read from disk separately so that
# the per-condition lists and the all-conditions list do not share objects.
erp_H = [read_evokeds(os.path.join(erp_data_path, f), verbose=0)[0] for f in erp_H_file_names]
erp_L = [read_evokeds(os.path.join(erp_data_path, f), verbose=0)[0] for f in erp_L_file_names]


erp_all_conditions = [read_evokeds(os.path.join(erp_data_path, f), verbose=0)[0] for f in erp_file_names]

In [None]:
# Sanity check: how many files were loaded for the H and the L condition
tuple(len(condition_erps) for condition_erps in (erp_H, erp_L))

## Compare ERP between the predictable and unpredictable stimuli

In [None]:
# Compute the grand-average ERP per condition (H, L)
erp_H_grand_average = grand_average(erp_H)
erp_L_grand_average = grand_average(erp_L)

# Ignore a subset of channels (['Fp1', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF8', 'AF4'])
excluded_channels = {'Fp1', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF8', 'AF4'}
ch_names = [ch for ch in erp_H_grand_average.info['ch_names'] if ch not in excluded_channels]

# pick() is the current MNE API for channel selection (pick_channels is marked as legacy).
# It restricts the Evoked in place and returns it, so the re-assignment keeps the same object.
# To baseline-correct here as well, chain .apply_baseline((-0.25, 0.0)) after pick(...).
erp_H_grand_average = erp_H_grand_average.pick(ch_names)
erp_L_grand_average = erp_L_grand_average.pick(ch_names)

In [None]:
# Inspect scalp topographies with the plot_topomap method

# Here we display topomaps from -200 ms to 750 ms with a 50 ms step size.
# np.linspace is used rather than np.arange with a float step, so the number of time
# points is fixed by construction: exactly one per panel of the n_rows x n_cols grid.
n_rows, n_cols = 2, 10
times = np.linspace(-0.2, 0.75, n_rows * n_cols)
# You can use fewer times by explicitly defining the time points in a list:
#times = [0.1, 0.2, 0.5]
erp_H_grand_average.plot_topomap(nrows=n_rows, ncols=n_cols, times=times, average=None)

In [None]:
# Topomap series for the L condition, using the time points defined in the previous cell
topomap_options = dict(times=times, nrows=2, ncols=10, average=None)
erp_L_grand_average.plot_topomap(**topomap_options)

In [None]:
# Plot the H-condition ERP of every channel; click on a channel to expand its plot.
# The plot is drawn from a copy of the grand average.
erp_H_topo_view = erp_H_grand_average.copy()
erp_H_topo_view.plot_topo(title='Grand Average ERP')

In [None]:
# Plot the L-condition ERP of every channel; click on a channel to expand its plot.
# The plot is drawn from a copy of the grand average.
erp_L_topo_view = erp_L_grand_average.copy()
erp_L_topo_view.plot_topo(title='Grand Average ERP')

In [None]:
# Plot with selected channels
picks = ['F7', 'F5', 'F1', 'F4', 'F8', 'F2']

tmin = -0.25  # plot data from 250 ms prior to stimulus onset
tmax = 0.75   # plot data up to 750 ms after stimulus onset
baseline = (-0.25, 0.0)  # baseline-correct with the 250 ms preceding stimulus onset

# NOTE: (None, None) does NOT remove the baseline correction. MNE interprets it as
# "use the entire (cropped) time interval as the baseline". Uncomment the line below
# only if that is what you want; baseline=None is MNE's value for "no correction".
#baseline = (None, None)


# Work on copies so that cropping / baseline correction leave the grand averages untouched
evokeds = {
    condition: grand_avg.copy().crop(tmin=tmin, tmax=tmax).apply_baseline(baseline)
    for condition, grand_avg in (('triplet_H', erp_H_grand_average),
                                 ('triplet_L', erp_L_grand_average))
}

# Compare the conditions on the selected channels (combined across channels)
plot_compare_evokeds(evokeds,
                     combine='mean', # 'median'
                     picks=picks)

# with all channels
plot_compare_evokeds(evokeds,
                     combine='mean') # 'median'

In [None]:
# Combined grand average (all subjects and all conditions pooled together)
pooled_grand_average = grand_average(erp_all_conditions)

# Baseline-correct a copy of the pooled average with the (-0.25, 0.0) window
erp_all_conditions_average = pooled_grand_average.copy().apply_baseline((-0.25, 0.0))

# Plot the pooled ERP, combining the selected channels ('median' is the alternative to 'mean')
plot_compare_evokeds(evokeds=erp_all_conditions_average, picks=picks, combine='mean')

## Peak latency and amplitude


mode='pos': finds the peak with a positive voltage (ignores negative voltages)

mode='neg': finds the peak with a negative voltage (ignores positive voltages)

mode='abs': finds the peak with the largest absolute voltage regardless of sign (positive or negative)

In [None]:
# Search window and channels for the peak detection on the pooled grand average
# (tmin/tmax are presumably in seconds, i.e. 120-220 ms -- confirm against meeg_tools)
peak_window = dict(tmin=0.12, tmax=0.22)
peak_channels = ["P7", "P9", "P5"]

# mode='neg' looks for a negative-going peak (see the mode descriptions above)
cross_condition_measures = get_erp_peak_measures(erp=erp_all_conditions_average,
                                                 mode='neg',
                                                 picks=peak_channels,
                                                 **peak_window)

cross_condition_measures

In [None]:

#erp_all_conditions_pick = [erp.copy().pick_channels(['P7']) for erp in erp_all_conditions]


In [None]:
# Derive per-file measures from the peaks found on the pooled grand average.
# NOTE(review): interval_in_seconds presumably sets the width of the measurement
# window around each peak -- confirm against the meeg_tools documentation.
measurement_options = dict(erp_arrays=erp_all_conditions,
                           cross_condition_data=cross_condition_measures,
                           interval_in_seconds=0.2)
peak_measures = get_erp_measures_from_cross_condition_data(**measurement_options)

In [None]:
# This is a convenient way to extract triplet information from the "fid" column values
# E.g. 10_S_Day1_H   -> split into 10, S, Day1, H; the last piece (index 3) is the triplet
# E.g. 10_S_Day1_1_H -> change n from 3 to 4 to extract the epoch information as well:
# peak_measures.loc[:, 'epoch'] = peak_measures['fid'].str.split('_', n=4, expand=True)[3]
# peak_measures.loc[:, 'triplet'] = peak_measures['fid'].str.split('_', n=4, expand=True)[4]

#print(peak_measures['fid'].str.split('_', n=3, expand=True))

# `n` must be passed by keyword: pandas >= 2.0 accepts only `pat` positionally in
# Series.str.split, so the former positional call raises a TypeError there.
fid_parts = peak_measures['fid'].str.split('_', n=3, expand=True)
peak_measures.loc[:, 'triplet'] = fid_parts[3]


In [None]:
# Display the peak-measures table, including the 'triplet' column added above
peak_measures

In [None]:
# Descriptive statistics of the mean amplitude for every channel / triplet combination
amplitude_by_group = peak_measures.groupby(['ch_name', 'triplet'])['mean_amplitude']
amplitude_by_group.describe()

In [None]:
# Save the table next to the ERP files, without the DataFrame index column
peak_measures_csv = os.path.join(erp_data_path, 'peak_measures.csv')
peak_measures.to_csv(peak_measures_csv, index=False)