# ERP analysis tutorial



In [1]:
import os

import numpy as np
import pandas as pd

from mne import read_evokeds
from mne import grand_average
from mne.viz import plot_compare_evokeds

from meeg_tools.time_frequency import get_erp_peak_measures, get_erp_measures_from_cross_condition_data

from matplotlib import pyplot as plt

# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Render matplotlib figures with the Qt backend rather than inline in the notebook.
%matplotlib qt

  **kwargs


## Read the data

In [3]:
# Directory holding the .fif.gz ERP files saved by the pipeline -- change this to your own path!
erp_data_path = '/Users/weian/Downloads/Raw_data/preprocessed/erp/erp_asrt_longer/TRIPLET'

# Every Day1 ERP file in alphabetical order (the directory is listed once)
erp_file_names = sorted(f for f in os.listdir(erp_data_path) if 'Day1' in f)

# Split the Day1 files by the condition tag in the file name
erp_H_file_names = [f for f in erp_file_names if 'Day1_H' in f]
erp_L_file_names = [f for f in erp_file_names if 'Day1_L' in f]


def _load_first_evoked(file_name):
    """Read `file_name` from `erp_data_path` and return the first Evoked stored in it."""
    return read_evokeds(os.path.join(erp_data_path, file_name), verbose=0)[0]


# One Evoked per file: per condition first, then all conditions together
erp_H = [_load_first_evoked(f) for f in erp_H_file_names]
erp_L = [_load_first_evoked(f) for f in erp_L_file_names]
erp_all_conditions = [_load_first_evoked(f) for f in erp_file_names]

## Compare ERP between the predictable and unpredictable stimuli

In [5]:
# Grand-average ERP for each condition (H, L)
erp_H_grand_average = grand_average(erp_H)
erp_L_grand_average = grand_average(erp_L)

# Channels left out of the analysis
excluded_channels = ['Fp1', 'Fp2', 'AF7', 'AF3', 'AFz', 'AF8', 'AF4']

# Keep every other channel, in the order they appear in the H grand average
ch_names = []
for ch in erp_H_grand_average.info['ch_names']:
    if ch not in excluded_channels:
        ch_names.append(ch)

# Restrict both grand averages to the kept channels (no baseline correction at this step)
erp_H_grand_average = erp_H_grand_average.pick_channels(ch_names)
erp_L_grand_average = erp_L_grand_average.pick_channels(ch_names)

Identifying common channels ...
Identifying common channels ...


In [9]:
# Channels that are averaged together in the first plot
picks = ['F7', 'F5', 'F1', 'F4', 'F8', 'F2']

tmin = -0.25  # plot data from 250 ms before stimulus onset
tmax = 0.75   # plot data up to 750 ms after stimulus onset
baseline = (-0.25, 0.0)  # baseline window: from 250 ms before stimulus onset to onset (0.0)

# To skip baseline correction, uncomment the line below.
# NOTE: (None, None) does NOT skip it -- MNE treats that as "use the entire time range as baseline".
#baseline = None

# Crop a copy of each grand average to the plotting window and baseline-correct it;
# working on copies leaves the grand averages themselves untouched.
grand_averages = dict(triplet_H=erp_H_grand_average, triplet_L=erp_L_grand_average)
evokeds = {condition: evoked.copy().crop(tmin=tmin, tmax=tmax).apply_baseline(baseline)
           for condition, evoked in grand_averages.items()}

# Mean over the selected channels
plot_compare_evokeds(evokeds, combine='mean', picks=picks)  # or combine='median'

# Mean over all channels
plot_compare_evokeds(evokeds, combine='mean')  # or combine='median'

combining channels using "mean"
combining channels using "mean"
combining channels using "mean"
combining channels using "mean"


[<Figure size 1600x1200 with 1 Axes>]

In [7]:
# Grand average pooled over every file (all subjects and all conditions),
# baseline-corrected on a copy with the (-0.25, 0.0) window
erp_all_conditions_average = (
    grand_average(erp_all_conditions)
    .copy()
    .apply_baseline((-0.25, 0.0))
)

# Mean over the selected channels
plot_compare_evokeds(evokeds=erp_all_conditions_average, picks=picks, combine='mean')  # or combine='median'

Identifying common channels ...
combining channels using "mean"


[<Figure size 1600x1200 with 1 Axes>]

## Peak latency and amplitude


The `mode` argument of `get_erp_peak_measures` selects which peak is searched for:

mode='pos': finds the peak with a positive voltage (ignores negative voltages)

mode='neg': finds the peak with a negative voltage (ignores positive voltages)

mode='abs': finds the peak with the largest absolute voltage regardless of sign (positive or negative)

In [20]:
# Peak-search settings: negative peak between tmin and tmax, over the listed channels
peak_search = dict(
    tmin=0.2,
    tmax=0.5,
    mode='neg',
    picks=['F7', 'F5', 'F1', 'F4', 'F8', 'F2'],
)

# Peak measures of the pooled (all subjects, all conditions) grand average
cross_condition_measures = get_erp_peak_measures(erp=erp_all_conditions_average, **peak_search)

cross_condition_measures

Unnamed: 0,fid,ch_name,tmin,tmax,mode,peak_latency,peak_amplitude
0,Grand average (n = 30),F7 F5 F1 F4 F8 F2,0.2,0.5,neg,0.326,-1.705906


In [21]:
# Per-file measures derived from the cross-condition peak.
# NOTE(review): the measurement window presumably spans the cross-condition peak latency
# +/- interval_in_seconds -- confirm against the meeg_tools implementation.
peak_measures = get_erp_measures_from_cross_condition_data(
    erp_arrays=erp_all_conditions,
    cross_condition_data=cross_condition_measures,
    interval_in_seconds=0.2,
)

  mean_amp = sign_mean_data.mean(axis=0) * 1e6
  ret = ret.dtype.type(ret / rcount)


In [22]:
# Display the per-file measures table
peak_measures

Unnamed: 0,fid,ch_name,tmin,tmax,mode,peak_latency,peak_amplitude,mean_amplitude
0,10_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-4.011214,-1.317752
1,10_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-3.613766,-1.13043
2,11_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.13,-3.010411,-0.626112
3,11_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.13,-3.154534,-0.859373
4,13_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.264,-1.092714,-0.667705
5,13_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.26,-0.981097,-0.52862
6,14_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.334,-2.773254,-1.173872
7,14_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.332,-2.632163,-1.018431
8,15_L_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-4.123166,-2.365622
9,15_L_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-3.89517,-2.382338


In [23]:
# Extract the triplet label from the "fid" column values.
# E.g. splitting 10_S_Day1_H on "_" gives 10, S, Day1, H -> the triplet label is piece 3.
# For fids that also carry epoch information (e.g. 10_S_Day1_1_H), raise n from 3 to 4:
# peak_measures.loc[:, 'epoch'] = peak_measures['fid'].str.split('_', n=4, expand=True)[3]
# peak_measures.loc[:, 'triplet'] = peak_measures['fid'].str.split('_', n=4, expand=True)[4]

#print(peak_measures['fid'].str.split('_', n=3, expand=True))

# `n` is passed by keyword: pandas >= 2.0 no longer accepts it as a positional argument.
peak_measures.loc[:, 'triplet'] = peak_measures['fid'].str.split('_', n=3, expand=True)[3]


In [24]:
# Display the measures table again, now including the 'triplet' column
peak_measures

Unnamed: 0,fid,ch_name,tmin,tmax,mode,peak_latency,peak_amplitude,mean_amplitude,triplet
0,10_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-4.011214,-1.317752,H
1,10_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-3.613766,-1.13043,L
2,11_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.13,-3.010411,-0.626112,H
3,11_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.13,-3.154534,-0.859373,L
4,13_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.264,-1.092714,-0.667705,H
5,13_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.26,-0.981097,-0.52862,L
6,14_S_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.334,-2.773254,-1.173872,H
7,14_S_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.332,-2.632163,-1.018431,L
8,15_L_Day1_H,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-4.123166,-2.365622,H
9,15_L_Day1_L,F7 F5 F1 F4 F8 F2,0.126,0.526,neg,0.126,-3.89517,-2.382338,L


In [27]:
# Descriptive statistics of the mean amplitude for each channel set and triplet type
mean_amplitude_by_triplet = peak_measures.groupby(['ch_name', 'triplet'])['mean_amplitude']
mean_amplitude_by_triplet.describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
ch_name,triplet,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F7 F5 F1 F4 F8 F2,H,14.0,-1.514534,0.778164,-2.956454,-2.191392,-1.245812,-0.849643,-0.626112
F7 F5 F1 F4 F8 F2,L,14.0,-1.485142,0.815291,-3.023448,-2.264225,-1.074431,-0.884243,-0.52862


In [29]:
# Save the measures table next to the ERP files, without the DataFrame index column
peak_measures_csv = os.path.join(erp_data_path, 'peak_measures.csv')
peak_measures.to_csv(peak_measures_csv, index=False)