In [1]:
from os import listdir

from hdf5storage import loadmat, savemat 
import numpy as np 
from matplotlib import pyplot as plt

from scipy import signal 
from scipy.fftpack import fft

from sklearn.covariance import GraphicalLassoCV

import sklearn
print('The scikit-learn version is {}.'.format(sklearn.__version__)) # a recent scikit-learn is required; this notebook was verified with version 1.2.1

The scikit-learn version is 1.2.1.


In [2]:
# load data
def loaddata(filename):
    """Load one recording from a MATLAB ``.mat`` file and tidy its fields.

    Parameters
    ----------
    filename : str
        Path of the ``.mat`` file, passed unchanged to ``loadmat``.

    Returns
    -------
    tuple
        ``(eeg, intervals, samples, condition_index, session, sr, bpchan,
        condition_Names, channels, chan_labels, sessionTypes)`` where

        * ``eeg`` is a dict with key 0 holding ``dataL`` and key 1 holding
          ``dataR`` from the file,
        * ``condition_index`` is the file's ``conditions`` converted from
          MATLAB 1-based to Python 0-based integers,
        * ``chan_labels`` / ``condition_Names`` are plain Python lists built
          from the first element of each entry of ``labels`` /
          ``conditionNames``,
        * ``session``, ``sr`` and ``bpchan`` are Python ints,
        * the remaining items are returned as stored in the file (first row).
    """
    data = loadmat(filename)
    bpchan = int(data['bpchan'][0][0])
    channels = data['channels'][0]
    conditionNames = data['conditionNames'][0]
    conditions = data['conditions'][0]
    eeg = {0: data['dataL'][0], 1: data['dataR'][0]}
    intervals = data['intervals'][0]
    labels = data['labels'][0]
    samples = data['samples'][0]
    session = int(data['session'][0])
    sessionTypes = data['sessionTypes'][0]
    sr = int(data['sr'][0])
    # Labels and condition names do not come over cleanly from MATLAB:
    # every entry is wrapped in an extra container, so unwrap element 0.
    chan_labels = [label[0] for label in labels]
    condition_Names = [name[0] for name in conditionNames]
    # Pythonify the condition index (1-based -> 0-based). Iterate over the
    # actual entries instead of assuming there are exactly 12 of them.
    condition_index = [int(condition - 1) for condition in conditions]
    return eeg, intervals, samples, condition_index, session, sr, bpchan, condition_Names, channels, chan_labels, sessionTypes


In [3]:
def avref(datax):
    """Re-reference the data to the mean taken along axis 1.

    For every index along axis 0 the mean over axis 1 is subtracted from all
    entries of that row. The caller passes a (samples, channels) array, so
    this removes the across-channel average at each sample.

    Parameters
    ----------
    datax : array_like
        Array with at least two dimensions.

    Returns
    -------
    numpy.ndarray
        A new array of the same shape as ``datax``; the input is not modified.
    """
    # keepdims lets the per-row mean broadcast directly against datax, which
    # avoids building a full-size tiled copy of the reference.
    return datax - np.mean(datax, axis=1, keepdims=True)

In [4]:
def segmenteegdata(eegdata,epochlength,samplingrate):
    """Split a continuous recording into consecutive, equal-length epochs.

    Parameters
    ----------
    eegdata : numpy.ndarray
        Two-dimensional array indexed as (sample, channel).
    epochlength : number
        Epoch duration; multiplied by ``samplingrate`` and truncated to an
        integer number of samples per epoch.
    samplingrate : number
        Samples per unit of ``epochlength``.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_epochs, samples_per_epoch, n_channels). Samples at
        the end that do not fill a whole epoch are dropped.
    """
    samples_per_epoch = int(epochlength * samplingrate)
    dims = np.shape(eegdata)
    total_samples, n_channels = dims[0], dims[1]
    # Only whole epochs are kept; the remainder at the tail is discarded.
    n_epochs = int(total_samples / samples_per_epoch)
    usable_samples = n_epochs * samples_per_epoch
    trimmed = eegdata[:usable_samples, :n_channels]
    return np.reshape(trimmed, (n_epochs, samples_per_epoch, n_channels))


In [5]:
def spectralmatrix(fcoef):
    """Compute per-frequency-bin spectral statistics across axis 0.

    Parameters
    ----------
    fcoef : numpy.ndarray
        Complex Fourier coefficients indexed as (epoch, bin, channel).

    Returns
    -------
    power : numpy.ndarray
        Variance of ``fcoef`` over axis 0, shape (bin, channel).
    amplitude_corr : numpy.ndarray
        Correlation matrix of the coefficient magnitudes for each bin,
        shape (bin, channel, channel).
    cross_spectrum : numpy.ndarray
        Complex covariance matrix of the coefficients for each bin,
        shape (bin, channel, channel).
    coherence : numpy.ndarray
        Squared magnitude of the complex correlation matrix for each bin,
        shape (bin, channel, channel).
    """
    n_bins = np.shape(fcoef)[1]
    n_chan = np.shape(fcoef)[2]
    power = np.var(fcoef, axis=0)

    matrix_shape = (n_bins, n_chan, n_chan)
    coherence = np.zeros(matrix_shape)
    cross_spectrum = np.zeros(matrix_shape, dtype=complex)
    amplitude_corr = np.zeros(matrix_shape)

    for bin_idx in range(n_bins):
        # Rows are channels, columns are epochs: the layout that
        # np.cov / np.corrcoef treat as variables x observations.
        chan_by_epoch = np.transpose(np.squeeze(fcoef[:, bin_idx, :]))
        cross_spectrum[bin_idx, :, :] = np.cov(chan_by_epoch)
        coherence[bin_idx, :, :] = np.abs(np.corrcoef(chan_by_epoch)) ** 2
        amplitude_corr[bin_idx, :, :] = np.corrcoef(np.abs(chan_by_epoch))
    return power, amplitude_corr, cross_spectrum, coherence

In [6]:
def _normalize_by_diagonal(matrix):
    """Divide each entry (i, j) of ``matrix`` by sqrt(matrix[i, i] * matrix[j, j])."""
    diagonal = np.diag(matrix)
    return matrix / np.sqrt(np.outer(diagonal, diagonal))


def partialcorrelation(ampdata,ncv=2):
    """Fit a cross-validated graphical lasso per frequency bin.

    Parameters
    ----------
    ampdata : numpy.ndarray
        Amplitude data indexed as (epoch, bin, channel). The absolute value
        is taken again internally before fitting.
    ncv : int, optional
        Value passed as ``cv`` to ``GraphicalLassoCV`` (default 2).

    Returns
    -------
    pcorr : numpy.ndarray
        Estimated precision matrix of each bin, normalised by its diagonal,
        shape (bin, channel, channel).
    covmat : numpy.ndarray
        Estimated covariance matrix of each bin, normalised by its diagonal
        (i.e. a correlation matrix), shape (bin, channel, channel).
    alphas : numpy.ndarray
        Penalty ``alpha_`` selected by cross-validation for each bin,
        shape (bin,).
    """
    pc_lasso = GraphicalLassoCV(cv=ncv)
    nbin = np.shape(ampdata)[1]
    nchan = np.shape(ampdata)[2]
    pcorr = np.zeros((nbin,nchan,nchan))
    alphas = np.zeros(nbin)
    covmat = np.zeros((nbin,nchan,nchan))
    for f in range(nbin):
        xx = np.abs(np.squeeze(ampdata[:,f,:]))
        # The solver can fail when the problem is too ill-conditioned; this
        # was observed for one bad data epoch, which the caller removes
        # before calling this function.
        pc_lasso.fit(xx)
        alphas[f] = pc_lasso.alpha_
        # NOTE(review): the textbook partial correlation negates the
        # off-diagonal entries (-p_ij / sqrt(p_ii * p_jj)). The value stored
        # here is NOT negated, exactly as in the original analysis; confirm
        # which sign convention downstream code expects.
        pcorr[f,:,:] = _normalize_by_diagonal(pc_lasso.precision_)
        covmat[f,:,:] = _normalize_by_diagonal(pc_lasso.covariance_)
    return pcorr,covmat,alphas

This is the main data analysis block 

In [7]:
# Analysis parameters
epoch = 1  # epoch length, units: seconds
maxf = 30  # highest frequency to keep, units: Hz
maxbin = maxf*epoch  # convert maxf into the number of frequency bins to save
argout = list()
filedir = "/home/zhibinz2/zhibin/analysis/"
pathname = 'Cleaned_data/'

# Collect the names of all .mat files in the cleaned-data directory.
# NOTE(review): listdir returns names in arbitrary order, and the analysis
# loop below special-cases a file by its position in this list; confirm the
# ordering is stable on the machine where this is run.
matfile = [
    entry for entry in listdir(filedir+pathname)
    if entry.endswith(".mat")
]


In [8]:
# Run the spectral and partial-correlation analysis on (at most) the first
# 12 files in matfile and save one result file per recording.
# This now takes about 1 hour. The skggm takes 4 days!
for pickfile, filename in enumerate(matfile[:12]):
    try:
        (eeg, intervals, samples, condition_index, session, sr, bpchan,
         condition_Names, channels, chan_labels, sessionTypes) = \
            loaddata(filedir+pathname+filename)
        # Fresh output arrays for this file, indexed as
        # (subject, trial, bin[, channel[, channel]]).
        power_mat = np.zeros((2,12,maxbin,32))
        amp_corr_mat = np.zeros((2,12,maxbin,32,32))
        c_spect_mat = np.zeros((2,12,maxbin,32,32),dtype='complex')
        coh_mat = np.zeros((2,12,maxbin,32,32))
        pcorr_mat = np.zeros((2,12,maxbin,32,32))
        pcorr_cov_mat = np.zeros((2,12,maxbin,32,32))
        pcorr_alphas = np.zeros((2,12,maxbin))
        for k in range(12):  # for each trial
            # Align the length of the two subjects' recordings.
            nsamp1 = np.shape(eeg[0][k])[0]
            nsamp2 = np.shape(eeg[1][k])[0]
            nsamp = np.min((nsamp1,nsamp2))
            for p in range(2):  # for each subject
                # Average-reference the first 32 channels, cut into epochs,
                # detrend each epoch, then transform along the time axis.
                trialdata = avref(eeg[p][k][0:nsamp,0:32])
                trialdata = segmenteegdata(trialdata,epoch,sr)
                trialdata = signal.detrend(trialdata,axis=1)
                dataf = fft(trialdata,axis=1)
                # Drop bin 0 and keep bins 1..maxbin, scaled by the number
                # of samples per epoch and by the epoch length.
                fcoef = dataf[:,1:maxbin+1,:]/(epoch*sr)/epoch
                power,amplitude_corr,cross_spectrum,coherence = \
                    spectralmatrix(fcoef)
                power_mat[p,k,:,:] = power
                amp_corr_mat[p,k,:,:,:] = amplitude_corr
                c_spect_mat[p,k,:,:,:] = cross_spectrum
                coh_mat[p,k,:,:,:] = coherence
                # One known bad epoch makes the graphical-lasso solver fail,
                # so it is removed before the partial-correlation fit only
                # (the spectral matrices above still include it).
                # NOTE(review): the file is identified by its position in
                # matfile, which depends on the directory listing order.
                if pickfile==3 and k==7 and p==0:
                    fcoef = np.delete(fcoef,range(110,111),0)  # remove the bad epoch
                pcorr,covmat,alphas = \
                    partialcorrelation(np.abs(fcoef),ncv=2)
                pcorr_mat[p,k,:,:,:] = pcorr
                pcorr_cov_mat[p,k,:,:,:] = covmat
                pcorr_alphas[p,k,:,] = alphas
        savename = 'pcorr/' + filename[:-4]+'_pcorr.mat'
        outdict = dict()
        outdict['power'] = power_mat
        outdict['empirical_amplitude_correlation'] = amp_corr_mat
        outdict['cross_spectrum'] = c_spect_mat
        outdict['coherence'] = coh_mat
        outdict['partial_correlation'] = pcorr_mat
        outdict['amplitude_correlation'] = pcorr_cov_mat
        outdict['alphas'] = pcorr_alphas
        outdict['session'] = session
        outdict['session_types'] = sessionTypes
        outdict['condition'] = condition_index
        outdict['condition_names'] = condition_Names
        outdict['channels'] = channels
        outdict['channel_labels'] = chan_labels
        outdict['intervals'] = intervals
        savemat(savename,outdict,store_python_metadata=True)
    except Exception as err:
        # Keep processing the remaining files, but report the failure rather
        # than silently discarding it. KeyboardInterrupt is not caught, so a
        # long run can still be interrupted.
        print('Skipping file {} ({!r}): {}: {}'.format(
            pickfile, filename, type(err).__name__, err))




  precision_[indices != idx, idx] = -precision_[idx, idx] * coefs
  precision_[idx, indices != idx] = -precision_[idx, idx] * coefs
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  precision_[indices != idx, idx] = -precision_[idx, idx] * coefs
  precision_[idx, indices != idx] = -precision_[idx, idx] * coefs
  x = asanyarray(arr - arrmean)
  precision_[indices != idx, idx] = -precision_[idx, idx] * coefs
  precision_[idx, indices != idx] = -precision_[idx, idx] * coefs
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrme