In [1]:
import urllib.request
import numpy as np
import pyedflib

def PhysioNetMIConvert(file_name,show_info=False):
    '''
    Parameters
    ————————————————————————————————————————————————————
        file_name :    str
                    edf文件名

    Return
    ————————————————————————————————————————————————————
        masterSet : 2D array (channels x nsamples)
                    edf文件数据  
                    第1个通道是时间戳
                    第2个通道是标签
                    第3-66个通道是电极通道
    '''
    

    reader = pyedflib.EdfReader(file_name)
    annotations = reader.readAnnotations()
    end_time =  annotations[0][-1]+annotations[1][-1]
    intervals = np.append(annotations[0],end_time)   #删除最后半秒的全零数据，将间隔点在末尾添加124.5标志
    
    timeArray = np.array([round(x,5) for x in np.arange(0,end_time,.00625)])
    time_points = int(end_time*160)
    timeArray = timeArray.reshape(time_points,1)   #一共124.5s，采样率160hz，共19920个数据点

    codes = annotations[2]     #codes为事件标志位
    codeArray = []             #codeArray为每一个数据点所代表的事件标志位
    counter = 1     
    for timeVal in timeArray:
        if timeVal == end_time:
            break   
        elif timeVal / intervals[counter] == 1.0:
            counter += 1

        codeArray.append(codes[counter - 1])
    

    invertCodeArray = np.array(codeArray).reshape(time_points,1)
    numSignals = reader.signals_in_file   #数据的通道数（电极数）
    
    signal_labels = reader.getSignalLabels()  #数据通道的标签（电极标签）

    dataset = np.zeros((numSignals, reader.getNSamples()[0]))
    for signal in np.arange(numSignals):
        dataset[signal, :] = reader.readSignal(signal)

    dataset = dataset[:,:time_points].transpose()


    masterSet = np.concatenate((timeArray,invertCodeArray,dataset),axis=1).swapaxes(0,1)


    if show_info:
        print('annotations \n',annotations)  
        print('intervals values \n',intervals)
        print('codeArray value \n',codeArray)
        print('all channels number :',numSignals)
        print('channels labels :',signal_labels)
        print('all file samples :',reader.getNSamples()[0])
        print('masterSet :',masterSet.shape)

    return masterSet

In [2]:
file_name = 'data/train/S001/S001R03.edf'

'''
reader = pyedflib.EdfReader(file_name)
annotations = reader.readAnnotations()
print(annotations)
'''

raw_data = PhysioNetMIConvert(file_name)




In [3]:
raw_data

array([['0.0', '0.00625', '0.0125', ..., '124.48125', '124.4875',
        '124.49375'],
       ['T0', 'T0', 'T0', ..., 'T1', 'T1', 'T1'],
       ['-57.0', '-49.0', '-55.0', ..., '23.0', '38.0', '50.0'],
       ...,
       ['-56.0', '-70.0', '-77.0', ..., '97.0', '81.0', '32.0'],
       ['-124.0', '-149.0', '-153.0', ..., '156.0', '140.0', '78.0'],
       ['-28.0', '-40.0', '-37.0', ..., '75.0', '66.0', '16.0']],
      dtype='<U32')

In [4]:



def extractData(raw_data,time_range=[0,4],sample_rate=160):
    '''
        对从PhysioNetMIConvert函数中取出的数进行信号拆解,输出T1和T2类别的数据
    
    Parameters
    ——————————————————————————————————————————————
        raw_data : 2D array (channels x nsamples)
                    PhysioNetMIConvert函数中提取的原始数据
        time_range : list 
                    取出数据段的起始点和终止点  ,单位s
        sample_rate : float
                    数据的采样率

    Returns
    ——————————————————————————————————————————————
        retval : dict
                T1和T2数据   格式为 3D array (ntrials x nchannels x nsamples)
    '''


    start = sample_rate * time_range[0]
    end = sample_rate * time_range[1]
    left_data = []
    idx = 0
    data = raw_data
    while data.shape[1]>0: 
        marker = data[1,:].tolist()
        if 'T1' not in marker:
            break
        idx = marker.index('T1')
        #print('idx : ',idx,'   data shape : ',data.shape)
        l_data = []
        i = idx 
        while marker[i] == 'T1':
            l_data.append(data[2:,i])
            i = i + 1
            if i >= data.shape[1]:
                break
        idx = i
        data = data[:,idx:]
        if len(l_data) >= int(end-start) : 
            left_data.append(l_data[start:end])

    left_data_np = np.array(left_data,dtype=np.float32)
    left_data_np = left_data_np.swapaxes(1,2)


    right_data = []
    idx = 0
    data = raw_data
    while data.shape[1]>0: 
        marker = data[1,:].tolist()
        if 'T2' not in marker:
            break
        idx = marker.index('T2')
        
        r_data = []
        i = idx 
        while marker[i] == 'T2':
            r_data.append(data[2:,i])
            i = i + 1
            if i >= data.shape[1]:
                break
        idx = i
        data = data[:,idx:]
        if len(r_data) >= int(end-start) :
            right_data.append(r_data[start:end])

    right_data_np = np.array(right_data,dtype=np.float32)
    right_data_np = right_data_np.swapaxes(1,2)


    retval = {'T1':left_data_np,'T2':right_data_np}

    return retval


In [5]:
from tqdm import tqdm 
def concatenateAllData(base_dir):
    '''
        获取全部被试数据  剔除第88,第92和第100个数据异常的被试
    Parameters
    ————————————————————————————————————————————————
    base_dir : str
                基础数据集文件夹
    
    Returns
    ————————————————————————————————————————————————
    all_left : List 
            左手 的全部run的数据 all_left[n]表示第n个被试数据  3D array (ntrials x nchannels x nsamples) 
    all_right : List
    all_fist : List
    all_feet : List
    
    '''
    left_right_runs = [3,4,7,8,11,12]
    fist_feet_runs = [5,6,9,10,13,14]

    all_left = []
    all_right = []
    all_fist = []
    all_feet = []
    for i in tqdm(range(109)):
        
        nsub = i + 1
        #print(nsub)
        if nsub == 92 or nsub==100 or nsub==88:

            continue

        for nrun in range(3,15):
            
            sub_file_name = base_dir + '/S' + '{:03d}'.format(nsub) + '/S' +  '{:03d}'.format(nsub) + 'R' + '{:02d}'.format(nrun) + '.edf'
            #print(sub_file_name)
            data = extractData(PhysioNetMIConvert(sub_file_name))
            if nrun in left_right_runs:
                if nrun == 3:
                    left = data['T1']
                    right = data['T2']
                else:
                    left = np.concatenate((left,data['T1']))
                    right = np.concatenate((right,data['T2']))
            elif nrun in fist_feet_runs:
                if nrun == 5:
                    fist = data['T1']
                    feet = data['T2']
                else:
                    fist = np.concatenate((fist,data['T1']))
                    feet = np.concatenate((feet,data['T2']))

        all_left.append(left)
        all_right.append(right)
        all_fist.append(fist)
        all_feet.append(feet) 
        
    print(len(all_left),len(all_right),len(all_fist),len(all_feet))

    return all_left,all_right,all_fist,all_feet                 
                



In [6]:
left_data,right_data,fist_data,feet_data = concatenateAllData('data/train')

100%|██████████| 109/109 [13:16<00:00,  7.31s/it]

106 106 106 106





In [7]:
import models.CSP as CSP

ALL_ACC = []
for i in range(len(left_data)):
    data = np.concatenate((left_data[i],right_data[i]),axis=0)
    label = np.concatenate((np.zeros((left_data[i].shape[0],)),np.ones((right_data[i].shape[0],))))

    data_process = CSP.cheb_bandpass_filter(data,8,30,160)

    csp_num = 5

    #print(data_process.shape)


    from sklearn.model_selection import StratifiedShuffleSplit
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.3, train_size=0.7, random_state=1)


    from sklearn import svm

    ACC = []



    for train_idx,test_idx in sss.split(data,label):
        
        X_train = data_process[train_idx,:,:]
        y_train = label[train_idx]
        X_test = data_process[test_idx,:,:]
        y_test = label[test_idx]



        csp_mat = CSP.cal_csp(X_train[y_train==0],X_train[y_train==1],num=csp_num)
        F_train =  CSP.cal_feature(csp_mat,X_train,num=csp_num)
        F_test = CSP.cal_feature(csp_mat,X_test,num=csp_num)

        svm_clf = svm.SVC(C=1, kernel='linear', probability=False)
        svm_clf.fit(F_train,y_train)

        pred = svm_clf.predict(F_test)

        p = np.array(pred)
        accuracy = np.sum(p == y_test) / p.shape[0]
        ACC.append(accuracy)


    #print('ALL ACC :',ACC)
    ALL_ACC.append(np.mean(ACC))
    print("subject : %d  , mean ACC : %.3f %%" % (i,np.mean(ACC)*100))
print('all sub mean acc : %.3f %% '%(np.mean(ALL_ACC)*100))
print('Done!')



subject : 0  , mean ACC : 78.519 %
subject : 1  , mean ACC : 68.889 %
subject : 2  , mean ACC : 63.704 %
subject : 3  , mean ACC : 68.889 %
subject : 4  , mean ACC : 54.815 %
subject : 5  , mean ACC : 55.556 %
subject : 6  , mean ACC : 99.259 %
subject : 7  , mean ACC : 46.667 %
subject : 8  , mean ACC : 44.444 %
subject : 9  , mean ACC : 68.148 %
subject : 10  , mean ACC : 68.889 %
subject : 11  , mean ACC : 55.556 %
subject : 12  , mean ACC : 65.926 %
subject : 13  , mean ACC : 59.259 %
subject : 14  , mean ACC : 66.667 %
subject : 15  , mean ACC : 48.889 %
subject : 16  , mean ACC : 51.111 %
subject : 17  , mean ACC : 47.407 %
subject : 18  , mean ACC : 67.407 %
subject : 19  , mean ACC : 65.926 %
subject : 20  , mean ACC : 65.926 %
subject : 21  , mean ACC : 85.185 %
subject : 22  , mean ACC : 57.778 %
subject : 23  , mean ACC : 62.222 %
subject : 24  , mean ACC : 67.407 %
subject : 25  , mean ACC : 80.741 %
subject : 26  , mean ACC : 57.037 %
subject : 27  , mean ACC : 50.370 %
su

In [10]:
for nsub in range(len(left_data)):
    if nsub == 0:
        allSubLeftData = left_data[nsub]
    else:
        allSubLeftData = np.concatenate((allSubLeftData,left_data[nsub]),axis=0)
print('allSubLeftData shape : ',allSubLeftData.shape)

for nsub in range(len(right_data)):
    if nsub == 0:
        allSubRightData = right_data[nsub]
    else:
        allSubRightData = np.concatenate((allSubRightData,right_data[nsub]),axis=0)
print('allSubRightData shape : ',allSubRightData.shape)

for nsub in range(len(fist_data)):
    if nsub == 0:
        allSubFistData = fist_data[nsub]
    else:
        allSubFistData = np.concatenate((allSubFistData,fist_data[nsub]),axis=0)
print('allSubFistData shape : ',allSubFistData.shape)

for nsub in range(len(feet_data)):
    if nsub == 0:
        allFeetLeftData = feet_data[nsub]
    else:
        allFeetLeftData = np.concatenate((allSubLeftData,feet_data[nsub]),axis=0)
print('allFeetLeftData shape : ',allFeetLeftData.shape)

allSubLeftData shape :  (4800, 64, 640)
allSubRightData shape :  (4744, 64, 640)
allSubFistData shape :  (4751, 64, 640)
allFeetLeftData shape :  (4843, 64, 640)


In [1]:
import torch.nn as nn
import torch
m = nn.AdaptiveAvgPool1d(5)
input = torch.randn(1, 64, 8)
output = m(input)

In [2]:
output.shape

torch.Size([1, 64, 5])

In [6]:
import numpy as np

x = np.arange(24).reshape(4,6)
x[3,0] = 11
print(x)
print('---------------')
print(np.mean(x,axis=1))
print(np.std(x,axis=1))

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [11 19 20 21 22 23]]
---------------
[ 2.5         8.5        14.5        19.33333333]
[1.70782513 1.70782513 1.70782513 3.94405319]


In [7]:
u = np.mean(x,axis=1)

ValueError: operands could not be broadcast together with shapes (4,6) (4,) 