In [4]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from utils import extract_basic_features

import wfdb
import wfdb.processing as wp
import matplotlib.pyplot as plt
from scipy import signal
from utils import find_noise_features, extract_basic_features
import shutil

import time
from scipy import sparse
import os
import warnings
warnings.filterwarnings("ignore")
import scipy.io as sio
import pywt

from resnet_ecg.utils import one_hot,get_batches
from resnet_ecg.ecg_preprocess import ecg_preprocessing

In [5]:
train_dataset_path = os.getcwd()+"/Train/"
val_dataset_path = os.getcwd()+"/Val/"

train_files = os.listdir(train_dataset_path)
train_files.sort()
val_files = os.listdir(val_dataset_path)
val_files.sort()

In [6]:
labels = pd.read_csv("reference.csv")
labels.head()

Unnamed: 0,File_name,label1,label2,label3,label4,label5,label6,label7,label8
0,TRAIN0001,8,,,,,,,
1,TRAIN0002,8,,,,,,,
2,TRAIN0003,8,,,,,,,
3,TRAIN0004,8,,,,,,,
4,TRAIN0005,8,,,,,,,


In [7]:
def wavelet(ecg,wavefunc,lv,m,n):   #
    
    coeff = pywt.wavedec(ecg,wavefunc,mode='sym',level=lv)   #
    #sgn = lambda x: 1 if x > 0 else -1 if x < 0 else 0

    for i in range(m,n+1):  
        cD = coeff[i]
        for j in range(len(cD)):
            Tr = np.sqrt(2*np.log(len(cD)))  
            if cD[j] >= Tr:
                coeff[i][j] = np.sign(cD[j]) - Tr 
            else:
                coeff[i][j] = 0   
                
    denoised_ecg = pywt.waverec(coeff,wavefunc)
    return denoised_ecg

In [8]:
def wavelet_db6(sig):
    """
    R J, Acharya U R, Min L C. ECG beat classification using PCA, LDA, ICA and discrete
     wavelet transform[J].Biomedical Signal Processing and Control, 2013, 8(5): 437-448.

    param sig: 1-D numpy Array
    return: 1-D numpy Array
    """

    coeffs = pywt.wavedec(sig, 'db6', level=9)

    coeffs[-1] = np.zeros(len(coeffs[-1]))

    coeffs[-2] = np.zeros(len(coeffs[-2]))

    coeffs[0] = np.zeros(len(coeffs[0]))

    sig_filt = pywt.waverec(coeffs, 'db6')

    return sig_filt

In [42]:
ecg = sio.loadmat(os.path.join(train_dataset_path,'TRAIN0051'))#TRAIN0151  TRAIN5151

ecg_data = wavelet(ecg["I"][0],'db4',4,2,4)

hard_peaks, soft_peaks = wp.find_peaks(ecg_data)
qrs_inds = wp.xqrs_detect(ecg_data, 500)
# # print hard_peaks
plt.figure(figsize=(18,12))
for i in range(4):
    plt.subplot(4,1,i+1)
    if i == 0:
        plt.plot(ecg["I"][0])
    elif i == 1:
        plt.plot(ecg_preprocessing(ecg["I"][0][:5000].reshape(1,5000), 'sym8', 8, 3, 500)[0,:])
    elif i == 2:
        plt.plot(ecg_data)
    elif i == 3:
        plt.plot(wavelet_db6(ecg["I"][0])) 
        
    #plt.scatter(hard_peaks, ecg_data[hard_peaksks], marker='o', c='r')
    plt.scatter(qrs_inds, ecg_data[qrs_inds], marker='o', c='g')
    #plt.plot(soft_peaks)
#plt.show()

Learning initial signal parameters...
Failed to find 8 beats during learning.
Initializing using default parameters
Running QRS detection...
QRS detection complete.


In [52]:
wavelet(ecg["I"][0],'db4',4,2,4)

array([-0.05184335, -0.09155027, -0.13294368, ...,  0.05536977,
        0.04953838,  0.04703561])

In [53]:
ecg["I"][0].shape

(5000,)

In [16]:
def read_data_labels_enhance(data_path, split = "Train",preprocess=True):
    """ Read data """

    # Fixed params
    n_class = 2
    n_steps = 3000#2560

    # Paths
    path_signals = os.path.join(data_path, split)

    # Read labels and one-hot encode
    labels = pd.read_csv("reference.csv")
    #2156+224+672+654+180+826+534+504+1953
    
    # Read time-series data
    channel_files = os.listdir(path_signals)
    #print(channel_files)
    channel_files.sort()
    n_channels = 12#len(channel_files)
    #posix = len(split) + 5

    # Initiate array
    list_of_channels = []
    X = np.zeros((len(channel_files), n_steps, n_channels))
    i_ch = 0
    
    data_x = []
    data_y = []
    
    channel_name = ['V6', 'aVF', 'I', 'V4', 'V2', 'aVL', 'V1','II', 'aVR', 'V3', 'III', 'V5']
    channel_mid_name = ['II','aVR','V2','V5']
    channel_post_name = ['III','aVF','V3','V6']
    
    for i_ch,fil_ch in tqdm(enumerate(channel_files[:])):
        #print(fil_ch)
        labels_list = labels.iloc[i_ch].values[:]#.dropna()
        #print(labels_list)
        ecg = sio.loadmat(os.path.join(path_signals,fil_ch))
        
        if True:#7 in labels_list[1:] or 4 in labels_list[1:]:
            for i_filter in range(3):
                data_y.append(list(labels_list))
                
                ecg_channels = np.zeros((n_steps, n_channels))

                for i_n,ch_name in enumerate(channel_name[:]):

                    # method 1
                    '''  '''
                    if ch_name in channel_mid_name:
                        mid_ind = int(ecg[ch_name].T.shape[0]/2)
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[mid_ind-2500:mid_ind+2500],n_steps).T 
                    elif ch_name in channel_post_name:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[-5000:],n_steps).T
                    else:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[:5000],n_steps).T
                    
                    #method 2
                    #ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T,n_steps).T
                    
                    #print(ecg_channels[:,i_n].shape)
                    if preprocess and i_filter == 0:
                        data = ecg_preprocessing(ecg_channels[:,i_n].reshape(1,n_steps),'sym8',8,3,n_steps/10)
                        ecg_channels[:,i_n] = data[0]#ecg['data']
                    elif i_filter == 1:
                        ecg_channels[:,i_n] = wavelet(ecg_channels[:,i_n],'db4',4,2,4)[0]
                    elif i_filter == 1:
                        ecg_channels[:,i_n] = wavelet_db6(ecg_channels[:,i_n].reshape(1,n_steps))[0]
                    else:
                        pass
                        #ecg_channels[:,i_n] = ecg_channels[:,i_n]
                X[i_ch,:,:] = ecg_channels
                data_x.append(ecg_channels)
        else:
                data_y.append(list(labels_list))
                ecg_channels = np.zeros((n_steps, n_channels))
                for i_n,ch_name in enumerate(channel_name[:]):

                    # method 1
                    '''
                    if ch_name in channel_mid_name:
                        mid_ind = int(ecg[ch_name].T.shape[0]/2)
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[mid_ind-2500:mid_ind+2500],2560).T 
                    elif ch_name in channel_post_name:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[-5000:],2560).T
                    else:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[:5000],2560).T
                    '''
                    #method 2
                    ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T,n_steps).T

                    if preprocess:
                        data = ecg_preprocessing(ecg_channels[:,i_n].reshape(1,n_steps),'sym8',8,3,n_steps/10)
                        ecg_channels[:,i_n] = data[0]#ecg['data']
                    else:
                        pass

                X[i_ch,:,:] = ecg_channels
                data_x.append(ecg_channels)

    # Return 
    return np.array(data_x).astype('float32'),data_y#X

In [17]:
train_x,train_y = read_data_labels_enhance(os.getcwd())

6500it [19:12,  5.41it/s]


In [18]:
train_x.shape

(19500, 3000, 12)

In [19]:
label_columns_name =['File_name', 'label1', 'label2',
                    'label3', 'label4', 'label5', 'label6', 'label7', 'label8']
labels_en = pd.DataFrame(columns=label_columns_name,index=range(len(train_y)),data=train_y)
labels_en.head()

Unnamed: 0,File_name,label1,label2,label3,label4,label5,label6,label7,label8
0,TRAIN0001,8,,,,,,,
1,TRAIN0001,8,,,,,,,
2,TRAIN0001,8,,,,,,,
3,TRAIN0002,8,,,,,,,
4,TRAIN0002,8,,,,,,,


In [20]:
labels_en.shape

(19500, 9)

In [21]:
def compute_labels(labels):
    la_list = []
    column_name = ['File_name','label1','label2','label3','label4','label5','label6','label7','label8']
    for num in range(9):
        temp =pd.DataFrame(columns=column_name,index=range(labels.shape[0]))
        temp['File_name'] = labels['File_name']
        label_pd = labels[column_name].where(labels[column_name] == num,temp).dropna(axis=0,how='all')
        labels_pd = label_pd.dropna(axis=0,thresh=2)
        labels_pd['label1'] = labels_pd.shape[0]*[1]
        labels_pd.dropna(axis=1,inplace=True)

        labels_pd.columns = ['File_name1', 'label1']
        la_list.append(labels_pd.shape[0])
    return la_list

In [22]:
compute_labels(labels_en)

[5859, 1512, 1602, 2478, 540, 1962, 2016, 672, 6468]

In [111]:
#labels        0    1   2   3  4   5   6   7   8
#original    1953+504+534+826+180+654+672+224+2156      7703

#enhance 1   1953+1512+534+962+216+742+674+228+2576     9397

#enhance 4 7 [1953,544,606,974,540,696,716,672,2240]    8941


9397

In [23]:
np.save('en_all_train_x.npy',train_x)

In [24]:
labels_en.to_csv("en_all_labels.csv",index=False)

In [11]:
def read_test_data(data_path, split = "Val",preprocess=True):
    """ Read data """

    # Fixed params
    n_class = 2
    n_steps = 3000 #2560

    # Paths
    path_signals = os.path.join(data_path, split)

    # Read time-series data
    channel_files = os.listdir(path_signals)
    #print(channel_files)
    channel_files.sort()
    n_channels = 12#len(channel_files)

    # Initiate array
    list_of_channels = []
    #X = np.zeros((len(channel_files), n_steps, n_channels))
    
    i_ch = 0
    
    data_x_w1 = []
    data_x_w2 = []
    data_x_w3 = []
    
    data_x = []
    data_y = []
    
    channel_name = ['V6', 'aVF', 'I', 'V4', 'V2', 'aVL', 'V1','II', 'aVR', 'V3', 'III', 'V5']
    channel_mid_name = ['II','aVR','V2','V5']
    channel_post_name = ['III','aVF','V3','V6']
    
    for i_ch,fil_ch in tqdm(enumerate(channel_files[:])):
        #print(fil_ch)

        ecg = sio.loadmat(os.path.join(path_signals,fil_ch))
        
        if True:#7 in labels_list[1:] or 4 in labels_list[1:]:
            for i_filter in range(3):
                
                ecg_channels_w1 = np.zeros((n_steps, n_channels))
                ecg_channels_w2 = np.zeros((n_steps, n_channels))
                ecg_channels_w3 = np.zeros((n_steps, n_channels))
                ecg_channels    = np.zeros((n_steps, n_channels))
                
                for i_n,ch_name in enumerate(channel_name[:]):

                    # method 1
                    '''  
                    if ch_name in channel_mid_name:
                        mid_ind = int(ecg[ch_name].T.shape[0]/2)
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[mid_ind-2500:mid_ind+2500],n_steps).T 
                    elif ch_name in channel_post_name:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[-5000:],n_steps).T
                    else:
                        ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[:5000],n_steps).T
                    '''
                    #method 2
                    ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T,n_steps).T
                    
                    #print(ecg_channels[:,i_n].shape)
                    if preprocess and i_filter == 0:
                        data = ecg_preprocessing(ecg_channels[:,i_n].reshape(1,n_steps),'sym8',8,3,n_steps/10)
                        ecg_channels_w1[:,i_n] = data[0]#ecg['data']
                    elif i_filter == 1:
                        ecg_channels_w2[:,i_n] = wavelet(ecg_channels[:,i_n],'db4',4,2,4)[0]
                    elif i_filter == 1:
                        ecg_channels_w3[:,i_n] = wavelet_db6(ecg_channels[:,i_n].reshape(1,n_steps))[0]
                    else:
                        pass
                        #ecg_channels[:,i_n] = ecg_channels[:,i_n]
                #X[i_ch,:,:] = ecg_channels
                data_x_w1.append(ecg_channels_w1)
                data_x_w2.append(ecg_channels_w2)
                data_x_w3.append(ecg_channels_w3)
        else:
            ecg_channels = np.zeros((n_steps, n_channels))
            for i_n,ch_name in enumerate(channel_name[:]):

                # method 1
                '''
                if ch_name in channel_mid_name:
                    mid_ind = int(ecg[ch_name].T.shape[0]/2)
                    ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[mid_ind-2500:mid_ind+2500],2560).T 
                elif ch_name in channel_post_name:
                    ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[-5000:],2560).T
                else:
                    ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T[:5000],2560).T
                '''
                #method 2
                ecg_channels[:,i_n] = signal.resample(ecg[ch_name].T,n_steps).T

                if preprocess:
                    data = ecg_preprocessing(ecg_channels[:,i_n].reshape(1,n_steps),'sym8',8,3,n_steps/10)
                    ecg_channels[:,i_n] = data[0]#ecg['data']
                else:
                    pass

            #X[i_ch,:,:] = ecg_channels
            data_x.append(ecg_channels)

    # Return 
    return np.array(data_x_w1).astype('float32'),np.array(data_x_w2).astype('float32'),np.array(data_x_w3).astype('float32')#X

In [12]:
x,y,z = read_test_data(os.getcwd())

500it [01:29,  5.81it/s]


In [13]:
x.shape

(1500, 3000, 12)

In [14]:
y.shape

(1500, 3000, 12)

In [15]:
z.shape

(1500, 3000, 12)

In [3]:
91*500

45500