# Introduction to features commonly used for EEG signal analysis

In this notebook, I introduce features that are commonly used to analyze EEG signals.

The features I will introduce are:

1. **Permutation entropy**
2. **Spectral entropy**
3. **Singular value decomposition entropy**
4. **Hjorth mobility and complexity**
5. **Number of zero-crossings**
6. **Petrosian fractal dimension**
7. **Katz fractal dimension**
8. **Higuchi fractal dimension**
9. **Detrended fluctuation analysis**


In the last cell, I visualize these features using UMAP.

If you want more information, please see the [documentation of the antropy package](https://raphaelvallat.com/antropy/build/html/index.html).

### Please Upvote if you Find this Useful :)

# antropy is used for feature calculation

In [None]:
!pip install antropy

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import antropy as ant
import pywt
from tqdm.notebook import tqdm
import seaborn as sns

In [None]:
# Load the label table; its last six columns hold the per-class vote counts.
df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
TARGETS = df.columns[-6:]
print('Train shape:', df.shape )
print('Targets', list(TARGETS))

# Collapse the table to one row per eeg_id: keep the first spectrogram and
# patient id, and the min/max of both label-offset columns.
by_eeg = df.groupby('eeg_id')
train = by_eeg.agg(
    spec_id=('spectrogram_id', 'first'),
    spectrogram_min=('spectrogram_label_offset_seconds', 'min'),
    spectrogram_max=('spectrogram_label_offset_seconds', 'max'),
    eeg_min=('eeg_label_offset_seconds', 'min'),
    eeg_max=('eeg_label_offset_seconds', 'max'),
    patient_id=('patient_id', 'first'),
)

# Sum the votes of every row belonging to an eeg_id, then rescale each row so
# the six vote columns add up to 1.
vote_totals = by_eeg[TARGETS].sum()
vote_share = vote_totals.div(vote_totals.sum(axis=1), axis=0)
for vote_col in TARGETS:
    train[vote_col] = vote_share[vote_col].values

# The consensus label of the first row stands in for the whole recording.
train['target'] = by_eeg['expert_consensus'].first()

train = train.reset_index()
print('Train non-overlapp eeg_id shape:', train.shape )
train

In [None]:
# A recording is "ideal" when the vote share of its consensus class is exactly
# 1.0, i.e. no vote went to any other class.
train["ideal"] = train.apply(
    lambda row: row[row["target"].lower() + "_vote"] == 1.0, axis=1
)
train_ideal = train.query("ideal==1")

target = ["seizure_vote", "lpd_vote", "gpd_vote", "lrda_vote", "grda_vote", "other_vote"]

# Draw 50 eeg_ids per class. The seed is fixed, so a single draw per class is
# already reproducible; repeating it would only return the same sample again.
eegs = {}
for t in target:
    train_ideal_target = train_ideal[train_ideal[t] == 1.0]
    eegs[t] = train_ideal_target["eeg_id"].sample(n=50, random_state=42).to_list()

# Later cells expect `train` to be indexed by eeg_id.
train = train.set_index("eeg_id")

In [None]:
def maddest(d, axis=None):
    """Return the mean absolute deviation of ``d`` around its mean.

    Despite the "mad" in the name, both the centre and the spread are computed
    with the arithmetic mean, not the median.

    Parameters
    ----------
    d : array_like
        Input values.
    axis : int or None, optional
        Axis along which to reduce. ``None`` (default) reduces over all
        elements and yields a scalar.

    Returns
    -------
    float or numpy.ndarray
        Mean of ``|d - mean(d)|`` along ``axis``.
    """
    d = np.asarray(d)
    # keepdims lets the mean broadcast back against `d` for any axis; without
    # it, reducing over e.g. axis=1 of a 2-D array produces a shape mismatch.
    center = np.mean(d, axis=axis, keepdims=True)
    return np.mean(np.absolute(d - center), axis=axis)

def denoise(x, wavelet='haar', level=1):
    """Wavelet-denoise every column of a DataFrame independently.

    Each column is decomposed with ``pywt.wavedec`` (periodization mode), every
    coefficient array except the first is hard-thresholded, and the signal is
    rebuilt with ``pywt.waverec``.

    Parameters
    ----------
    x : pandas.DataFrame
        One signal per column.
    wavelet : str, optional
        Wavelet name passed to PyWavelets.
    level : int, optional
        Selects ``coeff[-level]`` as the coefficient array the noise scale is
        estimated from. It does not limit the decomposition depth.

    Returns
    -------
    pandas.DataFrame
        Reconstructed signals under the same column names as ``x``.
    """
    cleaned = {}

    for column in x.columns:
        coeffs = pywt.wavedec(x[column], wavelet, mode="per")

        # Noise scale from the selected coefficient array, then the threshold
        # sigma * sqrt(2 * ln(n)) with n = number of rows in `x`.
        sigma = (1/0.6745) * maddest(coeffs[-level])
        uthresh = sigma * np.sqrt(2*np.log(len(x)))

        # The first array (index 0) is left untouched; all others are thresholded.
        coeffs[1:] = [
            pywt.threshold(band, value=uthresh, mode='hard') for band in coeffs[1:]
        ]

        cleaned[column] = pywt.waverec(coeffs, wavelet, mode='per')

    return pd.DataFrame(cleaned)

# Electrode chains; every pair of neighbouring electrodes within a chain forms
# one bipolar channel named "<first>-<second>".
_MONTAGE_CHAINS = [
    ["Fp1", "F7", "T3", "T5", "O1"],
    ["Fp1", "F3", "C3", "P3", "O1"],
    ["Fp2", "F4", "C4", "P4", "O2"],
    ["Fp2", "F8", "T4", "T6", "O2"],
    ["Fz", "Cz", "Pz"],
]

# Bipolar channel names, chain by chain (18 channels in total).
BR_ORDER = [
    f"{first}-{second}"
    for chain in _MONTAGE_CHAINS
    for first, second in zip(chain, chain[1:])
]

# Names of the six vote columns, one per class.
TARGET = [
    "seizure_vote",
    "lpd_vote",
    "gpd_vote",
    "lrda_vote",
    "grda_vote",
    "other_vote",
]

def get_bipolar_referenced_eeg(eeg):
    """Build the bipolar channels listed in ``BR_ORDER``.

    Each channel name has the form ``"A-B"``; its signal is column ``A`` of
    ``eeg`` minus column ``B``.

    Parameters
    ----------
    eeg : pandas.DataFrame
        Must contain a column for every electrode named in ``BR_ORDER``.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``BR_ORDER``, in that order.
    """
    derivations = {}
    for channel in BR_ORDER:
        first, second = channel.split("-")[:2]
        derivations[channel] = eeg[first] - eeg[second]
    return pd.DataFrame(derivations)

def get_denoised_eeg_signal(eeg_id):
    """Load one EEG recording, denoise it and convert it to bipolar channels.

    Parameters
    ----------
    eeg_id
        Identifier used as the parquet file name (``<eeg_id>.parquet``) inside
        the competition's ``train_eegs`` directory.

    Returns
    -------
    pandas.DataFrame
        Output of ``get_bipolar_referenced_eeg`` applied to the denoised
        recording.
    """
    eeg_dir = "/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/"
    raw = pd.read_parquet(f"{eeg_dir}{eeg_id}.parquet")
    # Denoising runs on the raw electrode columns, before the differences are taken.
    return get_bipolar_referenced_eeg(denoise(raw))

def apply_function_to_eeg(eeg, features, feat_name, func, args):
    """Apply ``func`` to every column of ``eeg`` and append the results.

    Parameters
    ----------
    eeg : pandas.DataFrame
        One signal per column.
    features : dict of str -> list
        Accumulator that is mutated in place. It must already contain the key
        ``f"{feat_name}_{column}"`` for every column or, when ``feat_name`` is
        ``"hjorth_entropy"``, the two keys ``f"{feat_name}_m_{column}"`` and
        ``f"{feat_name}_c_{column}"``.
    feat_name : str
        Prefix of the feature keys. ``"hjorth_entropy"`` marks a function that
        returns a pair, stored as element 0 -> ``_m_`` and element 1 -> ``_c_``.
    func : callable
        Called as ``func(eeg[column], **args)``.
    args : dict
        Extra keyword arguments for ``func``.

    Returns
    -------
    dict
        The same ``features`` object that was passed in.

    Notes
    -----
    Feature extraction is best-effort: if ``func`` raises (or a pair cannot be
    unpacked), NaN is stored for that column instead of aborting the run.
    """
    is_pair = feat_name == "hjorth_entropy"

    for pos in eeg.columns:
        try:
            feat = func(eeg[pos], **args)
            if is_pair:
                first, second = feat[0], feat[1]
        except Exception:
            # Only ordinary errors are swallowed, so KeyboardInterrupt still
            # stops the notebook. Both halves of a pair fall back to NaN, so
            # the failure path never tries to index a float.
            feat = np.nan
            first = second = np.nan

        if is_pair:
            features[feat_name+"_"+"m"+"_"+pos].append(first)
            features[feat_name+"_"+"c"+"_"+pos].append(second)
        else:
            features[feat_name+"_"+pos].append(feat)
    return features

def get_feature(eegs, df, feat_name, func, args):
    """Compute one feature for every bipolar channel of every selected recording.

    Parameters
    ----------
    eegs : dict of str -> list
        Maps a class label to the eeg_ids to process for that class. Classes
        are processed in the dict's iteration order.
    df : pandas.DataFrame
        Unused; kept so existing calls keep working.
    feat_name : str
        Prefix of the generated column names. ``"hjorth_entropy"`` produces two
        columns per channel (``_m_`` and ``_c_``); any other name produces one.
    func : callable
        Feature function, called on each channel as ``func(signal, **args)``.
    args : dict
        Extra keyword arguments for ``func``.

    Returns
    -------
    pandas.DataFrame
        One row per recording: the feature columns plus ``eeg_id`` and
        ``target``. Missing values are replaced by the column median.
    """
    if feat_name == "hjorth_entropy":
        feat_brorder = [feat_name+"_"+f+"_"+pos for f in ["m","c"] for pos in BR_ORDER]
    else:
        feat_brorder = [feat_name+"_"+pos for pos in BR_ORDER]

    features = {key:[] for key in feat_brorder}
    features["eeg_id"] = []
    features["target"] = []

    # Iterate the argument itself rather than a label list defined in another
    # cell, so the function works on whatever mapping it is given.
    for label, eeg_list in eegs.items():
        print(f"Get {label} feature")
        for eeg_id in tqdm(eeg_list):
            features["eeg_id"].append(eeg_id)
            features["target"].append(label)

            eeg = get_denoised_eeg_signal(eeg_id)
            features = apply_function_to_eeg(eeg, features, feat_name, func, args)

    features = pd.DataFrame(features)

    # Failed computations were stored as NaN; fill them with the column median.
    return features.fillna(features.median(numeric_only=True))

In [None]:
# Accumulator: each feature cell below appends one per-recording feature table.
all_features = list()

# Permutation entropy
 

In [None]:
# Permutation entropy per bipolar channel (normalize flag on).
# The spelling of feat_name is kept as-is because it becomes the column prefix.
features = get_feature(
    eegs,
    df=train,
    feat_name="Permutaion_entropy",
    func=ant.perm_entropy,
    args={"normalize": 1},
)
all_features.append(features)

# Spectral entropy

In [None]:
# Spectral entropy per bipolar channel, from a Welch power spectrum.
# sf is the sampling frequency of the recordings: the competition's EEG data
# is sampled at 200 Hz, so 200 is passed here.
features = get_feature(
    eegs,
    df=train,
    feat_name="Spectral_entropy",
    func=ant.spectral_entropy,
    args=dict(sf=200, method='welch', normalize=True),
)
all_features.append(features)

# Singular value decomposition entropy

In [None]:
# SVD entropy per bipolar channel (normalize flag on).
features = get_feature(
    eegs,
    df=train,
    feat_name="SVD_entropy",
    func=ant.svd_entropy,
    args={"normalize": True},
)
all_features.append(features)

# Hjorth mobility and complexity

In [None]:
# Hjorth parameters per bipolar channel. This feat_name makes get_feature
# create two columns per channel ("_m_" and "_c_") instead of one.
features = get_feature(
    eegs,
    df=train,
    feat_name="hjorth_entropy",
    func=ant.hjorth_params,
    args={},
)
all_features.append(features)

# Number of zero-crossings

In [None]:
# Zero-crossing count per bipolar channel.
features = get_feature(
    eegs,
    df=train,
    feat_name="zerocross",
    func=ant.num_zerocross,
    args={},
)
all_features.append(features)

# Petrosian fractal dimension

In [None]:
# Petrosian fractal dimension per bipolar channel.
features = get_feature(
    eegs,
    df=train,
    feat_name="petrosian_fd",
    func=ant.petrosian_fd,
    args={},
)
all_features.append(features)

# Katz fractal dimension

In [None]:
# Katz fractal dimension per bipolar channel.
features = get_feature(
    eegs,
    df=train,
    feat_name="katz_fd",
    func=ant.katz_fd,
    args={},
)
all_features.append(features)

# Higuchi fractal dimension

In [None]:
# Higuchi fractal dimension per bipolar channel.
features = get_feature(
    eegs,
    df=train,
    feat_name="higuchi_fd",
    func=ant.higuchi_fd,
    args={},
)
all_features.append(features)

# Detrended fluctuation analysis

In [None]:
# Detrended fluctuation analysis per bipolar channel.
# The spelling of feat_name is kept as-is because it becomes the column prefix.
features = get_feature(
    eegs,
    df=train,
    feat_name="detrended_fluction",
    func=ant.detrended_fluctuation,
    args={},
)
all_features.append(features)

# Visualization using UMAP

In [None]:
# Put all feature tables side by side, then strip the eeg_id/target columns
# that every table carries, so only numeric features remain.
all_feature = pd.concat(all_features, axis="columns").drop(columns=["eeg_id", "target"])
all_feature.head()

In [None]:
import umap


# Project the feature table onto two dimensions.
mapper = umap.UMAP(random_state=42,
                   n_neighbors=128,
                   min_dist=0.99,
                   metric="mahalanobis")
embedding = mapper.fit_transform(all_feature)

embedding_x = embedding[:,0]
embedding_y = embedding[:,1]

# Assumes the rows are grouped class by class in TARGET order with equally
# sized groups, as produced by the feature cells above. The group size is
# derived from the data instead of being hard-coded.
n_per_class = len(embedding) // len(TARGET)

c_list = ["r", "b", "y", "g", "m", "c"]
for i, t in enumerate(TARGET):
    rows = slice(n_per_class*i, n_per_class*(i+1))
    plt.scatter(embedding_x[rows], embedding_y[rows], c=c_list[i], label=t)
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.title("UMAP embedding of EEG features")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
plt.show()