## I. Dataset Preparation

In [5]:
# Number of contaminated signals to generate (original note: "2000 samples for 5 runs").
samples = 2000
# Target SNR in dB handed to generate_contaminated_signal (original note gives the range -7 to 2).
snr_desired = -6

In [6]:
# Install third-party packages used below: EMD-signal provides the PyEMD module,
# memory-profiler provides memory_profiler.
!pip install EMD-signal
!pip install memory-profiler

You should consider upgrading via the '/Users/zeespanto/PycharmProjects/ee/venv/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/Users/zeespanto/PycharmProjects/ee/venv/bin/python -m pip install --upgrade pip' command.[0m


In [7]:
import random
import numpy as np
from PyEMD import EMD
import matplotlib.pyplot as plt
import memory_profiler
import time
from memory_profiler import memory_usage
import psutil
from sklearn.metrics import mean_squared_error

In [8]:
# Load the three epoch matrices (one epoch per row) and print a short summary of each.
# Clean EEG epochs.
eeg = np.load('EEGDenoiseNet/EEG_all_epochs.npy')
print(f"------EEG-----\nSize: {eeg.size}\nRow count: {len(eeg)}\nColumn count: {len(eeg[0])}\nFirst Column: {eeg[:, 0]}\n\n")

# EMG artifact epochs. EMG is muscle activity, so the label says "Muscle"
# (it previously said "Heart", which would be ECG).
emg = np.load('EEGDenoiseNet/EMG_all_epochs.npy')
print(f"------EMG(Muscle)-----\nSize: {emg.size}\nRow count: {len(emg)}\nColumn count: {len(emg[0])}\nFirst Column: {emg[:, 0]}\n\n")

# EOG (ocular) artifact epochs.
eog = np.load('EEGDenoiseNet/EOG_all_epochs.npy')
print(f"------EOG(Ocular)-----\nSize: {eog.size}\nRow count: {len(eog)}\nColumn count: {len(eog[0])}\nFirst Column: {eog[:, 0]}")

------EEG-----
Size: 2311168
Row count: 4514
Column count: 512
First Column: [184.5070843  171.96198926 229.56731921 ... 317.59704985 262.89154388
 216.07429779]


------EMG(Heart)-----
Size: 2866176
Row count: 5598
Column count: 512
First Column: [20245.96672667 23595.64263225 34991.76745427 ...  1490.61150022
   -38.705385    1260.54203952]


------EOG(Ocular)-----
Size: 1740800
Row count: 3400
Column count: 512
First Column: [  7.30828446  -1.68701752  12.4808031  ...   6.81209745 298.19922839
 -44.54226777]


## II. Contaminated Signal Generation

In [9]:
def get_random_rows(dataset, num_rows):
    """Return `num_rows` distinct rows of a 2-D `dataset`, picked at random.

    Row indices are drawn without replacement via ``np.random.choice``, so
    `num_rows` must not exceed ``dataset.shape[0]``.
    """
    row_ids = np.random.choice(dataset.shape[0], size=num_rows, replace=False)
    return dataset[row_ids, :]


def calculate_snr(clean_eeg, artifact_segment,λ):
    """Return the SNR in dB of a clean EEG segment against a scaled artifact.

    The value is ``10 * log10(RMS(clean_eeg) / RMS(λ * artifact_segment))``,
    rounded to two decimals.

    Parameters
    ----------
    clean_eeg : array-like
        Clean signal segment.
    artifact_segment : array-like
        Artifact (noise) segment before scaling.
    λ : float
        Scale factor applied to the artifact; only its magnitude matters
        because the artifact is squared.

    Returns
    -------
    float
        SNR in dB, rounded to 2 decimals. An all-zero scaled artifact gives
        ``inf`` (NumPy emits a divide-by-zero warning).
    """
    clean = np.asarray(clean_eeg)
    scaled_artifact = λ * np.asarray(artifact_segment)

    # RMS of each segment over its OWN length (np.mean), not a hard-coded 512,
    # so segments of any length -- or of different lengths -- are handled.
    signal_rms = np.mean(clean ** 2) ** 0.5
    noise_rms = np.mean(scaled_artifact ** 2) ** 0.5

    # SNR in dB from the ratio of RMS amplitudes.
    snr_db = 10 * np.log10(signal_rms / noise_rms)
    return round(snr_db, 2)

def generate_contaminated_signal(clean_eeg_data,artifact_data,num_samples:int,snr_valid):
    """Mix random clean-EEG rows with random artifact rows at a target SNR.

    Each output row is ``y = x + λ * n`` where ``x`` is a randomly chosen row of
    `clean_eeg_data`, ``n`` a randomly chosen row of `artifact_data`, and λ is
    solved in closed form from the SNR definition

        SNR_dB = 10 * log10(RMS(x) / RMS(λ * n))
        =>  λ = RMS(x) / (RMS(n) * 10 ** (SNR_dB / 10))

    The previous version drew random λ values until the rounded SNR happened to
    match, which was slow, could loop forever, and broke when the target was -8.
    λ is now always positive and the target SNR is met exactly.

    Parameters
    ----------
    clean_eeg_data : ndarray, shape (n_clean, n_points)
    artifact_data : ndarray, shape (n_artifacts, n_points)
    num_samples : int
        Number of contaminated signals to produce.
    snr_valid : float
        Target SNR in dB.

    Returns
    -------
    (ndarray, list[tuple[int, int]])
        The contaminated signals (one per row) and, for each of them, the
        ``(clean_row_index, artifact_row_index)`` pair it was built from.

    Raises
    ------
    ValueError
        If a selected artifact row has zero RMS, so no λ can reach the target.
    """
    num_clean_eeg_samples = clean_eeg_data.shape[0]
    num_artifacts = artifact_data.shape[0]

    # Loop-invariant part of the λ formula: the linear RMS ratio for the target SNR.
    target_rms_ratio = 10 ** (snr_valid / 10)

    contaminated_eeg_data = []
    contamination_indices = []

    for _ in range(num_samples):
        clean_eeg_index = np.random.randint(0, num_clean_eeg_samples)
        artifact_index = np.random.randint(0, num_artifacts)

        clean_eeg_sample = clean_eeg_data[clean_eeg_index]
        artifact = artifact_data[artifact_index]

        clean_rms = np.mean(clean_eeg_sample ** 2) ** 0.5
        artifact_rms = np.mean(artifact ** 2) ** 0.5
        if artifact_rms == 0:
            raise ValueError(
                f"artifact row {artifact_index} has zero RMS; "
                f"cannot scale it to an SNR of {snr_valid} dB"
            )

        # Scale factor that yields exactly the requested SNR.
        λ = clean_rms / (artifact_rms * target_rms_ratio)

        contaminated_eeg_data.append(clean_eeg_sample + (artifact * λ))
        contamination_indices.append((clean_eeg_index, artifact_index))

    return np.array(contaminated_eeg_data), contamination_indices

In [None]:
# Build the contaminated dataset: EEG rows mixed with EOG rows at the target SNR.
contaminated_eeg_data, indices = generate_contaminated_signal(
    clean_eeg_data=eeg,
    artifact_data=eog,
    num_samples=samples,
    snr_valid=snr_desired,
)

## III. Apply Empirical Mode Decomposition

### 3.1 Apply EMD

In [None]:
def apply_emd(s):
    """Run EMD on one signal and report its memory and time cost.

    Parameters
    ----------
    s : 1-D array
        Signal to decompose. The time axis built here has 512 points
        (2 s at 256 Hz), so `s` is expected to have 512 samples.

    Returns
    -------
    (IMF, mem_usage, execution_time)
        IMF            -- result of ``EMD().emd(s, t)``.
        mem_usage      -- difference between the ``memory_usage()`` readings taken
                          after and before the decomposition (memory_profiler's
                          units; MiB per its documentation -- confirm).
        execution_time -- wall-clock seconds spent in the decomposition only.
    """
    sampling_rate = 256  # Hz
    duration = 2  # seconds
    t = np.arange(0, duration, 1/sampling_rate)

    # NOTE(review): an earlier revision built EMD(DTYPE=np.float16, max_imfs=2) here
    # but never used it; the decomposition has always run with a default EMD().
    # That dead instance (and an unused psutil.Process()) is removed. If a cap on the
    # number of IMFs is wanted, it presumably belongs on the emd() call -- confirm
    # against the PyEMD docs before changing results.

    # Memory is sampled OUTSIDE the timed region so that the profiler's own
    # overhead is not counted as EMD runtime.
    mem_usage_start = memory_usage()[0]

    start_time = time.time()
    IMF = EMD().emd(s,t)
    end_time = time.time()

    mem_usage_end = memory_usage()[0]

    mem_usage = mem_usage_end - mem_usage_start
    execution_time = end_time - start_time

    return IMF, mem_usage, execution_time

### 3.2 Get IMFs, Memory, and Time

In [None]:
# Decompose every contaminated signal once, then split the (IMFs, memory, time)
# triples returned by apply_emd into three parallel lists.
emd_results = [apply_emd(signal) for signal in contaminated_eeg_data]
IMF_data = [imfs for imfs, _, _ in emd_results]
memory_data = [mem for _, mem, _ in emd_results]
time_data = [secs for _, _, secs in emd_results]

In [None]:
def reconstruct_signal_from_imfs(imfs):
    """Rebuild a signal by adding its IMFs together.

    `imfs` holds one IMF per row; the rows are summed element-wise (axis 0)
    and the resulting 1-D signal is returned.
    """
    return np.sum(imfs, axis=0)

In [None]:
# One reconstructed signal per decomposed sample, in the same order as IMF_data.
reconstructed_emd = [reconstruct_signal_from_imfs(imfs) for imfs in IMF_data]

In [None]:
def calculate_nmse(original, denoised):
    """Return the normalized mean squared error between two signals.

    NMSE = mean((original - denoised) ** 2) / var(original)

    Parameters
    ----------
    original : array-like
        Reference signal; its variance is the normaliser.
    denoised : array-like
        Signal compared against the reference; must have the same shape.

    Returns
    -------
    float
        The NMSE. When the reference has zero variance the ratio is undefined;
        this returns 0.0 if the signals are identical and ``inf`` otherwise,
        instead of an unannounced ``nan``/``inf`` plus a NumPy warning.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in shape, or contain NaN/inf.
    """
    reference = np.asarray(original, dtype=float)
    estimate = np.asarray(denoised, dtype=float)

    if reference.shape != estimate.shape:
        raise ValueError(
            f"shape mismatch: original {reference.shape} vs denoised {estimate.shape}"
        )
    if reference.size == 0:
        raise ValueError("cannot compute NMSE of empty signals")
    if not (np.isfinite(reference).all() and np.isfinite(estimate).all()):
        raise ValueError("inputs must not contain NaN or infinite values")

    mse = np.mean((reference - estimate) ** 2)
    actual_variance = np.var(reference)

    # Constant reference signal: avoid dividing by zero.
    if actual_variance == 0:
        return 0.0 if mse == 0 else float("inf")

    return float(mse / actual_variance)


In [None]:
# NMSE of each EMD reconstruction against the contaminated signal it came from.
mse_list = [
    calculate_nmse(contaminated_eeg_data[idx], reconstructed_emd[idx])
    for idx in range(len(contaminated_eeg_data))
]

In [None]:
# Start with the totals: they are reported first, then converted to per-sample means.
avg_memory = sum(memory_data)
avg_time = sum(time_data)
avg_mse = sum(mse_list)

print(f"Total memory: {avg_memory} ----RECORD ME")
print(f"Total time: {avg_time}")

# Turn each total into a mean over its own list.
avg_memory /= len(memory_data)
avg_time /= len(time_data)
avg_mse /= len(mse_list)

print(f"Average memory data: {avg_memory}\nAverage time data: {avg_time}\nMSE: {avg_mse}")


## IV. Independent Component Analysis

In [None]:
import time
from sklearn.decomposition import FastICA

def apply_ica(data):
    """Run FastICA on a batch of signals and report its memory and time cost.

    Parameters
    ----------
    data : ndarray, shape (n_signals, n_points)
        One signal per row. It is transposed before fitting, so FastICA sees
        each time point as a sample and each signal as a feature.

    Returns
    -------
    (ica_result, denoised_signal, memory_usage, runtime)
        ica_result      -- the 5 estimated components, one per row.
        denoised_signal -- ``inverse_transform`` of those components, transposed
                           back to one signal per row like `data`.
        memory_usage    -- change in process RSS (MB) across the ICA step.
        runtime         -- wall-clock seconds for fit + inverse transform only.
    """
    # Memory is sampled (and printed) outside the timed region so that
    # instrumentation is not counted as ICA runtime.
    initial_memory = psutil.Process().memory_info().rss / (1024 * 1024)  # in MB
    print(initial_memory)

    start_time = time.time()
    ica = FastICA(n_components=5, random_state=0)
    # Use the `data` argument: the previous body ignored it and read the global
    # `contaminated_eeg_data` instead.
    ica_result = ica.fit_transform(data.T).T
    denoised_signal = ica.inverse_transform(ica_result.T).T
    end_time = time.time()

    final_memory = psutil.Process().memory_info().rss / (1024 * 1024)  # in MB
    print(final_memory)

    runtime = end_time - start_time
    print(start_time, end_time, runtime)
    # Local is named `memory_delta` so it does not shadow the `memory_usage`
    # function imported from memory_profiler.
    memory_delta = final_memory - initial_memory

    return ica_result, denoised_signal, memory_delta, runtime


In [None]:
# Run ICA once over the whole contaminated batch and unpack its four results.
(ICA_data,
 reconstructed_ica,
 ica_memory_data,
 ica_time_data) = apply_ica(data=contaminated_eeg_data)

In [None]:
# Report the ICA memory delta and runtime, plus the runtime scaled by 10 and by 2000.
# NOTE(review): 2000 presumably mirrors `samples`; the meaning of the x10 figure is
# not evident from this notebook -- confirm before relying on either.
print(f"ICA Memory: {ica_memory_data}\nICA Runtime:{ica_time_data}\nICA Runtime*10:    {ica_time_data*10}--RECORD ME")
print(f"\nRuntime*2000: {ica_time_data*2000}")

In [None]:
# NMSE of each ICA reconstruction against the contaminated signal at the same row.
ica_mse_list = [
    calculate_nmse(contaminated_eeg_data[idx], reconstructed_ica[idx])
    for idx in range(len(contaminated_eeg_data))
]

In [None]:
# Mean NMSE over all ICA reconstructions.
# The previous code divided `avg_mse` (the EMD average) by the list length, so the
# printed ICA figure did not depend on the ICA errors at all.
ica_avg_mse = sum(ica_mse_list) / len(ica_mse_list)

print(f"ICA MSE: {ica_avg_mse}")
