In [4]:


# Load the dataset files

import numpy as np

# Load dataset files
synthetic_eeg_eog_path = "/home/tulgaa/Desktop/denoisenet/Linear_Mixing/EEG+EOG/Linear_synthetic_eeg_eog.npy"
clean_eeg_path = "/home/tulgaa/Desktop/denoisenet/Linear_Mixing/EEG+EOG/EEG_all_epochs.npy"

# Load the numpy arrays
synthetic_eeg_eog = np.load(synthetic_eeg_eog_path, allow_pickle=True).item()  # Dictionary of SNR levels
clean_eeg = np.load(clean_eeg_path)  # Ground truth EEG

# Inspect dataset keys and shapes
print("🔹 Synthetic EEG+EOG Dictionary Keys (SNR Levels):", synthetic_eeg_eog.keys())
print("🔹 Clean EEG Shape:", clean_eeg.shape)

# Print shape of each SNR level data
for snr in synthetic_eeg_eog.keys():
    print(f"🔹 SNR Level {snr}: Shape = {synthetic_eeg_eog[snr].shape}")

# Check statistics of clean EEG data
print("🔹 Clean EEG Mean:", np.mean(clean_eeg))
print("🔹 Clean EEG Std Dev:", np.std(clean_eeg))


🔹 Synthetic EEG+EOG Dictionary Keys (SNR Levels): dict_keys([-7, -6, -5, -4, -3, -2, -1, 0, 1, 2])
🔹 Clean EEG Shape: (3400, 512)
🔹 SNR Level -7: Shape = (3400, 512)
🔹 SNR Level -6: Shape = (3400, 512)
🔹 SNR Level -5: Shape = (3400, 512)
🔹 SNR Level -4: Shape = (3400, 512)
🔹 SNR Level -3: Shape = (3400, 512)
🔹 SNR Level -2: Shape = (3400, 512)
🔹 SNR Level -1: Shape = (3400, 512)
🔹 SNR Level 0: Shape = (3400, 512)
🔹 SNR Level 1: Shape = (3400, 512)
🔹 SNR Level 2: Shape = (3400, 512)
🔹 Clean EEG Mean: -0.16774763156951383
🔹 Clean EEG Std Dev: 231.82429071340553


In [5]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Step 1: Expand clean EEG dataset (repeat each sample 10 times)
X_clean = np.repeat(clean_eeg, 10, axis=0)  # Shape becomes (34000, 512)

# Step 2: Stack all noisy EEG signals across SNR levels
X_noisy = np.concatenate([synthetic_eeg_eog[snr] for snr in synthetic_eeg_eog.keys()], axis=0)  # (34000, 512)

# Step 3: Normalize the data between -1 and 1
scaler = MinMaxScaler(feature_range=(-1, 1))

X_clean_scaled = scaler.fit_transform(X_clean)  # Normalize clean EEG
X_noisy_scaled = scaler.transform(X_noisy)  # Normalize noisy EEG

# Print dataset details
print(f"✅ Final dataset shapes: Noisy EEG {X_noisy_scaled.shape}, Clean EEG {X_clean_scaled.shape}")
print(f"✅ Clean EEG Mean after Scaling: {np.mean(X_clean_scaled):.4f}, Std Dev: {np.std(X_clean_scaled):.4f}")
print(f"✅ Noisy EEG Mean after Scaling: {np.mean(X_noisy_scaled):.4f}, Std Dev: {np.std(X_noisy_scaled):.4f}")


✅ Final dataset shapes: Noisy EEG (34000, 512), Clean EEG (34000, 512)
✅ Clean EEG Mean after Scaling: -0.0066, Std Dev: 0.2461
✅ Noisy EEG Mean after Scaling: -0.0068, Std Dev: 0.4158


In [6]:
from sklearn.model_selection import train_test_split

# Dictionary to store split data
X_train_list, X_test_list, y_train_list, y_test_list = [], [], [], []

# Split each SNR level separately to maintain distribution
for snr in synthetic_eeg_eog.keys():
    noisy_snr = synthetic_eeg_eog[snr]  # Get noisy EEG for this SNR level
    clean_snr = clean_eeg  # Corresponding clean EEG (same across all SNRs)
    
    # Split (80% train, 20% test)
    X_train_snr, X_test_snr, y_train_snr, y_test_snr = train_test_split(
        noisy_snr, clean_snr, test_size=0.2, random_state=42
    )

    # Store
    X_train_list.append(X_train_snr)
    X_test_list.append(X_test_snr)
    y_train_list.append(y_train_snr)
    y_test_list.append(y_test_snr)

# Stack all SNR levels together
X_train = np.vstack(X_train_list)
X_test = np.vstack(X_test_list)
y_train = np.vstack(y_train_list)
y_test = np.vstack(y_test_list)

# Print dataset sizes
print(f"✅ Final Train Set: Noisy EEG {X_train.shape}, Clean EEG {y_train.shape}")
print(f"✅ Final Test Set: Noisy EEG {X_test.shape}, Clean EEG {y_test.shape}")


✅ Final Train Set: Noisy EEG (27200, 512), Clean EEG (27200, 512)
✅ Final Test Set: Noisy EEG (6800, 512), Clean EEG (6800, 512)


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using Device: {device}")

# Define a PyTorch Dataset class
class EEGDataset(Dataset):
    def __init__(self, noisy_data, clean_data):
        self.noisy_data = torch.tensor(noisy_data, dtype=torch.float32).unsqueeze(-1)  # (N, 512, 1)
        self.clean_data = torch.tensor(clean_data, dtype=torch.float32).unsqueeze(-1)  # (N, 512, 1)

    def __len__(self):
        return len(self.noisy_data)

    def __getitem__(self, idx):
        return self.noisy_data[idx], self.clean_data[idx]

# Create DataLoader objects
batch_size = 64
train_dataset = EEGDataset(X_train, y_train)
test_dataset = EEGDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the LSTM-based Denoising Model
class LSTMDenoiser(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super(LSTMDenoiser, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out)
        return out

# Initialize model
model = LSTMDenoiser().to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for denoising
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("✅ LSTM Model Ready!")


✅ Using Device: cuda
✅ LSTM Model Ready!


In [10]:
import torch.optim.lr_scheduler as lr_scheduler

# Function to train the model with Early Stopping
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=50, patience=3):
    model.train()  # Set model to training mode

    best_loss = float("inf")
    early_stop_counter = 0

    # Learning Rate Scheduler (Reduce LR if loss plateaus)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5, verbose=True)

    for epoch in range(num_epochs):
        total_train_loss = 0.0
        total_test_loss = 0.0

        # Training loop
        for noisy_batch, clean_batch in train_loader:
            noisy_batch, clean_batch = noisy_batch.to(device), clean_batch.to(device)

            optimizer.zero_grad()
            outputs = model(noisy_batch)
            loss = criterion(outputs, clean_batch)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        # Validation loop
        model.eval()
        with torch.no_grad():
            for noisy_batch, clean_batch in test_loader:
                noisy_batch, clean_batch = noisy_batch.to(device), clean_batch.to(device)
                outputs = model(noisy_batch)
                loss = criterion(outputs, clean_batch)
                total_test_loss += loss.item()

        model.train()

        # Calculate average losses
        avg_train_loss = total_train_loss / len(train_loader)
        avg_test_loss = total_test_loss / len(test_loader)

        # Learning rate scheduling
        scheduler.step(avg_test_loss)

        # Print loss for monitoring
        print(f"🔹 Epoch [{epoch+1}/{num_epochs}] | Train Loss: {avg_train_loss:.6f} | Test Loss: {avg_test_loss:.6f}")

        # Check for early stopping
        if avg_test_loss < best_loss:
            best_loss = avg_test_loss
            early_stop_counter = 0  # Reset counter
            torch.save(model.state_dict(), "best_lstm_model.pth")  # Save best model
            print("✅ Model Improved! Saving...")
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print("⏹️ Early Stopping Triggered. Training Stopped!")
            break

    print("✅ Training Completed!")

# Start Training with Early Stopping
train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=50, patience=3)




🔹 Epoch [1/50] | Train Loss: 49235.290175 | Test Loss: 45445.583930
✅ Model Improved! Saving...
🔹 Epoch [2/50] | Train Loss: 42652.147877 | Test Loss: 40136.137650
✅ Model Improved! Saving...
🔹 Epoch [3/50] | Train Loss: 37929.832449 | Test Loss: 36259.897872
✅ Model Improved! Saving...
🔹 Epoch [4/50] | Train Loss: 34330.383483 | Test Loss: 33035.737332
✅ Model Improved! Saving...
🔹 Epoch [5/50] | Train Loss: 31383.096916 | Test Loss: 31051.970794
✅ Model Improved! Saving...
🔹 Epoch [6/50] | Train Loss: 29045.718506 | Test Loss: 28399.726489
✅ Model Improved! Saving...
🔹 Epoch [7/50] | Train Loss: 26988.136144 | Test Loss: 26539.371824
✅ Model Improved! Saving...
🔹 Epoch [8/50] | Train Loss: 25366.737367 | Test Loss: 25562.872518
✅ Model Improved! Saving...
🔹 Epoch [9/50] | Train Loss: 24005.774113 | Test Loss: 23990.529607
✅ Model Improved! Saving...
🔹 Epoch [10/50] | Train Loss: 22869.467394 | Test Loss: 22981.991010
✅ Model Improved! Saving...
🔹 Epoch [11/50] | Train Loss: 21898.802

In [12]:
import numpy as np
from scipy.stats import pearsonr
from scipy.fftpack import fft

# Function to compute RRMSE (Relative Root Mean Square Error)
def compute_rrmse(denoised, clean):
    return np.sqrt(np.mean((denoised - clean) ** 2)) / np.sqrt(np.mean(clean ** 2))

# Function to compute RRMSE in the Spectrum Domain (RRMSE-S)
def compute_rrmse_spectrum(denoised, clean):
    fft_clean = np.abs(fft(clean))  # Compute FFT
    fft_denoised = np.abs(fft(denoised))
    return compute_rrmse(fft_denoised, fft_clean)

# Function to compute Correlation Coefficient (CC)
def compute_cc(denoised, clean):
    return pearsonr(denoised.flatten(), clean.flatten())[0]

# Initialize metric storage
rrmse_t_list = []
rrmse_s_list = []
cc_list = []

with torch.no_grad():
    for noisy_batch, clean_batch in test_loader:
        noisy_batch, clean_batch = noisy_batch.to(device), clean_batch.to(device)
        denoised_batch = model(noisy_batch).cpu().numpy().squeeze()
        clean_batch_np = clean_batch.cpu().numpy().squeeze()

        # Compute metrics for each sample
        for i in range(len(clean_batch_np)):
            rrmse_t_list.append(compute_rrmse(denoised_batch[i], clean_batch_np[i]))
            rrmse_s_list.append(compute_rrmse_spectrum(denoised_batch[i], clean_batch_np[i]))
            cc_list.append(compute_cc(denoised_batch[i], clean_batch_np[i]))

# Compute final averaged values
rrmse_t = np.mean(rrmse_t_list)
rrmse_s = np.mean(rrmse_s_list)
cc = np.mean(cc_list)

print(f"✅ RRMSE-T (Time Domain): {rrmse_t:.6f}")
print(f"✅ RRMSE-S (Spectrum Domain): {rrmse_s:.6f}")
print(f"✅ Correlation Coefficient (CC): {cc:.6f}")


✅ RRMSE-T (Time Domain): 0.533674
✅ RRMSE-S (Spectrum Domain): 0.347567
✅ Correlation Coefficient (CC): 0.843706


In [13]:
import numpy as np
from scipy.stats import pearsonr
from scipy.fftpack import fft

# Function to compute RRMSE (Relative Root Mean Square Error)
def compute_rrmse(denoised, clean):
    return np.sqrt(np.mean((denoised - clean) ** 2)) / np.sqrt(np.mean(clean ** 2))

# Function to compute RRMSE in the Spectrum Domain (RRMSE-S)
def compute_rrmse_spectrum(denoised, clean):
    fft_clean = np.abs(fft(clean))  # Compute FFT
    fft_denoised = np.abs(fft(denoised))
    return compute_rrmse(fft_denoised, fft_clean)

# Function to compute Correlation Coefficient (CC)
def compute_cc(denoised, clean):
    return pearsonr(denoised.flatten(), clean.flatten())[0]

# Store results per SNR level
snr_metrics = {}

# Loop through each SNR level
with torch.no_grad():
    for snr in synthetic_eeg_eog.keys():
        print(f"🔹 Evaluating for SNR Level: {snr}")

        # Get test data for this SNR level
        noisy_snr = synthetic_eeg_eog[snr][-int(0.2 * len(synthetic_eeg_eog[snr])):]  # Take 20% as test set
        clean_snr = clean_eeg[-int(0.2 * len(clean_eeg)):]  # Corresponding clean EEG

        # Convert to PyTorch tensors
        noisy_tensor = torch.tensor(noisy_snr, dtype=torch.float32).unsqueeze(-1).to(device)
        clean_tensor = torch.tensor(clean_snr, dtype=torch.float32).unsqueeze(-1).to(device)

        # Get model predictions
        denoised_tensor = model(noisy_tensor).cpu().numpy().squeeze()
        clean_snr_np = clean_tensor.cpu().numpy().squeeze()

        # Compute metrics
        rrmse_t_list = [compute_rrmse(denoised_tensor[i], clean_snr_np[i]) for i in range(len(clean_snr_np))]
        rrmse_s_list = [compute_rrmse_spectrum(denoised_tensor[i], clean_snr_np[i]) for i in range(len(clean_snr_np))]
        cc_list = [compute_cc(denoised_tensor[i], clean_snr_np[i]) for i in range(len(clean_snr_np))]

        # Store results for this SNR level
        snr_metrics[snr] = {
            "RRMSE-T": np.mean(rrmse_t_list),
            "RRMSE-S": np.mean(rrmse_s_list),
            "CC": np.mean(cc_list)
        }

# Print Results in Table Format
print("\n🔹 **Final Evaluation Per SNR Level:**")
print("-----------------------------------------------------")
print("| SNR  |  RRMSE-T  |  RRMSE-S  |   CC   |")
print("-----------------------------------------------------")
for snr in sorted(snr_metrics.keys()):
    print(f"| {snr:3d}  |  {snr_metrics[snr]['RRMSE-T']:.6f}  |  {snr_metrics[snr]['RRMSE-S']:.6f}  |  {snr_metrics[snr]['CC']:.6f}  |")
print("-----------------------------------------------------")


🔹 Evaluating for SNR Level: -7
🔹 Evaluating for SNR Level: -6
🔹 Evaluating for SNR Level: -5
🔹 Evaluating for SNR Level: -4
🔹 Evaluating for SNR Level: -3
🔹 Evaluating for SNR Level: -2
🔹 Evaluating for SNR Level: -1
🔹 Evaluating for SNR Level: 0
🔹 Evaluating for SNR Level: 1
🔹 Evaluating for SNR Level: 2

🔹 **Final Evaluation Per SNR Level:**
-----------------------------------------------------
| SNR  |  RRMSE-T  |  RRMSE-S  |   CC   |
-----------------------------------------------------
|  -7  |  0.604605  |  0.396976  |  0.806070  |
|  -6  |  0.576244  |  0.379189  |  0.823137  |
|  -5  |  0.552102  |  0.363474  |  0.837296  |
|  -4  |  0.531786  |  0.350357  |  0.848761  |
|  -3  |  0.514642  |  0.339950  |  0.858336  |
|  -2  |  0.499370  |  0.330671  |  0.866802  |
|  -1  |  0.486451  |  0.322775  |  0.873886  |
|   0  |  0.475142  |  0.315710  |  0.880000  |
|   1  |  0.465327  |  0.308980  |  0.885121  |
|   2  |  0.457174  |  0.302691  |  0.889400  |
------------------------