In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

import os
# Show every CSV shipped under the Kaggle input directory so the available
# tables are visible at the top of the notebook.
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Substring match, not a suffix match: any name containing '.csv' is listed.
    for filename in [name for name in filenames if '.csv' in name]:
        print(os.path.join(dirname, filename))

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
# Load the training metadata table; the cells below read its spectrogram_id,
# eeg_id and expert_consensus columns.
TRAIN_CSV = '/kaggle/input/hms-harmful-brain-activity-classification/train.csv'
df_train = pd.read_csv(TRAIN_CSV)

# Column names, dtypes and non-null counts.
df_train.info()

In [None]:
def corr_eeg(spec_id, correlation_limit=0.8):
    """Print strongly correlated column pairs for every EEG linked to a spectrogram.

    Looks up the distinct ``eeg_id`` values that ``df_train`` associates with
    ``spec_id``, loads each recording from ``train_eegs/<eeg_id>.parquet`` and
    prints every pair of columns whose absolute correlation (as computed by
    ``DataFrame.corr`` with its defaults) is strictly greater than
    ``correlation_limit``.

    Parameters
    ----------
    spec_id
        Value compared against ``df_train.spectrogram_id``.
    correlation_limit : float, default 0.8
        Threshold on the absolute correlation. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    None
        Results are printed, one line per qualifying pair.
    """
    eeg_ids = df_train.loc[df_train.spectrogram_id == spec_id, 'eeg_id'].unique()

    for eeg_id in eeg_ids:
        # Load the raw EEG recording that belongs to this eeg_id.
        eeg_path = ('/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/'
                    + str(eeg_id) + '.parquet')
        df = pd.read_parquet(eeg_path)

        correlation_matrix = df.corr()
        columns = correlation_matrix.columns

        # Walk the strict lower triangle so each unordered pair is visited once
        # and the diagonal (self-correlation) is skipped.
        for i in range(len(columns)):
            for j in range(i):
                value = correlation_matrix.iloc[i, j]
                # A NaN correlation compares False here and is therefore skipped.
                if abs(value) > correlation_limit:
                    print(f"Columns {columns[i]} and {columns[j]} have a correlation of {value}")

In [None]:
# Correlation scan for one target class: draw three random spectrograms of
# that class and report the highly correlated EEG columns for each.
target = 'Seizure'  # other classes: 'GPD' 'LRDA' 'GRDA' 'LPD'

# Fixed seed so the same spectrograms are drawn on every run.
np.random.seed(42)
spec_ids = df_train.loc[df_train['expert_consensus'] == target, 'spectrogram_id'].unique()
ran_spec_ids = np.random.choice(spec_ids, size=3, replace=False)

for spec_id in ran_spec_ids:
    # Banner lines delimit the output that belongs to one spectrogram.
    print('###############################################################################################################')
    corr_eeg(spec_id)
    print('###############################################################################################################')
    

In [None]:
# For target 1 ('Seizure'), make a list of the column pairs whose correlation is > 0.8.
# Keep size = 3 (three random spectrograms) to get a more reliable result.
# Then repeat the step above for each of the other four targets, one at a time: 'GPD', 'LRDA', 'GRDA', 'LPD'.


In [None]:
def plot_spec(spec_id):
    """Display the stored spectrogram for ``spec_id`` as a log-scaled image.

    Reads ``train_spectrograms/<spec_id>.parquet``, transposes the table so its
    columns run along the vertical image axis, applies ``np.log`` element-wise
    and shows the result with a colorbar. The figure title is the parquet file
    name (``'<spec_id>.parquet'``).

    Parameters
    ----------
    spec_id
        Spectrogram identifier; its string form is the parquet file stem.
    """
    parquet_name = f'{spec_id}.parquet'  # e.g. '1000646093.parquet'
    spectrogram = pd.read_parquet(
        '/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/' + parquet_name
    )

    # NOTE(review): the log is taken over every column of the file as-is;
    # non-positive values would become -inf/NaN. Confirm the file contents.
    log_power = np.log(spectrogram.T)

    plt.imshow(log_power)
    plt.colorbar(label='Spectral Power')
    plt.xlabel('Time')
    plt.ylabel('Frequency')
    plt.title(parquet_name)
    plt.show()

    

In [None]:
def plot_eeg_ids_from_spect_id(spec_id, sig_to_plot, plot_no):
    """Plot one EEG column for every recording linked to a spectrogram.

    Parameters
    ----------
    spec_id
        Value matched against ``df_train.spectrogram_id``.
    sig_to_plot
        Name of the EEG column to draw.
    plot_no
        Selects how much of the signal is drawn:
        ``1`` -> the first 200 rows,
        ``2`` -> every 50th row of the first 10000 rows,
        anything else -> the whole recording.

    One new figure is opened per EEG id. The function does not call
    ``plt.show()`` and returns nothing.
    """
    rows_for_spec = df_train.spectrogram_id == spec_id
    eeg_ids = df_train[rows_for_spec]['eeg_id'].unique()

    # Translate the plot mode into the row window that will be drawn.
    if plot_no == 1:
        window = slice(0, 200)
    elif plot_no == 2:
        window = slice(0, 10000, 50)
    else:
        window = slice(None)

    for eeg_id in eeg_ids:
        # Load the raw EEG recording for this eeg_id.
        df = pd.read_parquet(
            '/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/' + f'{eeg_id}.parquet'
        )

        plt.figure()
        plt.plot(df.index[window], df[sig_to_plot][window])

        # Mode 1 puts a space between 'eeg_id' and the id; the other modes do not.
        if plot_no == 1:
            title = f'spectrum id{spec_id}  eeg_id {eeg_id}'
        else:
            title = f'spectrum id{spec_id}  eeg_id{eeg_id}'
        plt.title(title)
    

    
    

In [None]:
import pandas as pd
from scipy.signal import butter, filtfilt

# Filtering - Band Pass

In [None]:
# Band-pass demo: for one random spectrogram of the chosen target class, split
# every EEG channel into alpha / beta / gamma bands and overlay the raw and
# filtered traces of a single channel.
import numpy as np


def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass ``data`` with a Butterworth filter applied forward and backward.

    Parameters
    ----------
    data : 1-D array-like
        Signal samples.
    lowcut, highcut : float
        Band edges in Hz; both are normalised by the Nyquist frequency, so
        ``highcut`` must be below ``fs / 2``.
    fs : float
        Sampling rate in Hz.
    order : int, default 5
        Order passed to ``scipy.signal.butter``.

    Returns
    -------
    numpy.ndarray
        Filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    nyquist = 0.5 * fs
    b, a = butter(order, [lowcut / nyquist, highcut / nyquist], btype='band')
    return filtfilt(b, a, data)


# Everything below is loop-invariant, so it is set up once rather than per EEG file.

# Sampling rate of the EEG recordings in Hz.
# NOTE(review): 200 Hz is taken from the original cell; confirm against the dataset description.
fs = 200

# Frequency bands in Hz (low edge, high edge).
alpha_band = (8, 13)
beta_band = (13, 30)
gamma_band = (30, 80)
bands = {'alpha': alpha_band, 'beta': beta_band, 'gamma': gamma_band}

# EEG channels to filter.
eeg_columns = ['Fp1', 'F3', 'C3', 'P3', 'F7', 'T3', 'T5', 'O1', 'Fz', 'Cz', 'Pz',
               'Fp2', 'F4', 'C4', 'P4', 'F8', 'T4', 'T6', 'O2']

# Channel and band shown in the overlay plot (swap '_gamma' for '_alpha' or '_beta').
sig_to_plot = 'Fp1'
sig_to_plot_filt = sig_to_plot + '_gamma'

target = 'Seizure'  # other classes: 'GPD' 'LRDA' 'GRDA' 'LPD'
np.random.seed(42)  # fixed seed so the same spectrogram is drawn on every run
spec_ids = df_train[df_train.expert_consensus == target]['spectrogram_id'].unique()
ran_spec_ids = np.random.choice(spec_ids, size=1, replace=False)

for spec_id in ran_spec_ids:
    eeg_ids = df_train[df_train.spectrogram_id == spec_id]['eeg_id'].unique()

    for eeg_id in eeg_ids:
        eeg_id_str = str(eeg_id) + '.parquet'

        # Load the raw EEG recording for this eeg_id.
        df = pd.read_parquet('/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/' + eeg_id_str)

        # One filtered trace per (channel, band), keyed '<channel>_<band>'.
        filtered_signals = {}
        for column in eeg_columns:
            for band_name, (lowcut, highcut) in bands.items():
                filtered_signals[f'{column}_{band_name}'] = bandpass_filter(df[column], lowcut, highcut, fs)

        # Same data as a DataFrame, for convenience.
        filtered_df = pd.DataFrame(filtered_signals)

        # Overlay the first 200 samples of the raw and the filtered signal.
        plt.figure()
        plt.plot(df[sig_to_plot][0:200], label=sig_to_plot)
        plt.plot(filtered_signals[sig_to_plot_filt][0:200], label=sig_to_plot_filt)
        plt.title('eeg_id ' + str(eeg_id))
        plt.legend()
        

# Plot the EEG signals assigned to you for every target class, and check whether you can see any visual similarity in the signals that can be associated with a particular target class.

In [None]:
# EEG plots: change `target` to look at another class, and change plot_no
# (1, 2 or 3) to vary how much of the signal is drawn and how it is sampled.
# See plot_eeg_ids_from_spect_id for what each mode does.
import numpy as np
target = 'Seizure'  # other classes: 'GPD' 'LRDA' 'GRDA' 'LPD'

# Fixed seed so the same five spectrograms are drawn on every run.
np.random.seed(42)
spec_ids = df_train.loc[df_train['expert_consensus'] == target, 'spectrogram_id'].unique()
ran_spec_ids = np.random.choice(spec_ids, size=5, replace=False)

for spec_id in ran_spec_ids:
    plot_eeg_ids_from_spect_id(spec_id, sig_to_plot='Fp1', plot_no=1)



# Spectrogram Analysis

In [None]:
# Randomly select five spectrograms of one target class and plot them.
import numpy as np

target = 'Seizure'  # other classes: 'GPD' 'LRDA' 'GRDA' 'LPD'

# Fixed seed so the same five spectrograms are drawn on every run.
np.random.seed(42)

# Filter on `target` (previously hard-coded to 'Seizure', so changing
# `target` above had no effect on which spectrograms were plotted).
spec_ids = df_train[df_train.expert_consensus == target]['spectrogram_id'].unique()
ran_spec_ids = np.random.choice(spec_ids, size=5, replace=False)

for spec_id in ran_spec_ids:
    plot_spec(spec_id)