In [None]:
!pip3 install plotly --upgrade

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.subplots as sp
import plotly.graph_objects as go

In [None]:
# Training metadata table; its *_vote columns are summed further down.
train_df = pd.read_csv(
    "/kaggle/input/hms-harmful-brain-activity-classification/train.csv"
)
# Directory prefix of the spectrogram files; "<spectrogram_id>.parquet" is
# appended to it when an individual spectrogram is loaded.
spect_path_prefix = (
    "/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/"
)
train_df.info()

**Electroencephalographic (EEG) density spectral array (DSA) is a three-dimensional method of displaying EEG signals. It consists of the EEG frequency (y-axis), the power of the EEG signal (originally the z-axis, but colour-coded so that it can be integrated into a two-dimensional plot) and the development of the EEG power spectrum over time (x-axis). The power spectrum is encoded in different colours: blue implies minimal power and red implies high or maximal power.**

Ref: https://associationofanaesthetists-publications.onlinelibrary.wiley.com/doi/10.1111/anae.14458

Learn more about EEG spectrography: https://www.kaggle.com/code/seshurajup/eegs-10-20-system


In [None]:
def plot_spectogram(spec_df, prefixes, title = "Spectogram", height=1500):
    """Draw one log-power heatmap per column-name prefix, stacked vertically.

    Parameters
    ----------
    spec_df : pandas.DataFrame
        Spectrogram table. Columns named ``"<prefix>_<number>"`` are plotted,
        with the numeric suffix used as the y coordinate. When a ``"time"``
        column is present it supplies the x coordinates; otherwise plotly
        falls back to the row position.
    prefixes : sequence of str
        Column-name prefixes (for example ``"LL"``); one subplot per prefix.
    title : str, optional
        Title of the whole figure.
    height : int, optional
        Figure height in pixels (default 1500).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``prefixes`` is empty.
    """
    prefixes = list(prefixes)
    if not prefixes:
        raise ValueError("prefixes must contain at least one column prefix")

    # Small offset so that zero power does not produce log(0) = -inf.
    epsilon = 1e-10
    # Real time stamps, if the table carries them, so the x-axis matches its
    # label. NOTE(review): the unit of this column is presumably seconds --
    # confirm against the dataset description.
    time_values = spec_df["time"] if "time" in spec_df.columns else None

    fig = sp.make_subplots(rows=len(prefixes), cols=1, subplot_titles=prefixes)
    for row_number, prefix in enumerate(prefixes, start=1):
        # Match "<prefix>_" exactly at the start of the name so that a prefix
        # cannot pick up unrelated columns that merely begin with the same letters.
        marker = f"{prefix}_"
        columns = [name for name in spec_df.columns if str(name).startswith(marker)]
        frequencies = pd.to_numeric(pd.Index([str(name)[len(marker):] for name in columns]))
        log_power = np.log(spec_df[columns] + epsilon)
        # Transpose so that frequency runs along y and time along x.
        fig.add_trace(go.Heatmap(z=log_power.T.to_numpy(),
                                 x=time_values,
                                 y=frequencies,
                                 coloraxis="coloraxis"),
                      row=row_number, col=1)

    # Figure-wide settings are applied once, after all traces exist.
    fig.update_xaxes(title_text="Time(Seconds)")
    fig.update_yaxes(title_text="Frequency(Hz)")
    # A single shared colour axis keeps the colours comparable across subplots.
    fig.update_layout(coloraxis={'colorscale': 'Jet'}, height=height, title_text=title)
    fig.show()

## Plot 5 spectrograms with maximum votes

In [None]:
# Total number of votes per row, summed over the six vote columns.
vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
train_df['total_votes'] = train_df[vote_columns].sum(axis='columns')
# For every spectrogram_id keep the single row with the highest total_votes.
top_row_labels = train_df.groupby('spectrogram_id')['total_votes'].idxmax()
max_votes_df = train_df.loc[top_row_labels]
# Order those rows from most to fewest votes and keep the first five.
max_votes_df_sorted = max_votes_df.sort_values(by='total_votes', ascending=False).head(5)
max_votes_df_sorted

In [None]:
## Observing the spectrograms with max votes
# Walk the two needed columns directly instead of using iterrows(): iterrows()
# builds one Series per row and does not preserve per-column dtypes, so an
# integer id could be turned into a float and yield a wrong file name.
for spectrogram_id, total_votes in zip(max_votes_df_sorted['spectrogram_id'],
                                       max_votes_df_sorted['total_votes']):
    print(spectrogram_id)
    spectogram_path = f"{spect_path_prefix}{spectrogram_id}.parquet"
    spec_df = pd.read_parquet(spectogram_path)
    plot_spectogram(spec_df, ["LL","RL","RP","LP"], title=f"Vote count = {total_votes}")

## Plot 5 spectrograms with minimum votes

In [None]:
# Total number of votes per row, summed over the six vote columns.
vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
train_df['total_votes'] = train_df[vote_columns].sum(axis='columns')
# For every spectrogram_id keep the single row with the lowest total_votes.
bottom_row_labels = train_df.groupby('spectrogram_id')['total_votes'].idxmin()
min_votes_df = train_df.loc[bottom_row_labels]
# Order those rows from fewest to most votes (ascending) and keep the first five.
min_votes_df_sorted = min_votes_df.sort_values(by='total_votes').head(5)
min_votes_df_sorted

In [None]:
## Observing the spectrograms with min votes
# Walk the two needed columns directly instead of using iterrows(): iterrows()
# builds one Series per row and does not preserve per-column dtypes, so an
# integer id could be turned into a float and yield a wrong file name.
for spectrogram_id, total_votes in zip(min_votes_df_sorted['spectrogram_id'],
                                       min_votes_df_sorted['total_votes']):
    print(spectrogram_id)
    spectogram_path = f"{spect_path_prefix}{spectrogram_id}.parquet"
    spec_df = pd.read_parquet(spectogram_path)
    plot_spectogram(spec_df, ["LL","RL","RP","LP"], title=f"Vote count = {total_votes}")

### Acknowledgement
- https://www.kaggle.com/code/mpwolke/seizures-classification-parquet
- https://www.kaggle.com/code/clehmann10/plot-spectrograms/notebook