In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
import numpy as np

# Demo: read a small 2x3 array column by column into one flat vector.
matrix = np.arange(1, 7).reshape(2, 3)

# Column-major ("F") order walks down each column in turn, which gives the
# same sequence as flattening the transpose.
result = matrix.flatten(order="F")

print(result)


In [None]:
# Read the competition's train and test CSV files from the Kaggle input mount.
input_root = '/kaggle/input/hms-harmful-brain-activity-classification'
train_df = pd.read_csv(f'{input_root}/train.csv')
test = pd.read_csv(f'{input_root}/test.csv')

In [None]:
# Preview the first rows of the table read from train.csv.
train_df.head()

In [None]:
# Directories with the training EEG and spectrogram files, both located under
# the same competition input folder.
hms_input_dir = "/kaggle/input/hms-harmful-brain-activity-classification"
eeg_dir = hms_input_dir + "/train_eegs"
spectrogram_dir = hms_input_dir + "/train_spectrograms"

In [None]:
# List every entry in the EEG directory and report how many there are.
# NOTE(review): the count covers all directory entries; the message assumes
# each of them is a parquet file.
eeg_files = os.listdir(eeg_dir)
eeg_file_count = len(eeg_files)
print(f"There are {eeg_file_count} EEG parquet files")

In [None]:
# List every entry in the spectrogram directory and report how many there are.
# NOTE(review): the count covers all directory entries; the message assumes
# each of them is a parquet file.
spec_files = os.listdir(spectrogram_dir)
spec_file_count = len(spec_files)
print(f"There are {spec_file_count} Spectrogram parquet files")

In [None]:
# (rows, columns) of the table read from train.csv.
train_df.shape

In [None]:
# The target columns are picked positionally: the last six columns of train_df.
# NOTE(review): this relies on the CSV column order - confirm it stays stable.
n_target_cols = 6
targets = train_df.columns[-n_target_cols:]
print(f"There are {len(targets)} Targets!")
print(targets.tolist())

In [None]:
# One row per eeg_id: keep the first spectrogram_id of the group as 'spec_id'
# and the smallest spectrogram_label_offset_seconds of the group as 'min'.
train = train_df.groupby('eeg_id').agg(
    spec_id=('spectrogram_id', 'first'),
    min=('spectrogram_label_offset_seconds', 'min'),
)


In [None]:
# Largest spectrogram_label_offset_seconds per eeg_id, stored as the 'max'
# column of the per-EEG table.
tmp = train_df.groupby('eeg_id')[['spectrogram_label_offset_seconds']].max()
train['max'] = tmp

In [None]:
# Attach a patient to every EEG: the first (non-null) patient_id found among
# the rows of each eeg_id.
tmp = train_df.groupby('eeg_id')[['patient_id']].first()
train['patient_id'] = tmp

In [None]:
# Add up each target column over all rows that share an eeg_id.
tmp = train_df.groupby('eeg_id')[targets].sum()

# The sums are copied in as raw arrays, i.e. by position rather than by index.
# NOTE(review): this assumes `train` has the same eeg_id row order as `tmp`.
for t in targets:
    train[t] = tmp[t].to_numpy()

In [None]:
# Rescale every row of target sums so that the row adds up to 1.
# NOTE(review): a row whose targets sum to 0 would become NaN - confirm no
# such rows exist.
vote_counts = train[targets].to_numpy()
row_totals = vote_counts.sum(axis=1, keepdims=True)
y_data = vote_counts / row_totals
train[targets] = y_data

In [None]:
# Carry over the first (non-null) expert_consensus value of each eeg_id as the
# 'target' column.
tmp = train_df.groupby('eeg_id')[['expert_consensus']].first()
train['target'] = tmp

In [None]:
# Turn the eeg_id index into an ordinary column. The guard keeps this cell
# idempotent: without it, re-running the cell would call reset_index() on an
# already-reset frame and insert a stray 'index' column.
if 'eeg_id' not in train.columns:
    train = train.reset_index()
print('Train non-overlapp eeg_id shape:', train.shape )
train.head()

In [None]:
# Build each row's EEG file path as <eeg_dir>/<eeg_id>.parquet.
train['eeg_path'] = train['eeg_id'].map(
    lambda eeg_id: os.path.join(eeg_dir, f'{eeg_id}.parquet')
)

In [None]:
# Build each row's spectrogram file path as <spectrogram_dir>/<spec_id>.parquet.
train['spectrogram_path'] = train['spec_id'].map(
    lambda spec_id: os.path.join(spectrogram_dir, f'{spec_id}.parquet')
)

In [None]:
# Preview the first rows of the per-eeg_id table, now including the path columns.
train.head()

In [None]:
# (rows, columns) of the per-eeg_id table.
train.shape

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [None]:
def load_eeg_data(eeg_path):
    """Read one EEG parquet file into a DataFrame.

    Args:
        eeg_path: Filesystem path of the parquet file to read.

    Returns:
        The DataFrame returned by ``pd.read_parquet``, or ``None`` when
        ``eeg_path`` is not an existing regular file. In that case a message
        is printed instead of an exception being raised.
    """
    # Guard clause: report the bad path and bail out before touching pandas.
    if not os.path.isfile(eeg_path):
        print(f"Invalid file path: {eeg_path}")
        return None
    return pd.read_parquet(eeg_path)

In [None]:
def load_spectrogram_data(spec_path):
    """Read one spectrogram parquet file into a DataFrame.

    Args:
        spec_path: Filesystem path of the parquet file to read.

    Returns:
        The DataFrame returned by ``pd.read_parquet``, or ``None`` when
        ``spec_path`` is not an existing regular file. In that case a message
        is printed instead of an exception being raised.
    """
    # Guard clause: report the bad path and bail out before touching pandas.
    if not os.path.isfile(spec_path):
        print(f"Invalid file path: {spec_path}")
        return None
    return pd.read_parquet(spec_path)

In [None]:
# Try the EEG loader on one file from the training EEG directory.
load_eeg_data(os.path.join(eeg_dir, '1000913311.parquet'))

In [None]:
# Try the spectrogram loader on one file from the training spectrogram directory.
load_spectrogram_data(os.path.join(spectrogram_dir, '1000086677.parquet'))