#### In a previous notebook, <a href="https://www.kaggle.com/code/yunsuxiaozi/hms-learn-to-transform-eeg-to-spectrogram-256-256">HMS Learn to transform eeg to spectrogram(256*256)</a>, I learned how to generate spectrograms from EEG data. In the notebook <a href="https://www.kaggle.com/code/yunsuxiaozi/hms-eegs-resnet34d-512-512-training-5-folds">HMS eegs resnet34d(512*512 training 5 folds)</a>, I used those spectrograms to train the models. This notebook loads the trained models and uses them for inference.

### Import necessary libraries

In [None]:
#necessary
import pandas as pd#导入csv文件的库
import numpy as np#进行矩阵运算的库
import torch #一个深度学习的库Pytorch
import torch.nn as nn#neural network,神经网络
import torch.nn.functional as F#神经网络函数库
import torchvision.transforms as transforms#Pytorch下面的图像处理库,用于对图像进行数据增强
#设置随机种子
import random
import warnings#避免一些可以忽略的报错
warnings.filterwarnings('ignore')#filterwarnings()方法是用于设置警告过滤器的方法，它可以控制警告信息的输出方式和级别。

### Config

In [None]:
class Config:
    """Static settings shared by the cells of this inference notebook."""

    # Number of per-fold checkpoints that are loaded and ensembled.
    num_folds = 5
    # Seed value for the random number generators.
    seed = 2024
    # Resizes an image tensor to 512x512.
    image_transform = transforms.Resize((512, 512))

### Set seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator in use and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to PyTorch, NumPy and Python's ``random``.
    """
    # Restrict cuDNN to deterministic convolution algorithms.
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm and may select a
    # different one from run to run, so it must be disabled for reproducible
    # results.
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)  # PyTorch generators
    np.random.seed(seed)  # NumPy global generator
    random.seed(seed)  # Python built-in generator
# Apply the configured seed once, at notebook start-up.
seed_everything(Config.seed)

### trained_models

In [None]:
# Load exactly one trained checkpoint per cross-validation fold. Each model
# must appear once in the list so that every fold carries the same weight
# when the predictions are averaged.
# NOTE(review): torch.load unpickles the file, so only trusted checkpoints
# should be loaded; the loaded object is later called directly as a model,
# so the files presumably hold whole pickled modules rather than state
# dicts — confirm against the training notebook.
models = []
for i in range(Config.num_folds):
    # map_location="cpu" keeps the weights on the CPU, matching the CPU
    # tensors that the prediction cell feeds into the models.
    model = torch.load(
        f"/kaggle/input/hms-eegs-resnet34d-512-512-training-5-folds/HMS_resnet34d_fold{i}.pth",
        map_location="cpu",
    )
    models.append(model)

### Function to transform eegs to spectrograms

In [None]:
import librosa#音频处理和分析的库

# EEG electrode regions, labelled by the commented-out NAMES list below. The
# author glosses the labels as 'Left Lower', 'Left Upper', 'Right Upper',
# 'Right Lower' (clockwise).
# NOTE(review): these abbreviations may instead stand for lateral /
# parasagittal electrode chains — confirm against the competition's data
# description.
#NAMES = ['LL','LP','RP','RR']
# Electrode chains, one row per region. spectrogram_from_eeg subtracts each
# pair of neighbouring entries within a row.
FEATS = [['Fp1','F7','T3','T5','O1'],
         ['Fp1','F3','C3','P3','O1'],
         ['Fp2','F8','T4','T6','O2'],
         ['Fp2','F4','C4','P4','O2']]

#将eeg文件转成spectrogram文件
def spectrogram_from_eeg(parquet_path):
    """Convert one EEG parquet file into a single 2-D log-mel spectrogram image.

    The central 10,000 rows of the recording are used. For each electrode
    chain in ``FEATS``, the differences between neighbouring electrodes are
    turned into log-mel spectrograms and averaged; the resulting per-chain
    images are then averaged into one array.

    Args:
        parquet_path: Path of the EEG parquet file to read.

    Returns:
        A float32 NumPy array of shape (256, 256).
    """
    recording = pd.read_parquet(parquet_path)
    # Keep the 10,000 rows centred on the middle of the recording.
    start = len(recording) // 2 - 5000
    recording = recording.iloc[start:start + 10000]

    # One channel per electrode chain; the channels are averaged at the end.
    image = np.zeros((256, 256, 4), dtype='float32')

    for region, chain in enumerate(FEATS):
        # Walk over neighbouring electrode pairs within the chain.
        for first, second in zip(chain[:-1], chain[1:]):
            # Potential difference between the two electrodes.
            signal = recording[first].values - recording[second].values

            # Replace missing samples with the mean of the valid ones, or
            # with zeros when the whole signal is missing.
            fill = np.nanmean(signal)
            if np.isnan(signal).all():
                signal[:] = 0
            else:
                signal = np.nan_to_num(signal, nan=fill)

            # Mel spectrogram of the difference signal. hop_length is chosen
            # from the signal length so that the time axis has roughly 256
            # frames.
            mel = librosa.feature.melspectrogram(
                y=signal,
                sr=200,
                hop_length=len(signal) // 256,
                n_fft=1024,
                n_mels=256,
                fmin=0,
                fmax=20,
                win_length=128,
            )

            # Trim the time axis down to a multiple of 32 frames.
            n_frames = (mel.shape[1] // 32) * 32
            # Power -> decibels relative to the spectrogram's peak power.
            mel_db = librosa.power_to_db(mel, ref=np.max).astype(np.float32)[:, :n_frames]

            # Rescale the decibel values and accumulate into this chain's
            # channel.
            image[:, :, region] += (mel_db + 40) / 40

        # Four pair differences were summed per chain, so take their mean.
        image[:, :, region] /= 4.0

    # Collapse the four chain channels into a single (256, 256) image.
    return np.mean(image, axis=2)

### Predict and Submission

In [None]:
submission = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/sample_submission.csv")
# Ids of the test EEG recordings that need a prediction.
eeg_ids = submission['eeg_id'].values

# Put every fold model into evaluation mode once, instead of on every sample.
for model in models:
    model.eval()

test_preds = []
for eeg_id in eeg_ids:
    # Build the spectrogram image for this recording.
    data = spectrogram_from_eeg(f'/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/{eeg_id}.parquet')
    # Add a leading channel axis, then resize with the configured transform.
    data_tensor = torch.unsqueeze(torch.Tensor(data), dim=0)
    data = Config.image_transform(data_tensor)
    test_pred = []
    for model in models:
        with torch.no_grad():
            # unsqueeze(0) adds the batch axis. The softmax dimension is
            # given explicitly: dim=1 is what the deprecated implicit choice
            # resolves to for a 2-D output.
            # NOTE(review): assumes the model returns (batch, classes) logits — confirm.
            pred = F.softmax(model(data.unsqueeze(0)), dim=1)[0]
            pred = pred.cpu().numpy()
        test_pred.append(pred)
    # Average the per-model probabilities for this recording.
    test_pred = np.array(test_pred).mean(axis=0)
    test_preds.append(test_pred)
test_preds = np.array(test_preds)
test_preds

In [None]:
# Start from the sample submission so the id column and layout are kept.
submission = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/sample_submission.csv")
labels = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']
# Column idx of test_preds is written to the '<label>_vote' column.
for idx, label in enumerate(labels):
    submission[f'{label}_vote'] = test_preds[:, idx]
# Write the file without the DataFrame index column.
submission.to_csv("submission.csv", index=None)
submission.head()