### Import necessary libraries

In [None]:
import pandas as pd  # library used here to load tabular files (CSV / parquet)
import numpy as np  # library for array / matrix computations
from lightgbm import LGBMClassifier  # LightGBM classifier
import dill  # serializes / deserializes objects (e.g. saving and loading tree models)
import warnings  # used to silence warnings that can safely be ignored
warnings.filterwarnings('ignore')  # install a warning filter that suppresses all warning output

### Config

In [None]:
# Global configuration: target columns, random seed, fold count and feature names.
import random

seed = 2024
num_folds = 10
TARGETS = [
    'seizure_vote',
    'lpd_vote',
    'gpd_vote',
    'lrda_vote',
    'grda_vote',
    'other_vote',
]

# Seed both random number generators so that runs are reproducible.
random.seed(seed)
np.random.seed(seed)

# Column names of one training spectrogram; the first column (time) is dropped,
# every other column is kept.
SPEC_COLS = pd.read_parquet(
    "/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/1000086677.parquet"
).columns[1:]

# One feature per (statistic, window) pair and spectrogram column. The outer
# loop runs over the suffixes so all columns of one suffix stay contiguous,
# in this exact order: mean_10m, min_10m, mean_20s, min_20s.
FEATURES = [
    f'{c}_{suffix}'
    for suffix in ('mean_10m', 'min_10m', 'mean_20s', 'min_20s')
    for c in SPEC_COLS
]

### Test feature engineering

In [None]:
test = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/test.csv')
# Feature matrix for the test set: one row per test sample, one column per
# entry of FEATURES.
data = np.zeros((len(test), len(FEATURES)))

# Number of spectrogram value columns (time column excluded); each of the four
# feature groups below occupies one contiguous slice of this width.
n_cols = len(SPEC_COLS)

# The test data provides no row['min'] / row['max'], so a fixed start row is
# used instead (per the original note: an attempt based on the training-data
# median). It does not depend on the sample, so it is set once outside the loop.
r = 10

for k, s in enumerate(test['spectrogram_id']):
    s = int(s)  # spectrogram_id of the k-th test row
    spec = pd.read_parquet(f'/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/{s}.parquet')

    # All spectrogram values without the leading time column.
    values = spec.iloc[:, 1:].values

    # Long window: rows [r, r+300). NaN-aware per-column mean and minimum.
    # The minimum previously used rows [r, 300), which disagreed with the mean;
    # both statistics now use the same window.
    long_window = values[r:r + 300]
    data[k, :n_cols] = np.nanmean(long_window, axis=0)
    data[k, n_cols:2 * n_cols] = np.nanmin(long_window, axis=0)

    # Short window: rows [r+145, r+155). NaN-aware per-column mean and minimum.
    short_window = values[r + 145:r + 155]
    data[k, 2 * n_cols:3 * n_cols] = np.nanmean(short_window, axis=0)
    data[k, 3 * n_cols:4 * n_cols] = np.nanmin(short_window, axis=0)

test[FEATURES] = data
print('test shape',test.shape)

### load models and predict

In [None]:
def pickle_load(path):
    """Load and return the object serialized with dill at ``path``.

    The file is opened in binary read mode and closed again once the object
    has been deserialized.

    NOTE: dill deserialization can execute arbitrary code, so this must only
    be used on files from a trusted source.
    """
    with open(path, "rb") as handle:
        return dill.load(handle)
# Per-fold class-probability predictions for the test set.
preds = []

for fold in range(num_folds):
    # Load the model saved for this fold and predict on the engineered features.
    model_path = f'/kaggle/input/hms-baseline-lgb-10-folds-training/lgb_f{fold}.model'
    model = pickle_load(model_path)
    preds.append(model.predict_proba(test[FEATURES]))

# Average the predictions over all folds (axis 0 is the fold axis).
pred = np.asarray(preds).mean(axis=0)
print('Test preds shape', pred.shape)

### Submission

In [None]:
# Build the submission table: the averaged predictions under the TARGETS
# column names, with eeg_id placed in front as the first column.
submission = pd.DataFrame(pred, columns=TARGETS)
submission.insert(0, 'eeg_id', test.eeg_id.values)
# Write without the row index, then preview the first rows.
submission.to_csv('submission.csv', index=None)
submission.head()