# In [None]:
import pandas as pd
import os
import numpy as np
from autosklearn.classification import AutoSklearnClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

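## Feature extraction function
## file_list: list of audio file paths (list)
## save_path: path of the .npy file used to cache the extracted features
## Returns: numpy.ndarray of shape (len(file_list), 88)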
def extract_audio_feature(file_list,save_path):
    """Extract openSMILE eGeMAPSv02 functionals for audio files, cached on disk.

    If ``save_path`` already exists, the array stored there is loaded and
    returned without touching ``file_list``. Otherwise every file is processed
    with openSMILE, the per-file feature vectors are stacked row-wise, the
    result is written to ``save_path`` and then returned.

    Args:
        file_list: Sized sequence of audio file paths.
        save_path: Path of the ``.npy`` cache file to load from / write to.

    Returns:
        numpy.ndarray with one row per entry of ``file_list``.

    Raises:
        ValueError: If no cache exists and ``file_list`` is empty.
    """
    # Reuse previously extracted features when the cache file is present.
    if os.path.exists(save_path):
        print(f"已找到保存的特征文件 '{save_path}'，正在加载...")
        feature = np.load(save_path)
        print("特征加载完毕！")
        return feature

    # np.stack cannot stack an empty list; fail early with a clear message
    # instead of after the extractor has been set up.
    if len(file_list) == 0:
        raise ValueError(
            "file_list is empty and no cached feature file exists at "
            f"'{save_path}'; nothing to extract."
        )

    # Imported here because opensmile is not among the imports at the top of
    # this file; a function-scope import also keeps the cached path usable
    # when openSMILE is not installed.
    import opensmile

    print("请耐心等待特征提取完！")
    # Feature set eGeMAPSv02 at the Functionals level.
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    total = len(file_list)
    feature = []
    for done, file in enumerate(file_list, start=1):
        frame = smile.process_file(file)
        # Flatten the returned table into a 1-D vector for this file.
        feature.append(frame.to_numpy().reshape(-1))
        if done % 100 == 0:
            print(f"当前进度{done}/{total}")
    print("此次特征提取已结束")
    print("-------------------------------")
    feature = np.stack(feature,axis = 0)

    # Make sure the target directory exists; np.save does not create it.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    # Write through a file handle so the data lands at exactly `save_path`.
    # Given a string path, np.save appends ".npy" when it is missing, which
    # would make the os.path.exists(save_path) cache check above never hit.
    with open(save_path, "wb") as handle:
        np.save(handle, feature)
    print(f"特征已保存到文件 '{save_path}'")

    return feature

## 性能指标计算函数
def calculate_score_classification(preds, labels, average_f1='macro'):
    """Compute classification metrics for predictions against true labels.

    Args:
        preds: Predicted labels.
        labels: Ground-truth labels.
        average_f1: Averaging mode passed to ``f1_score`` only; precision and
            recall are always macro-averaged.

    Returns:
        Tuple ``(accuracy, ua, f1, precision, confuse_matrix)`` where ``ua``
        is the macro-averaged recall.
    """
    # Shared keyword arguments for the two metrics that are always macro-averaged.
    macro_kwargs = {"average": "macro", "zero_division": 0}
    return (
        accuracy_score(labels, preds),
        recall_score(labels, preds, **macro_kwargs),
        f1_score(labels, preds, average=average_f1, zero_division=0),
        precision_score(labels, preds, **macro_kwargs),
        confusion_matrix(labels, preds),
    )

## 自动机器学习模型类
class MyAutoSklearn:
    """Small convenience wrapper around ``AutoSklearnClassifier``.

    The fitted estimator is kept in ``self.model``; the methods below add
    progress messages and metric computation on top of it.
    """

    def __init__(self, time_left_for_this_task=3600, per_run_time_limit=30):
        """Create the underlying auto-sklearn classifier.

        Args:
            time_left_for_this_task: Overall search budget forwarded to
                ``AutoSklearnClassifier`` (defaults to the previous
                hard-coded value 3600).
            per_run_time_limit: Budget for a single model run forwarded to
                ``AutoSklearnClassifier`` (defaults to the previous
                hard-coded value 30).
        """
        self.model = AutoSklearnClassifier(
            time_left_for_this_task=time_left_for_this_task,
            per_run_time_limit=per_run_time_limit,
        )

    def train(self, features, labels):
        """Fit the wrapped classifier on ``features`` / ``labels``."""
        print("开始训练模型...")
        self.model.fit(features, labels)
        print("模型训练完成！")

    def evaluate(self, features, labels):
        """Predict on ``features`` and score the predictions against ``labels``.

        Returns:
            Tuple ``(accuracy, ua, f1, precision, confuse_matrix)`` as
            produced by ``calculate_score_classification``.
        """
        print("开始评估模型...")
        preds = self.predict(features)
        return calculate_score_classification(preds, labels)

    def predict(self, features):
        """Return the wrapped classifier's predictions for ``features``."""
        return self.model.predict(features)

    def get_models(self):
        """Return whatever the wrapped classifier's ``get_models()`` returns."""
        # NOTE(review): confirm the installed auto-sklearn version exposes
        # get_models(); its documented accessors include show_models() and
        # get_models_with_weights().
        return self.model.get_models()

## 特征处理函数
def feature_process(feature):
    """Feature-processing hook; currently returns ``feature`` unchanged."""
    processed = feature
    return processed

## 主函数
if __name__ == "__main__":
    # End-to-end script: extract (or load cached) features for the train/dev
    # splits, fit the AutoML model, report metrics on both splits, dump the
    # model list, then predict the test split and write the predictions to
    # ./result.csv.
    #
    # NOTE(review): train_path, train_label, dev_path and dev_label are not
    # defined anywhere in the visible part of this file; presumably they come
    # from another notebook cell. Confirm they exist before running.

    ## Instantiate the model
    auto_sklearn_model = MyAutoSklearn()
    
    ## Extract (or load cached) features for the training samples
    train_save_path = "./feature/train_feature.npy"
    train_feature = extract_audio_feature(train_path, train_save_path)  ## np.array (n,88)
    train_feature = feature_process(train_feature)

    ## Extract (or load cached) features for the validation (dev) samples
    dev_save_path = "./feature/dev_feature.npy"
    dev_feature = extract_audio_feature(dev_path, dev_save_path)
    dev_feature = feature_process(dev_feature)

    ## Train the model
    auto_sklearn_model.train(train_feature, train_label)
    
    ## Evaluate the fit on the training set
    acc, ua, f1, pre, confuse_matrix = auto_sklearn_model.evaluate(train_feature, np.array(train_label))
    print(f"train:\nAcc:{acc} \nUa:{ua} \nMacro_F1:{f1} \nPre:{pre}\nConfuse_matrix:\n{confuse_matrix}")
    
    ## Compute the metrics on the dev set
    acc, ua, f1, pre, confuse_matrix = auto_sklearn_model.evaluate(dev_feature, np.array(dev_label))
    print(f"dev:\nAcc:{acc} \nUa:{ua} \nMacro_F1:{f1} \nPre:{pre}\nConfuse_matrix:\n{confuse_matrix}")
    
    # Fetch the models reported by the wrapper and print each one
    models = auto_sklearn_model.get_models()
    print("所有尝试过的模型及其参数：")
    for model in models:
        print(model)
    
    # Save the model information to a text file, one str(model) per line
    with open("models_info.txt", "w") as f:
        for model in models:
            f.write(str(model) + "\n")
    
    ## Read test.csv ('#'-separated); its `path` column lists the test audio files
    test_csv = pd.read_csv("./CSVfile/test.csv", sep="#")
    test_path = list(test_csv.path)
    test_save_path = "./feature/test_feature.npy"
    test_feature = extract_audio_feature(test_path, test_save_path)
    test_feature = feature_process(test_feature)
    
    ## Predict the test set with the trained model
    test_preds = auto_sklearn_model.predict(test_feature)
    print(len(test_feature))
    
    ## Write the predictions into result.csv
    def write_result(test_preds):
        # Writes `test_preds` as a new "label" column of test_csv and saves the
        # frame to ./result.csv. Returns -1 without writing when the number of
        # predictions is not 1241; otherwise returns None.
        # NOTE(review): 1241 is presumably the expected test-set size — confirm.
        if len(test_preds) != 1241:
            print("错误！请检查test_preds长度是否为1241！！！")
            return -1
        test_csv["label"] = test_preds
        # `index=` is not passed to to_csv, so the pandas default applies.
        test_csv.to_csv("./result.csv", sep="#")
        print("测试集预测结果已成功写入到文件中！")
    
    write_result(test_preds)

# Notebook cell output: SyntaxError: invalid syntax (3740549208.py, line 4)