## Created by yunsuxiaozi 2024/3/19

### 这里实现了机器学习中一些比较常用的评估指标和损失函数，并给出了对应的比赛链接，各位可以通过这些比赛来了解评估指标的使用。后续可能会更新更多机器学习中常用的评估指标和损失函数。

In [1]:
import numpy as np#进行矩阵运算的库
import torch #一个深度学习的库Pytorch
import torch.nn as nn#neural network,神经网络
import torch.nn.functional as F#神经网络函数库

print("Regression")
print(f"-------MSE--------")
def MSE(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Ground-truth values; any array-like of numbers.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    # Cast to float arrays so plain lists are accepted and unsigned-integer
    # inputs cannot wrap around in the subtraction or overflow when squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)
print(f"-------MAE--------")
def MAE(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Args:
        y_true: Ground-truth values; any array-like of numbers.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The mean of the absolute element-wise differences.
    """
    # Cast to float arrays so plain lists are accepted and unsigned-integer
    # inputs cannot wrap around in the subtraction (e.g. uint8 1 - 2 == 255).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))
print(f"-------MedAE--------competition:https://www.kaggle.com/competitions/playground-series-s3e25")
def MedAE(y_true, y_pred):
    """Return the median absolute error between targets and predictions.

    Args:
        y_true: Ground-truth values; any array-like of numbers.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The median of the absolute element-wise differences.
    """
    # Cast to float arrays so plain lists are accepted and unsigned-integer
    # inputs cannot wrap around in the subtraction.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.median(np.abs(y_true - y_pred))
print(f"-------RMSE--------competition:https://www.kaggle.com/competitions/linking-writing-processes-to-writing-quality")
def RMSE(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Ground-truth values; any array-like of numbers.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The square root of the mean squared element-wise difference.
    """
    # Cast to float arrays so plain lists are accepted and unsigned-integer
    # inputs cannot wrap around in the subtraction or overflow when squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))
print(f"-------RMSLE--------competition:https://www.kaggle.com/competitions/store-sales-time-series-forecasting")
def RMSLE(y_true,y_pred):
    """Return the root mean squared logarithmic error.

    Both inputs are mapped through ``log(1 + x)`` before the RMSE is taken.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.

    Returns:
        The square root of the mean squared difference of the log1p values.
    """
    log_gap = np.log1p(y_pred) - np.log1p(y_true)
    mean_sq_gap = np.mean(log_gap ** 2)
    return np.sqrt(mean_sq_gap)

print("classification")
print(f"-------logloss--------competition:https://www.kaggle.com/competitions/playground-series-s3e26")
# The evaluation metric is log_loss.
def log_loss(y_true, y_pred, eps=10**(-15)):
    """Return the mean categorical log loss.

    The per-sample loss is ``-sum(y_true * log(y_pred))`` over the last axis;
    the result is the mean of those per-sample losses.

    Args:
        y_true: Target weights per class (e.g. one-hot rows); array-like.
        y_pred: Predicted probabilities with the same shape as ``y_true``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so ``log`` never
            sees 0.

    Returns:
        The mean log loss.
    """
    # Only the predictions are clipped. Clipping the labels as well would
    # turn exact 0/1 targets into eps / 1-eps and add a spurious
    # eps * log(p) term for every non-target class.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return -np.mean(np.sum(y_true * np.log(y_pred), axis=-1))
print(f"-------accuracy--------competition:https://www.kaggle.com/competitions/ml-olympiad-toxic-language-ptbr-detection")
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Ground-truth labels; any array-like.
        y_pred: Predicted labels with the same shape as ``y_true``.

    Returns:
        The mean of the element-wise equality mask.
    """
    # Convert first: for plain Python lists ``==`` compares the whole lists
    # and yields a single bool, which would make the result 0.0 or 1.0.
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))
print(f"-------roc_auc--------competition:https://www.kaggle.com/competitions/playground-series-s4e3")
def roc_auc(y_true, y_pro):
    """Return the area under the ROC curve for binary labels.

    The ROC curve is built exactly from the distinct score values rather than
    from a fixed grid of thresholds in [0, 1). Scores may therefore be any
    real numbers (probabilities or raw margins); ties are handled as a single
    threshold, and the curve always starts at (FPR, TPR) = (0, 0).

    Args:
        y_true: True labels; entries equal to 1 are positives and entries
            equal to 0 are negatives.
        y_pro: Predicted scores, higher meaning "more likely positive".

    Returns:
        The trapezoidal area under the ROC curve as a float, or ``nan`` when
        ``y_true`` contains no positives or no negatives (AUC is undefined).
    """
    labels = np.asarray(y_true).ravel()
    scores = np.asarray(y_pro, dtype=float).ravel()
    is_pos = labels == 1
    is_neg = labels == 0
    n_pos = int(is_pos.sum())
    n_neg = int(is_neg.sum())
    if n_pos == 0 or n_neg == 0:
        # TPR or FPR would be 0/0; report "undefined" instead of dividing.
        return float("nan")

    # Walk the samples from the highest score down, counting how many
    # positives / negatives are predicted positive at each cut.
    order = np.argsort(-scores, kind="mergesort")
    ranked = scores[order]
    true_pos = np.cumsum(is_pos[order])
    false_pos = np.cumsum(is_neg[order])

    # A threshold can only separate *different* scores, so keep one ROC point
    # per run of tied scores (the last index of each run).
    run_ends = np.r_[np.nonzero(np.diff(ranked))[0], ranked.size - 1]
    tpr = np.r_[0.0, true_pos[run_ends] / n_pos]
    fpr = np.r_[0.0, false_pos[run_ends] / n_neg]

    # Trapezoidal rule over the (already FPR-sorted) curve.
    return float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2))
# For classification, model quality is better judged on the predicted probabilities than on the predicted classes; f1-score and macro-f1 are not implemented for now.
print(f"-------KL_divergence--------competition:https://www.kaggle.com/competitions/hms-harmful-brain-activity-classification")
def KL_divergence(p,q,epsilon=10**(-15)):
    """Return the batch-mean KL divergence between ``p`` and ``softmax(q)``.

    Args:
        p: Target tensor; clipped to ``[epsilon, 1 - epsilon]`` before the
            log is taken (presumably per-class probabilities along dim 1).
        q: Unnormalised scores; ``log_softmax`` is applied along dim 1.
        epsilon: Clipping bound for ``p``.

    Returns:
        A scalar tensor: the divergence summed over dim 1, then averaged over
        the remaining elements.
    """
    target = torch.clamp(p, min=epsilon, max=1 - epsilon)
    log_q = F.log_softmax(q, dim=1)
    # Sum over dim 1 (the class axis) for a per-sample loss, then average
    # those per-sample losses.
    per_sample = (target * (target.log() - log_q)).sum(dim=1)
    return per_sample.mean()
print(f"-------cross_entropy_loss--------competition:https://www.kaggle.com/competitions/digit-recognizer")

def cross_entropy_loss(y_pro, y_target, eps=1e-15):
    """Return the mean cross-entropy of class probabilities vs. class indices.

    Args:
        y_pro: Tensor of predicted probabilities; the last dimension indexes
            the classes. The log is taken directly, so these are expected to
            be probabilities, not logits.
        y_target: Integer class indices used to pick rows of an identity
            matrix (i.e. to build one-hot targets).
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` before the log.

    Returns:
        ``-sum(one_hot * log(y_pro))`` over dim 1, averaged over dim 0.
    """
    # Build the one-hot targets on the same device / dtype as the
    # predictions so GPU inputs do not hit a device mismatch.
    num_classes = y_pro.shape[-1]
    one_hot = torch.eye(num_classes, dtype=y_pro.dtype, device=y_pro.device)[y_target]
    y_pro = torch.clip(y_pro, eps, 1 - eps)
    # Plain cross-entropy: no (1 - p) modulating factor (that belongs to
    # focal loss) and no clipping of the exact 0/1 targets.
    return -torch.mean(torch.sum(one_hot * torch.log(y_pro), dim=1), dim=0)
def focal_loss(y_pro, y_target, eps=1e-15, gamma=0.25):
    """Return the mean focal loss of class probabilities vs. class indices.

    Each sample contributes ``-(1 - p_t) ** gamma * log(p_t)``, where ``p_t``
    is the predicted probability of its target class; ``gamma = 0`` reduces
    this to plain cross-entropy.

    Args:
        y_pro: Tensor of predicted probabilities; the last dimension indexes
            the classes. The log is taken directly, so these are expected to
            be probabilities, not logits.
        y_target: Integer class indices used to pick rows of an identity
            matrix (i.e. to build one-hot targets).
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` before the log.
        gamma: Exponent of the ``(1 - p)`` modulating factor.

    Returns:
        The modulated log-likelihood summed over dim 1, averaged over dim 0,
        and negated.
    """
    # Build the one-hot targets on the same device / dtype as the
    # predictions so GPU inputs do not hit a device mismatch. The targets
    # stay exact 0/1: clipping them would leak eps-weighted terms from the
    # non-target classes into the loss.
    num_classes = y_pro.shape[-1]
    one_hot = torch.eye(num_classes, dtype=y_pro.dtype, device=y_pro.device)[y_target]
    y_pro = torch.clip(y_pro, eps, 1 - eps)
    modulated = one_hot * ((1 - y_pro) ** gamma) * torch.log(y_pro)
    return -torch.mean(torch.sum(modulated, dim=1), dim=0)

Regression
-------MSE--------
-------MAE--------
-------MedAE--------competition:https://www.kaggle.com/competitions/playground-series-s3e25
-------RMSE--------competition:https://www.kaggle.com/competitions/linking-writing-processes-to-writing-quality
-------RMSLE--------competition:https://www.kaggle.com/competitions/store-sales-time-series-forecasting
classification
-------logloss--------competition:https://www.kaggle.com/competitions/playground-series-s3e26
-------accuracy--------competition:https://www.kaggle.com/competitions/ml-olympiad-toxic-language-ptbr-detection
-------roc_auc--------competition:https://www.kaggle.com/competitions/playground-series-s4e3
-------KL_divergence--------competition:https://www.kaggle.com/competitions/hms-harmful-brain-activity-classification
-------cross_entropy_loss--------competition:https://www.kaggle.com/competitions/digit-recognizer
