# ML01 : Fonctions - Performances d'un classifieur

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def compute_risk_per_class(YR, Yhat, K, L): 
    '''
    Compute the conditional risk of every class.

    Classes are coded 1..K. For each class k, the risk is the dot product
    between row k of the loss matrix and the empirical distribution of the
    predictions made on the samples whose real label is k.

    Parameters
    ----------
    YR : array-like (DataFrame, Series or ndarray)
        Real labels. A single-column (n, 1) container is flattened to (n,).
    Yhat : array-like
        Predicted labels, flattened the same way as YR.
    K : int
        Number of classes.
    L : array-like of shape (K, K)
        Loss function: L[k, l] weights the rate at which samples of class
        k+1 are predicted as class l+1.

    Returns
    -------
    R : Array of floats
        Conditional risks, one per class. A class with no sample gets the
        dot product of its loss row with a zero vector.

    Raises
    ------
    ValueError
        If YR and Yhat do not contain the same number of labels.
    '''

    # Flatten to 1-D so that column-shaped inputs (e.g. a one-column
    # DataFrame) index Yhat correctly instead of raising an IndexError.
    y_true = np.asarray(YR).ravel()
    y_pred = np.asarray(Yhat).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError('YR and Yhat must contain the same number of labels')

    loss = np.asarray(L)
    labels = np.arange(1, K + 1)

    R = np.zeros(K)
    for k in range(K):
        preds_k = y_pred[y_true == k + 1]
        nk = preds_k.shape[0]
        rates = np.zeros(K)
        if nk > 0:
            # Fraction of class-(k+1) samples predicted as each label 1..K.
            rates = np.sum(preds_k[:, None] == labels, axis=0) / nk
        R[k] = loss[k, :].dot(rates)

    return R

In [3]:
def compute_average_risk(YR, Yhat, K, L):
    '''
    Compute the global (average) risk of a classifier.

    The global risk is the sum over the classes 1..K of the class
    proportion observed in YR times the conditional risk of that class, as
    returned by compute_risk_per_class.

    Parameters
    ----------
    YR : array-like (DataFrame, Series or ndarray)
        Real labels. A single-column (n, 1) container is flattened to (n,).
    Yhat : array-like
        Predicted labels, flattened the same way as YR.
    K : int
        Number of classes.
    L : Array
        Loss function, forwarded unchanged to compute_risk_per_class.

    Returns
    -------
    r : float
        Global risk.

    Raises
    ------
    ValueError
        If YR and Yhat do not contain the same number of labels.
    '''

    # Flatten once here and forward the 1-D arrays, so column-shaped inputs
    # are handled both for the proportions and for the per-class risks.
    y_true = np.asarray(YR).ravel()
    y_pred = np.asarray(Yhat).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError('YR and Yhat must contain the same number of labels')

    # Class proportions
    n = y_true.shape[0]
    pi = np.zeros(K)
    for k in range(K):
        pi[k] = np.count_nonzero(y_true == k + 1) / n

    # Conditional risk of each class
    R = compute_risk_per_class(y_true, y_pred, K, L)

    # Average risk: class risks weighted by the class proportions
    r = pi.dot(R)

    return r

In [4]:
def compute_accuracy(YR, Yhat):
    '''
    Compute the fraction of predictions that equal the real label.

    Parameters
    ----------
    YR : array-like (DataFrame, Series or ndarray)
        Real labels. A single-column (n, 1) container is flattened to (n,).
    Yhat : array-like
        Predicted labels, flattened the same way as YR.

    Returns
    -------
    Acc : float
        Accuracy.

    Raises
    ------
    ValueError
        If YR and Yhat do not contain the same number of labels.
    '''

    # Compare position by position on 1-D arrays: comparing an (n, 1) array
    # with an (n,) array would broadcast to (n, n) and inflate the count.
    y_true = np.asarray(YR).ravel()
    y_pred = np.asarray(Yhat).ravel()
    n = y_true.shape[0]
    if y_pred.shape[0] != n:
        raise ValueError('YR and Yhat must contain the same number of labels')

    Acc = np.sum(y_pred == y_true) / n

    return Acc

In [5]:
def compute_accuracy_per_class(YR, Yhat, K):
    '''
    Compute, for each class 1..K, the fraction of its samples that are
    predicted with their real label.

    Parameters
    ----------
    YR : array-like (DataFrame, Series or ndarray)
        Real labels. A single-column (n, 1) container is flattened to (n,).
    Yhat : array-like
        Predicted labels, flattened the same way as YR.
    K : int
        Number of classes.

    Returns
    -------
    Acc_per_class : Array
        Accuracies per class. A class with no sample in YR is left at 0.

    Raises
    ------
    ValueError
        If YR and Yhat do not contain the same number of labels.
    '''

    # Flatten to 1-D so that column-shaped inputs (e.g. a one-column
    # DataFrame) can be used to mask the predictions.
    y_true = np.asarray(YR).ravel()
    y_pred = np.asarray(Yhat).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError('YR and Yhat must contain the same number of labels')

    Acc_per_class = np.zeros(K)
    for k in range(K):
        preds_k = y_pred[y_true == k + 1]
        nk = preds_k.shape[0]
        if nk > 0:
            Acc_per_class[k] = np.count_nonzero(preds_k == k + 1) / nk

    return Acc_per_class

In [6]:
def compute_confusion_matrix(YR, Yhat, K):
    '''
    Compute the row-normalised confusion matrix for classes coded 1..K.

    Parameters
    ----------
    YR : array-like (DataFrame, Series or ndarray)
        Real labels. A single-column (n, 1) container is flattened to (n,).
    Yhat : array-like
        Predicted labels, flattened the same way as YR.
    K : int
        Number of classes.

    Returns
    -------
    confmat : Array
        Confusion matrix of shape (K, K). confmat[k, l] is the fraction of
        the samples of class k+1 that are predicted as class l+1; the row
        of a class with no sample in YR is left at 0.

    Raises
    ------
    ValueError
        If YR and Yhat do not contain the same number of labels.
    '''

    # Flatten to 1-D so that column-shaped inputs (e.g. a one-column
    # DataFrame) can be used to mask the predictions.
    y_true = np.asarray(YR).ravel()
    y_pred = np.asarray(Yhat).ravel()
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError('YR and Yhat must contain the same number of labels')

    labels = np.arange(1, K + 1)
    confmat = np.zeros((K, K))
    for k in range(K):
        preds_k = y_pred[y_true == k + 1]
        nk = preds_k.shape[0]
        if nk > 0:
            # Count every predicted label 1..K at once for this real class.
            confmat[k, :] = np.sum(preds_k[:, None] == labels, axis=0) / nk

    return confmat