In [None]:
# original
def clar(y: np.ndarray, X: np.ndarray) -> tuple:
    """Calculate Cumulative LGD Accuracy Ratio (CLAR).

    CLAR measures the ability of predicted LGD categories to discriminate
    realized LGD values according to Ozdemir and Miu 2009.

    The classes are the distinct values found in 'X' and 'y', walked from the
    highest to the lowest. For every class the curve has one point:

    * x: share of observations whose predicted value is in that class or a
      higher one;
    * y: share of all observations whose predicted AND realized values are
      both in that class or a higher one.

    The curve starts at the origin (0, 0) and CLAR is twice the area below
    it, so a perfect prediction ('X' equal to 'y') yields 1.

    Args:
        y (np.ndarray): Vector of realized LGD values
        X (np.ndarray): Vector of predicted LGD values

    Returns:
        tuple: ``(clar_value, obs_cumulative, correct_cumulative)`` where
        ``clar_value`` is the CLAR (float), ``obs_cumulative`` is the
        cumulative share of observations per class (np.ndarray) and
        ``correct_cumulative`` is the cumulative share of correctly assigned
        LGD values per class (np.ndarray). Both arrays have one entry per
        class, ordered from the highest class to the lowest.

    Raises:
        ValueError: If either 'X' or 'y' contains NA values
        ValueError: If 'X' and 'y' have different lengths
        ValueError: If 'X' and 'y' are empty

    References:
        Ozdemir, B., Miu, P., 2009. Basel II Implementation. A Guide to
        Developing and Validating a Compliant Internal Risk Rating System.
        McGraw-Hill, USA.
    """

    # Check for NA values and length mismatch
    if any(pd.isna(X)) or any(pd.isna(y)):
        raise ValueError("Both 'X' and 'y' must not contain NA values")

    if len(X) != len(y):
        raise ValueError("Both 'X' and 'y' must be of the same length")

    # Accept any array-like: the element-wise comparisons below need arrays
    X = np.asarray(X)
    y = np.asarray(y)

    # Number of observations
    nx = len(X)
    if nx == 0:
        raise ValueError("Both 'X' and 'y' must not be empty")

    # Distinct classes of X and y, from the highest to the lowest
    classes = np.union1d(X, y)[::-1]

    # Cumulative number of observations predicted in each class or higher
    cnum = np.cumsum([np.count_nonzero(X == cls) for cls in classes])

    # Observations predicted in class `cls` or higher whose realized value is
    # also `cls` or higher. The last class is skipped here because its share
    # is fixed to 1 below.
    correct_cumulative = np.zeros(len(classes))
    for i, cls in enumerate(classes[:-1]):
        correct_cumulative[i] = np.count_nonzero((X >= cls) & (y >= cls))

    # Normalize by the number of observations
    correct_cumulative /= nx
    correct_cumulative[-1] = 1  # Last class should always be 1

    # Calculate the cumulative share of observations
    obs_cumulative = cnum / nx

    # Area under the curve by the trapezoidal rule. The curve is anchored at
    # the origin: without that first segment the area of the highest class is
    # lost and a perfect prediction would score below 1.
    curve_x = np.concatenate(([0.0], obs_cumulative))
    curve_y = np.concatenate(([0.0], correct_cumulative))
    auc = float(np.sum(np.diff(curve_x) * (curve_y[:-1] + curve_y[1:]) / 2))

    # CLAR value is twice the area under the curve
    clar_value = auc * 2

    return clar_value, obs_cumulative, correct_cumulative


In [None]:
# Show only the CLAR value, i.e. the first element of the tuple returned by clar
clar(y=y, X=y_pred)[0]

In [None]:
import numpy as np
import pandas as pd

def clar2(y: np.ndarray, X: np.ndarray) -> tuple:
    """Calculate Cumulative LGD Accuracy Ratio (CLAR).

    The classes are the distinct values found in 'X' and 'y', taken from the
    highest to the lowest. The CLAR curve starts at the origin (0, 0) and has
    one point per class: the cumulative share of observations predicted in
    that class or higher (x) against the share of all observations that are
    also realized in that class or higher (y). CLAR is twice the area below
    the curve, so a perfect prediction yields 1.

    Args:
        y (np.ndarray): Vector of realized LGD values
        X (np.ndarray): Vector of predicted LGD values

    Returns:
        tuple: CLAR value (float), Share of observations (np.ndarray),
        Cumulative share of correctly assigned LGD values (np.ndarray)

    Raises:
        ValueError: If either 'X' or 'y' contains NA values
        ValueError: If 'X' and 'y' have different lengths
        ValueError: If 'X' and 'y' are empty
    """

    # Validate inputs
    def validate_inputs(y, X):
        if any(pd.isna(X)) or any(pd.isna(y)):
            raise ValueError("Both 'X' and 'y' must not contain NA values")
        if len(X) != len(y):
            raise ValueError("Both 'X' and 'y' must be of the same length")
        if len(X) == 0:
            raise ValueError("Both 'X' and 'y' must not be empty")

    # Calculate the cumulative correct assignments
    def calculate_cumulative_correct(y_sorted, classes, cnum):
        correct_cumulative = np.zeros(len(classes))
        # The last class is skipped: its share is fixed to 1 below
        for i, cls in enumerate(classes[:-1]):
            if cnum[i] > 0:
                # The first cnum[i] rows are those predicted in `cls` or higher
                correct_cumulative[i] = np.sum(y_sorted[:cnum[i]] >= cls)
        correct_cumulative /= len(y_sorted)
        correct_cumulative[-1] = 1
        return correct_cumulative

    # Calculate the Area Under the Curve (AUC) with the trapezoidal rule
    def calculate_auc(obs_cumulative, correct_cumulative):
        # Start from the origin so the segment of the highest class is
        # counted; without it a perfect prediction would score below 1.
        previous_x, previous_y = 0.0, 0.0
        auc = 0.0
        for current_x, current_y in zip(obs_cumulative, correct_cumulative):
            auc += (current_x - previous_x) * (previous_y + current_y) / 2
            previous_x, previous_y = current_x, current_y
        return auc

    # Main logic
    validate_inputs(y, X)

    # Accept any array-like: comparisons and fancy indexing below need arrays
    X = np.asarray(X)
    y = np.asarray(y)

    nx = len(X)
    classes = sorted(set(X).union(set(y)), reverse=True)
    num = [np.sum(X == cls) for cls in classes]
    cnum = np.cumsum(num)

    # Reorder the realized values by descending predicted value
    sorted_indices = np.argsort(X)[::-1]
    y_sorted = y[sorted_indices]

    correct_cumulative = calculate_cumulative_correct(y_sorted, classes, cnum)
    obs_cumulative = cnum / nx

    auc = calculate_auc(obs_cumulative, correct_cumulative)
    clar_value = auc * 2

    return clar_value, obs_cumulative, correct_cumulative

In [None]:
import numpy as np
import pandas as pd

def loss_capture_ratio(y: np.ndarray, X: np.ndarray) -> tuple:
    """Calculate Loss Capture Ratio (LCR).

    Two cumulative curves are built over the population share
    (0, 1/n, ..., 1):

    * the ideal curve accumulates the actual values sorted from the largest
      to the smallest;
    * the developed curve accumulates, in order of descending predicted
      value, the mean actual value of each observation's prediction group.

    Both curves are normalized by their own total and start at 0. The ratio
    is ``(developed_area - 0.5) / (ideal_area - 0.5)``.

    Args:
        y (np.ndarray): Vector of actual values
        X (np.ndarray): Vector of predicted values

    Returns:
        tuple: LCR value (float), Ideal model (np.ndarray), Ideal observations
        (np.ndarray), Predicted model (np.ndarray), Predicted observations
        (np.ndarray)

    Raises:
        ValueError: If either 'X' or 'y' contains NA values
        ValueError: If 'X' and 'y' have different lengths
        ValueError: If 'y' is constant
    """

    # Validate inputs
    if any(pd.isna(X)) or any(pd.isna(y)):
        raise ValueError("Both 'X' and 'y' must not contain NA values")
    if len(X) != len(y):
        raise ValueError("Both 'X' and 'y' must be of the same length")
    if len(set(y)) == 1:
        raise ValueError("Actual values 'y' must not be constant")

    y = np.asarray(y)
    X = np.asarray(X)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use whichever the installed version provides.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    def cumulative_curve(ordered):
        """Return the normalized cumulative curve and population shares, both starting at 0."""
        model = ordered.cumsum() / ordered.sum()
        populations = np.arange(0, model.size + 1) / model.size
        return np.insert(model, 0, 0), populations

    # Determine the perfect model and population
    ideal_model, ideal_populations = cumulative_curve(np.flip(np.sort(y)))

    # Determine the developed model and population: every observation is
    # replaced by the mean actual value of its prediction group, so the order
    # of ties within a group cannot influence the curve.
    data = pd.DataFrame(data={"actual": y, "predicted": X})
    data["group_mean"] = data.groupby("predicted")["actual"].transform("mean")
    developed = data.sort_values(by="predicted", ascending=False)["group_mean"].to_numpy()
    developed_model, developed_populations = cumulative_curve(developed)

    # Compute the areas under the perfect and developed curves
    perfect_area = trapezoid(ideal_model, ideal_populations)
    developed_area = trapezoid(developed_model, developed_populations)

    # Calculate the accuracy ratio
    accuracy_ratio = (developed_area - 0.5) / (perfect_area - 0.5)

    return accuracy_ratio, ideal_model, ideal_populations, developed_model, developed_populations

# Example usage: score a small hand-made sample and print the resulting ratio
y = np.array([0.1, 0.4, 0.35, 0.8])
X = np.array([0.2, 0.3, 0.5, 0.7])
(
    lcr_value,
    ideal_model,
    ideal_populations,
    developed_model,
    developed_populations,
) = loss_capture_ratio(y, X)
print(f"LCR Value: {lcr_value}")


In [None]:
# Define the LossCaptureRatio class for comparison
import attr
import numpy as np
import pandas as pd
from typing import Tuple

class LossCaptureRatio:
    """Loss capture ratio of predicted values against actual values.

    Two cumulative curves are built over the population share
    (0, 1/n, ..., 1): the perfect curve accumulates the actual values sorted
    from the largest to the smallest, the developed curve accumulates the
    mean actual value of each observation's prediction group in order of
    descending predicted value. Each curve is normalized by its own total and
    starts at 0. The ratio is
    ``(developed_area - 0.5) / (perfect_area - 0.5)``.
    """

    def __init__(self, actual: np.ndarray, predicted: np.ndarray):
        """Store the actual and predicted values; no validation is done here."""
        self.actual = actual
        self.predicted = predicted

    def compute_accuracy_ratio(self) -> float:
        """Return the ratio of the developed to the perfect area above 0.5."""
        ideal_model, ideal_populations = self._determine_perfect_model_and_population()
        developed_model, developed_populations = self._determine_developed_model_and_population()

        # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
        # use whichever the installed version provides.
        trapezoid = getattr(np, "trapezoid", None) or np.trapz
        perfect_area = trapezoid(ideal_model, ideal_populations)
        developed_area = trapezoid(developed_model, developed_populations)

        accuracy_ratio = (developed_area - 0.5) / (perfect_area - 0.5)
        return accuracy_ratio

    @staticmethod
    def _cumulative_curve(ordered: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return the normalized cumulative curve and population shares, both starting at 0."""
        model = ordered.cumsum() / ordered.sum()
        populations = np.arange(0, model.size + 1) / model.size
        return np.insert(model, 0, 0), populations

    def _determine_perfect_model_and_population(self) -> Tuple[np.ndarray, np.ndarray]:
        """Build the curve obtained by ranking observations by their actual value."""
        ideal = np.flip(np.sort(np.asarray(self.actual)))
        return self._cumulative_curve(ideal)

    def _determine_developed_model_and_population(self) -> Tuple[np.ndarray, np.ndarray]:
        """Build the curve obtained by ranking observations by their predicted value."""
        data = pd.DataFrame(data={"actual": self.actual, "predicted": self.predicted})
        # Every observation is replaced by the mean actual value of its
        # prediction group, so the order of ties within a group cannot
        # influence the curve.
        data["group_mean"] = data.groupby("predicted")["actual"].transform("mean")
        developed = data.sort_values(by="predicted", ascending=False)["group_mean"].to_numpy()
        return self._cumulative_curve(developed)

# Example usage for comparison: run both metrics on the same small sample
y = np.array([0.1, 0.4, 0.35, 0.8])
X = np.array([0.2, 0.3, 0.5, 0.7])

lcr = LossCaptureRatio(actual=y, predicted=X)
lcr_value = lcr.compute_accuracy_ratio()
print(f"LossCaptureRatio Value: {lcr_value}")

# clar2 returns three values (CLAR, observation shares, correct shares);
# unpacking five targets here raised "not enough values to unpack".
clar_value, _, _ = clar2(y, X)
print(f"CLAR Value: {clar_value}")


gini

In [None]:
def _determine_perfect_model_and_population(self) -> tuple:
    """Build the cumulative curve of the perfect model.

    A copy of ``self.data`` is ordered by the weighted-defaults column from
    the largest to the smallest value. The running total of that column is
    divided by ``self.weighted_defaults_sum`` and the running total of the
    weight column by ``self.weights_sum``. Both arrays get a leading 0.

    Returns:
        tuple: ``(model, population)`` as numpy arrays.
    """
    defaults_column = ColumnName.WEIGHTED_DEFAULTS.value
    ranked = self.data.copy().sort_values(
        by=[defaults_column], ascending=False, ignore_index=True
    )

    cumulative_defaults = ranked[defaults_column].cumsum().values
    model = np.insert(cumulative_defaults / self.weighted_defaults_sum, 0, 0)

    cumulative_weights = ranked[ColumnName.WEIGHT.value].cumsum().values
    population = np.insert(cumulative_weights / self.weights_sum, 0, 0)

    return model, population
 
def _determine_developed_model_and_population(self) -> tuple:
    """Build the cumulative curve of the developed model.

    ``self.data`` is ordered by the predicted column from the largest to the
    smallest value and then summed per distinct predicted value;
    ``sort=False`` keeps the groups in that descending order. The running
    total of the weighted-defaults column is divided by
    ``self.weighted_defaults_sum`` and the running total of the weight column
    by ``self.weights_sum``. Both arrays get a leading 0.

    Returns:
        tuple: ``(model, population)`` as numpy arrays.
    """
    predicted_column = ColumnName.PREDICTED.value
    ranked = self.data.sort_values(by=[predicted_column], ascending=False)
    per_prediction = ranked.groupby([predicted_column], sort=False).sum()

    cumulative_defaults = per_prediction[ColumnName.WEIGHTED_DEFAULTS.value].cumsum().values
    model = np.insert(cumulative_defaults / self.weighted_defaults_sum, 0, 0)

    cumulative_weights = per_prediction[ColumnName.WEIGHT.value].cumsum().values
    population = np.insert(cumulative_weights / self.weights_sum, 0, 0)

    return model, population

