In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import scipy.stats as stats

In [9]:
# Function to calculate the goodness of fit test
def goodness_of_fit_test(y_true, y_pred, alpha=0.05):

    # Generate the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    
    # Calculate the expected frequencies
    row_sums = cm.sum(axis=1)
    col_sums = cm.sum(axis=0)
    total = cm.sum()
    expected = np.outer(row_sums, col_sums) / total
    
    # Perform the chi-square test
    chi2, p_value, dof, ex = stats.chi2_contingency(cm)
    
    # Compare p-value with the level of significance
    if p_value < alpha:
        result = "Reject the null hypothesis"
    else:
        result = "Fail to reject the null hypothesis, For given LOS the model is performing good"
    
    return chi2, p_value, dof, expected, result

# Example usage
y_true = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
y_pred = [0, 0, 0, 1, 0, 1, 1, 1, 0, 1]

chi2, p, dof, expected, result = goodness_of_fit_test(y_true, y_pred)
print(f"Chi2: {chi2}, p-value: {p}, Degrees of Freedom: {dof}")
print("Expected frequencies:\n", expected)
print("Test result:", result)


Chi2: 1.6, p-value: 0.20590321073206466, Degrees of Freedom: 1
Expected frequencies:
 [[2.5 2.5]
 [2.5 2.5]]
Test result: Fail to reject the null hypothesis, For given LOS the model is performing good
