In [6]:
import numpy as np
from scipy.stats import f


def calculate_ANOVA_metrics(data, levels_list, replicates):
    num_factors = len(levels_list)
    num_treatments = np.prod(levels_list)
    grand_mean = np.mean(data)
    ss_total = np.sum((data - grand_mean)**2)
    ss_factor = 0
    ss_error = 0

    #coefficient_matrix = get_coefficient_matrix(levels_list, replicates)
    #projection_matrix = np.dot(np.linalg.inv(np.dot(coefficient_matrix.T, coefficient_matrix)), coefficient_matrix.T)

    for i in range(num_treatments):
        factor_mean = np.mean(data[i*replicates:(i+1)*replicates])
        ss_factor += replicates*(factor_mean - grand_mean)**2
        ss_error += np.sum((data[i*replicates:(i+1)*replicates] - factor_mean)**2)

    dof_factor = num_factors - 1
    dof_error = num_treatments*replicates - num_factors
    dof_total = num_treatments*replicates - 1

    ms_factor = ss_factor/dof_factor
    ms_error = ss_error/dof_error
    f_statistic = ms_factor/ms_error
    p_value = f.sf(f_statistic, dof_factor, dof_error)

    return {
        'Source of variation': ['Factor', 'Error', 'Total'],
        'Sum of squares': [ss_factor, ss_error, ss_total],
        'Degrees of freedom': [dof_factor, dof_error, dof_total],
        'Mean square': [ms_factor, ms_error, '-'],
        'F statistic': [f_statistic, '-', '-'],
        'p value': [p_value, '-', '-']
    }


y_measures = np.array([28,25,27,36,32,32,18,19,23,31,30,29])
levels_list = np.array([2,2])
replicates = 3

calculate_ANOVA_metrics(y_measures, levels_list, replicates)


{'Source of variation': ['Factor', 'Error', 'Total'],
 'Sum of squares': [291.66666666666674, 31.333333333333332, 323.0],
 'Degrees of freedom': [1, 10, 11],
 'Mean square': [291.66666666666674, 3.1333333333333333, '-'],
 'F statistic': [93.08510638297875, '-', '-'],
 'p value': [2.205264786399596e-06, '-', '-']}

In [3]:
def calculate_ANOVA_metrics2(data, levels_list, replicates):
    # Get coefficient matrix
    coeff_matrix = get_coefficient_matrix(levels_list, replicates)

    # Calculate total sum of squares
    grand_mean = np.mean(data)
    SS_total = np.sum((data - grand_mean) ** 2)

    # Calculate sum of squares for each factor
    SS_factors = []
    for i in range(len(levels_list)):
        factor_mean = np.mean(data[np.sum(coeff_matrix[:, :i], axis=1) == 0])
        SS_factor = replicates * np.sum((factor_mean - grand_mean) ** 2)
        SS_factors.append(SS_factor)

    # Calculate sum of squares for error term
    SS_error = SS_total - np.sum(SS_factors)

    # Calculate degrees of freedom
    df_total = np.prod(levels_list) * replicates - 1
    df_factors = np.array(levels_list) - 1
    df_error = df_total - np.sum(df_factors)

    # Calculate mean squares
    MS_factors = np.array(SS_factors) / df_factors
    MS_error = SS_error / df_error

    # Calculate F-metric and p-value for each factor
    F_factors = MS_factors / MS_error
    p_factors = f.sf(F_factors, df_factors, df_error)

    return {
        'SS_total': SS_total,
        'SS_factors': SS_factors,
        'SS_error': SS_error,
        'df_total': df_total,
        'df_factors': df_factors,
        'df_error': df_error,
        'MS_factors': MS_factors,
        'MS_error': MS_error,
        'F_factors': F_factors,
        'p_factors': p_factors
    }

In [4]:
calculate_ANOVA_metrics2(y_measures, levels_list, replicates)

{'SS_total': 323.0,
 'SS_factors': [0.0, 1.6875],
 'SS_error': 321.3125,
 'df_total': 11,
 'df_factors': array([1, 1]),
 'df_error': 9,
 'MS_factors': array([0.    , 1.6875]),
 'MS_error': 35.701388888888886,
 'F_factors': array([0.        , 0.04726707]),
 'p_factors': array([1.        , 0.83273777])}

In [5]:
get_coefficient_matrix(levels_list, replicates)

array([[-0.5,  0. ,  0. ,  0. ],
       [ 0.5, -0.5,  0. ,  0. ],
       [ 1.5,  0. ,  0. ,  0. ],
       [ 2.5,  0.5,  0. ,  0. ],
       [ 0. ,  1. ,  0. ,  0. ],
       [ 0. ,  1. ,  0. ,  0. ],
       [ 0. ,  0. ,  1. ,  0. ],
       [ 0. ,  0. ,  1. ,  0. ],
       [ 0. ,  0. ,  1. ,  0. ],
       [ 0. ,  0. ,  0. ,  1. ],
       [ 0. ,  0. ,  0. ,  1. ],
       [ 0. ,  0. ,  0. ,  1. ]])