In [2]:
import os
import pandas as pd
import numpy as np
from collections import Counter
from scipy.stats import entropy


from utils_entropy import calculate_entropy
from utils import load_yaml

# Project configuration; the driver loop below reads one entry per sport from it.
config = load_yaml()

# Empty summary table; append_to_results_df() adds one row per sport to it.
results_df = pd.DataFrame(
    columns=[
        "Sport",
        "#Teams",
        "#Games",
        "#Seasons",
        "Entropy Observed",
        "Entropy Luck",
    ]
)

In [3]:

def append_to_results_df(observed_entropy, luck_entropy, n_years, n_teams, n_games, sport):
    """
    Append one sport's summary row to the module-level ``results_df``.

    Parameters
    ----------
    observed_entropy : float
        The observed entropy; stored in "Entropy Observed".
    luck_entropy : float
        The entropy of the luck simulation; stored in "Entropy Luck".
    n_years : int
        The number of years used for calculation; stored in "#Seasons".
    n_teams : int or float
        The number of teams; stored in "#Teams".
    n_games : int or float
        The number of games; stored in "#Games".
    sport : str
        The name of the sport; stored in "Sport".

    Returns
    -------
    None.
        The global ``results_df`` is rebound to a new frame that contains the
        previous rows followed by the new one.

    """
    global results_df
    new_row = pd.DataFrame(
        {
            "Sport": [sport],
            "#Teams": [n_teams],
            "#Seasons": [n_years],
            "#Games": [n_games],
            "Entropy Observed": [observed_entropy],
            "Entropy Luck": [luck_entropy],
        }
    )
    if results_df.empty:
        # Concatenating onto an empty frame raises a FutureWarning in
        # pandas >= 2.1 and will change the result dtypes in a later release.
        # Use the new row directly, keeping the accumulator's declared column
        # order (followed by any column only the new row has).
        column_order = list(dict.fromkeys([*results_df.columns, *new_row.columns]))
        results_df = new_row.reindex(columns=column_order)
    else:
        results_df = pd.concat([results_df, new_row], ignore_index=True)


def calculate_entropy_and_append_to_results_df(path_to_prepared_data, sport_name):
    """
    Run the entropy calculation for one sport/league and record its summary row.

    Delegates to ``calculate_entropy`` and forwards the observed and simulated
    entropies, together with summary figures taken from the returned frame,
    to ``append_to_results_df``.

    Parameters
    ----------
    path_to_prepared_data : str
        Path to the prepared data.
    sport_name : str
        The name of the sport.

    Returns
    -------
    None.

    """
    season_df, entropy_observed, entropy_simulated = calculate_entropy(
        path_to_prepared_data, sport_name
    )
    append_to_results_df(
        observed_entropy=entropy_observed,
        luck_entropy=entropy_simulated,
        # Team and game counts are averaged over the rows of the returned frame.
        n_teams=season_df["#Teams"].mean(),
        n_games=season_df["#Games"].mean(),
        # The row count of the returned frame is used as the number of seasons.
        n_years=len(season_df),
        sport=sport_name,
    )



    

# Calculate observed and simulated Entropy

In [4]:
# Every top-level config key is treated as a sport, except keys whose name
# contains "General".
sport_list = [x for x in config.keys() if "General" not in x]
for sport in sport_list:
    print("#" * 80)
    print("Sport: ", sport)
    sport_config = config[sport]
    if "results_path" not in sport_config:
        # Still a KeyError, but one that names the sport and shows which keys
        # the config entry actually has, so a renamed or missing key is easy
        # to spot.
        raise KeyError(
            f"config[{sport!r}] has no 'results_path' entry; "
            f"available keys: {list(sport_config)}"
        )
    calculate_entropy_and_append_to_results_df(sport_config["results_path"], sport)

################################################################################
Sport:  NBA


KeyError: 'results_path'

In [None]:
# Display the summary table accumulated in results_df.
results_df

['NBA',
 'NFL',
 'NHL',
 'MLS',
 'MLB',
 'PML',
 'LaLiga',
 'Bundesliga',
 'Ligue1',
 'SerieA']