## This notebook is in an experimental phase.

In [None]:
import os
import pandas as pd
import numpy as np
from collections import Counter
from scipy.stats import entropy


from src.entropy.utils_entropy import calculate_entropy
from src.utils.utils import load_yaml

# Load the project configuration. Later cells index it by sport name and read
# each sport's "results_path" entry.
# NOTE(review): presumably parsed from a YAML file at a default location
# chosen by load_yaml — confirm in src.utils.utils.
config = load_yaml()

# Accumulator table: starts with no rows and gains one row per sport each time
# append_to_results_df is called.
results_df = pd.DataFrame(
    columns=pd.Index(
        ["Sport", "#Teams", "#Games", "#Seasons", "Entropy Observed", "Entropy Luck"]
    )
)

In [14]:
def append_to_results_df(
    observed_entropy, luck_entropy, n_years, n_teams, n_games, sport
):
    """
    Append one sport's summary row to the global results dataframe.

    Parameters
    ----------
    observed_entropy : float
        The observed entropy (stored as "Entropy Observed").
    luck_entropy : float
        The entropy of the luck simulation (stored as "Entropy Luck").
    n_years : int
        The number of years used for calculation (stored as "#Seasons").
    n_teams : int or float
        The number of teams (stored as "#Teams").
    n_games : int or float
        The number of games (stored as "#Games").
    sport : str
        The name of the sport (stored as "Sport").

    Returns
    -------
    None.

    Notes
    -----
    The module-level ``results_df`` is rebound to a new dataframe; the
    previous object is not modified in place.

    """
    global results_df
    new_row = pd.DataFrame(
        {
            "Sport": [sport],
            "#Teams": [n_teams],
            "#Seasons": [n_years],
            "#Games": [n_games],
            "Entropy Observed": [observed_entropy],
            "Entropy Luck": [luck_entropy],
        }
    )
    if len(results_df) == 0:
        # Concatenating a zero-row frame is deprecated in pandas (it emits a
        # FutureWarning and will change the resulting dtypes). Adopt the new
        # row directly instead, keeping the accumulator's column order and
        # placing any columns it does not know about at the end.
        column_order = list(results_df.columns) + [
            col for col in new_row.columns if col not in results_df.columns
        ]
        results_df = new_row.reindex(columns=column_order)
    else:
        results_df = pd.concat([results_df, new_row], ignore_index=True)


def calculate_entropy_and_append_to_results_df(path_to_prepared_data, sport_name):
    """
    Compute the observed and simulated entropy for one sport/league and record
    them, together with summary counts, in the results dataframe.

    Parameters
    ----------
    path_to_prepared_data : str
        Path to the prepared data.
    sport_name : str
        The name of the sport.

    Returns
    -------
    None.

    """
    stats_df, entropy_observed, entropy_simulated = calculate_entropy(
        path_to_prepared_data, sport_name
    )
    # "#Teams" and "#Games" are averaged over the rows of the returned frame;
    # the row count itself is reported as the number of years.
    append_to_results_df(
        observed_entropy=entropy_observed,
        luck_entropy=entropy_simulated,
        n_teams=stats_df["#Teams"].mean(),
        n_games=stats_df["#Games"].mean(),
        n_years=len(stats_df),
        sport=sport_name,
    )

# Calculate observed and simulated entropy

In [15]:
# Leagues to process; each name is used as a key into `config` to look up its
# "results_path".
# NOTE(review): an earlier draft derived this list from config.keys(), meaning
# to skip the "General", "Boulderin" and "Climbing" entries. Its filter joined
# the `not in` checks with `or`, which only drops a key containing all three
# substrings -- use `and` if that approach is revived.
sport_list = [
    "NBA", "NFL", "NHL", "MLS", "MLB",
    "PML", "LaLiga", "Bundesliga", "Ligue1", "SerieA",
]
for sport in sport_list:
    # Banner line followed by the sport name, as two lines of output.
    print("#" * 80, f"Sport:  {sport}", sep="\n")
    calculate_entropy_and_append_to_results_df(
        path_to_prepared_data=config[sport]["results_path"],
        sport_name=sport,
    )

################################################################################
Sport:  NBA
################################################################################
Sport:  NFL
################################################################################
Sport:  NHL
################################################################################
Sport:  MLS
################################################################################
Sport:  MLB
################################################################################
Sport:  PML
################################################################################
Sport:  LaLiga
################################################################################
Sport:  Bundesliga
################################################################################
Sport:  Ligue1
################################################################################
Sport:  SerieA


In [16]:
# Display the accumulated results table (one row per processed sport).
results_df

Unnamed: 0,Sport,#Teams,#Games,#Seasons,Entropy Observed,Entropy Luck
0,NBA,30.0,81.4375,16,2.954527,2.007612
1,NFL,32.0,16.052632,19,2.666055,2.117298
2,NHL,30.142857,79.571429,14,2.41105,2.003568
3,MLS,17.388889,32.444444,18,2.465175,2.474503
4,MLB,30.0,156.85,20,2.294248,1.698383
5,PML,20.0,38.0,19,2.935511,2.375074
6,LaLiga,20.0,38.0,19,2.846776,2.38436
7,Bundesliga,18.0,34.0,19,2.862652,2.486416
8,Ligue1,20.0,37.421053,19,2.775052,2.387098
9,SerieA,19.894737,37.789474,19,2.921697,2.379157
