In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.preprocessing import MinMaxScaler

import os

from utils import preprocess_observed_data, calculate_observed_variance, get_available_schedules, calculate_luck_variance_per_year, load_yaml
from simulation_utils import simulate_league, simulate_league_multiple_times, calculate_variance_of_simulated_leagues

# Project configuration loaded through the `load_yaml` helper imported from utils;
# the cells below read per-sport paths and the simulation count from it.
config = load_yaml()

# Summary table with one row per sport; rows are added by `append_to_results_df`.
results_df = pd.DataFrame(
    columns=[
        "Sport",
        "#Teams (avg)",
        "#Seasons",
        "Variance Observed",
        "Variance Luck",
    ]
)

In [2]:
# Number of simulations, taken from the "General" section of the config and
# passed to every `calculate_observed_and_luck_variance` call in this notebook.
NUMBER_OF_SIMULATIONS = config["General"]["number_of_simulations"]

In [5]:
def calculate_observed_and_luck_variance(
    prepared_data_path,
    schedule_directory,
    probabilities_win_loss_tie,
    points_for_win_loss_tie,
    number_of_simulations,
    type="teams",
):
    """
    Compute the luck variance per year and average it alongside the observed variance.

    Parameters
    ----------
    prepared_data_path : str
        The path to the prepared data.
    schedule_directory : str
        The directory where the schedules are stored.
    probabilities_win_loss_tie : list
        The probabilities a team wins, loses or ties.
    points_for_win_loss_tie : list
        The points a team gets for a win, a loss and a tie.
    number_of_simulations : int
        The number of simulations.
    type : str, optional
        Forwarded unchanged to ``calculate_luck_variance_per_year``. Defaults to
        ``"teams"``; the climbing and bouldering cells pass ``"competition"``.

    Returns
    -------
    pandas.DataFrame
        The prepared data restricted to the rows whose ``Luck_variance`` is not
        null, i.e. the years for which a luck variance could be simulated.
    float
        The mean of ``Variance_observed`` over those rows.
    float
        The mean of ``Luck_variance`` over those rows.
    int
        The number of rows that remain after filtering.
    float
        The mean of the ``#Teams`` column over those rows.
    """
    observed = preprocess_observed_data(prepared_data_path, points_for_win_loss_tie)
    schedules = get_available_schedules(schedule_directory)
    with_luck = calculate_luck_variance_per_year(
        observed,
        schedules,
        probabilities_win_loss_tie,
        points_for_win_loss_tie,
        number_of_simulations,
        min_max_scaling=False,
        type=type,
    )

    # Only rows with a luck variance take part in the averages below.
    has_luck_variance = with_luck["Luck_variance"].notna()
    simulated = with_luck[has_luck_variance]

    mean_observed_variance = simulated["Variance_observed"].mean()
    mean_luck_variance = simulated["Luck_variance"].mean()
    season_count = len(simulated)
    mean_team_count = np.mean(simulated["#Teams"])

    return simulated, mean_observed_variance, mean_luck_variance, season_count, mean_team_count

def append_to_results_df(observed_variance, luck_variance, n_years, n_teams, sport):
    """
    Record the results for one sport in the global ``results_df``.

    If ``results_df`` already holds a row for ``sport`` (for example because the
    cell for that sport was executed again), that row is replaced instead of a
    duplicate being appended, so re-running a cell keeps one row per sport.

    Parameters
    ----------
    observed_variance : float
        The average observed variance.
    luck_variance : float
        The average variance of the luck simulation.
    n_years : int
        The number of years where a schedule was available.
    n_teams : float
        The (average) number of teams.
    sport : str
        The name of the sport; used as the key that identifies the row.

    Returns
    -------
    None
        The module-level ``results_df`` is rebound to the updated frame.

    """
    global results_df

    new_row = pd.DataFrame(
        {
            "Sport": [sport],
            "#Teams (avg)": [n_teams],
            "#Seasons": [n_years],
            "Variance Observed": [observed_variance],
            "Variance Luck": [luck_variance],
        }
    )

    # Drop any earlier entry for this sport so repeated calls do not pile up
    # duplicate rows; the fresh row is then appended at the end.
    other_sports = results_df[results_df["Sport"] != sport]
    results_df = pd.concat([other_sports, new_row], ignore_index=True)




### NBA

In [6]:
# NBA: per-year luck variance plus the averages that go into the summary table.
df_prepared_nba, observed_variance_nba, luck_variance_nba, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["NBA"]["prepared_data_path"],
    schedule_directory=config["NBA"]["schedule_directory"],
    probabilities_win_loss_tie=config["NBA"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["NBA"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_nba, luck_variance_nba, n_years, n_teams, "NBA")

The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is not available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is not available.
The schedule for the year 2021 is available.


### NFL

In [14]:
# NFL: per-year luck variance plus the averages that go into the summary table.
df_prepared_nfl, observed_variance_nfl, luck_variance_nfl, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["NFL"]["prepared_data_path"],
    schedule_directory=config["NFL"]["schedule_directory"],
    probabilities_win_loss_tie=config["NFL"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["NFL"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_nfl, luck_variance_nfl, n_years, n_teams, "NFL")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


### NHL

In [15]:
# NHL: per-year luck variance plus the averages that go into the summary table.
df_prepared_nhl, observed_variance_nhl, luck_variance_nhl, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["NHL"]["prepared_data_path"],
    schedule_directory=config["NHL"]["schedule_directory"],
    probabilities_win_loss_tie=config["NHL"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["NHL"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_nhl, luck_variance_nhl, n_years, n_teams, "NHL")

The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is not available.
The schedule for the year 2020 is not available.
The schedule for the year 2021 is not available.


### MLB

In [16]:
# MLB: per-year luck variance plus the averages that go into the summary table.
df_prepared_mlb, observed_variance_mlb, luck_variance_mlb, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["MLB"]["prepared_data_path"],
    schedule_directory=config["MLB"]["schedule_directory"],
    probabilities_win_loss_tie=config["MLB"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["MLB"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_mlb, luck_variance_mlb, n_years, n_teams, "MLB")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.
The schedule for the year 2022 is available.


### MLS

In [17]:
# MLS: per-year luck variance plus the averages that go into the summary table.
df_prepared_mls, observed_variance_mls, luck_variance_mls, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["MLS"]["prepared_data_path"],
    schedule_directory=config["MLS"]["schedule_directory"],
    probabilities_win_loss_tie=config["MLS"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["MLS"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_mls, luck_variance_mls, n_years, n_teams, "MLS")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is not available.
The schedule for the year 2021 is available.


#### PML

In [18]:
# PML: per-year luck variance plus the averages that go into the summary table.
df_prepared_pml, observed_variance_pml, luck_variance_pml, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["PML"]["prepared_data_path"],
    schedule_directory=config["PML"]["schedule_directory"],
    probabilities_win_loss_tie=config["PML"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["PML"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_pml, luck_variance_pml, n_years, n_teams, "PML")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


#### Ligue 1

In [19]:
# Ligue 1: per-year luck variance plus the averages that go into the summary table.
df_prepared_ligue1, observed_variance_ligue1, luck_variance_ligue1, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["Ligue1"]["prepared_data_path"],
    schedule_directory=config["Ligue1"]["schedule_directory"],
    probabilities_win_loss_tie=config["Ligue1"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["Ligue1"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_ligue1, luck_variance_ligue1, n_years, n_teams, "Ligue1")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


#### SerieA

In [20]:
# Serie A: per-year luck variance plus the averages that go into the summary table.
df_prepared_serieA, observed_variance_serieA, luck_variance_serieA, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["SerieA"]["prepared_data_path"],
    schedule_directory=config["SerieA"]["schedule_directory"],
    probabilities_win_loss_tie=config["SerieA"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["SerieA"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_serieA, luck_variance_serieA, n_years, n_teams, "SerieA")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


#### LaLiga

In [21]:
# LaLiga: per-year luck variance plus the averages that go into the summary table.
df_prepared_laliga, observed_variance_laliga, luck_variance_laliga, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["LaLiga"]["prepared_data_path"],
    schedule_directory=config["LaLiga"]["schedule_directory"],
    probabilities_win_loss_tie=config["LaLiga"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["LaLiga"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_laliga, luck_variance_laliga, n_years, n_teams, "LaLiga")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


#### Bundesliga

In [22]:
# Bundesliga: per-year luck variance plus the averages that go into the summary table.
df_prepared_bundesliga, observed_variance_bundesliga, luck_variance_bundesliga, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["Bundesliga"]["prepared_data_path"],
    schedule_directory=config["Bundesliga"]["schedule_directory"],
    probabilities_win_loss_tie=config["Bundesliga"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["Bundesliga"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
)
append_to_results_df(observed_variance_bundesliga, luck_variance_bundesliga, n_years, n_teams, "Bundesliga")

The schedule for the year 2003 is available.
The schedule for the year 2004 is available.
The schedule for the year 2005 is available.
The schedule for the year 2006 is available.
The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2020 is available.
The schedule for the year 2021 is available.


#### Climbing

In [11]:
# Climbing: same pipeline as the leagues, but run with type="competition".
df_prepared_climbing, observed_variance_climbing, luck_variance_climbing, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["Climbing"]["prepared_data_path"],
    schedule_directory=config["Climbing"]["schedule_directory"],
    probabilities_win_loss_tie=config["Climbing"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["Climbing"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
    type="competition",
)
append_to_results_df(observed_variance_climbing, luck_variance_climbing, n_years, n_teams, "Climbing")

#### Bouldering

In [12]:
# Bouldering: same pipeline as the leagues, but run with type="competition".
df_prepared_bouldering, observed_variance_bouldering, luck_variance_bouldering, n_years, n_teams = calculate_observed_and_luck_variance(
    prepared_data_path=config["Bouldering"]["prepared_data_path"],
    schedule_directory=config["Bouldering"]["schedule_directory"],
    probabilities_win_loss_tie=config["Bouldering"]["probabilities_win_loss_tie"],
    points_for_win_loss_tie=config["Bouldering"]["points_for_win_loss_tie"],
    number_of_simulations=NUMBER_OF_SIMULATIONS,
    type="competition",
)
append_to_results_df(observed_variance_bouldering, luck_variance_bouldering, n_years, n_teams, "Bouldering")

The schedule for the year 2007 is available.
The schedule for the year 2008 is available.
The schedule for the year 2009 is available.
The schedule for the year 2010 is available.
The schedule for the year 2011 is available.
The schedule for the year 2012 is available.
The schedule for the year 2013 is available.
The schedule for the year 2014 is available.
The schedule for the year 2015 is available.
The schedule for the year 2016 is available.
The schedule for the year 2017 is available.
The schedule for the year 2018 is available.
The schedule for the year 2019 is available.
The schedule for the year 2021 is available.
The schedule for the year 2022 is available.


In [26]:
# Display the filtered climbing frame (rows with a luck variance) for inspection.
df_prepared_climbing

Unnamed: 0_level_0,Teams,Wins,Losses,Ties,Points,Points_scaled,Variance_observed_scaled,Variance_observed,#Teams,Luck_variance
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007,"[SCHUBERT, LACHAT, STREMFELJ, MRÁZEK, VERHOEVE...","[0.25, 0.23076923076923078, 0.2249999999999999...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.25, 0.23076923076923078, 0.2249999999999999...","[1.0, 0.9181929181929183, 0.8936507936507936, ...",0.027459,0.001517,292,0.008767
2008,"[ERNST, STREMFELJ, LAMA, MRÁZEK, VERHOEVEN, MA...","[0.6, 0.5714285714285714, 0.4, 0.3157894736842...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.6, 0.5714285714285714, 0.4, 0.3157894736842...","[0.9999999999999998, 0.9513546798029555, 0.659...",0.01355,0.004674,227,0.008389
2009,"[USOBIAGA LAKUNZA, ONDRA, AMMA, STREMFELJ, EIT...","[0.3333333333333333, 0.3, 0.1935483870967742, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.3333333333333333, 0.3, 0.1935483870967742, ...","[0.9999999999999999, 0.8957142857142857, 0.562...",0.017283,0.001766,243,0.007136
2010,"[JULIAN PUIGBLANQUE, SCHUBERT, EITER, ONDRA, ...","[0.3333333333333333, 0.27272727272727276, 0.25...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.3333333333333333, 0.27272727272727276, 0.25...","[1.0, 0.8093841642228742, 0.7379032258064517, ...",0.019456,0.001967,204,0.006976
2011,"[SCHUBERT, KIM, AMMA, MARKOVIC, EITER, LACHAT,...","[0.611111111111111, 0.4347826086956522, 0.3846...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.611111111111111, 0.4347826086956522, 0.3846...","[1.0, 0.7046061127851917, 0.6205635948210205, ...",0.010577,0.003769,333,0.006832
2012,"[ONDRA, MARKOVIC, AMMA, KIM, SCHUBERT, ERNST, ...","[0.5, 0.47368421052631576, 0.3225806451612903,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.5, 0.47368421052631576, 0.3225806451612903,...","[1.0, 0.9459165154264972, 0.6353726362625138, ...",0.014468,0.003425,304,0.006214
2013,"[ONDRA, KIM, MARKOVIC, AMMA, LACHAT, DURIF, SC...","[0.6666666666666666, 0.5333333333333333, 0.5, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.6666666666666666, 0.5333333333333333, 0.5, ...","[0.9999999999999999, 0.7947826086956522, 0.743...",0.013725,0.005794,228,0.005875
2014,"[AMMA, KIM, MARKOVIC, RÖCK, VERHOEVEN, SCHUBER...","[0.5, 0.36, 0.34615384615384615, 0.28125, 0.24...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.5, 0.36, 0.34615384615384615, 0.28125, 0.24...","[1.0, 0.712112676056338, 0.6836403033586133, 0...",0.011353,0.002685,289,0.006432
2015,"[GARNBRET, PILZ, MARKOVIC, SCHUBERT, ONDRA, KI...","[0.42857142857142855, 0.3684210526315789, 0.35...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.42857142857142855, 0.3684210526315789, 0.35...","[0.9999999999999999, 0.8558704453441295, 0.811...",0.017083,0.002975,265,0.006247
2016,"[GARNBRET, VERHOEVEN, NOGUCHI, ONDRA, KIM, SCH...","[0.5, 0.36363636363636365, 0.3333333333333333,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.5, 0.36363636363636365, 0.3333333333333333,...","[1.0, 0.7218721872187219, 0.6600660066006601, ...",0.011712,0.002815,318,0.005998


# Save Results

In [23]:
# Write each sport's filtered frame to the parquet path configured under its config key.
prepared_frames_by_config_key = {
    "NBA": df_prepared_nba,
    "NFL": df_prepared_nfl,
    "NHL": df_prepared_nhl,
    "MLB": df_prepared_mlb,
    "MLS": df_prepared_mls,
    "PML": df_prepared_pml,
    "Ligue1": df_prepared_ligue1,
    "SerieA": df_prepared_serieA,
    "LaLiga": df_prepared_laliga,
    "Bundesliga": df_prepared_bundesliga,
    "Climbing": df_prepared_climbing,
    "Bouldering": df_prepared_bouldering,
}
for config_key, prepared_frame in prepared_frames_by_config_key.items():
    prepared_frame.to_parquet(config[config_key]["results_path"])

# Calculating the luck contribution for each sport

### Calculation:

Based on [Classical Test Theory](https://en.wikipedia.org/wiki/Classical_test_theory) we assume that

$$ \text{Observed Score} = \text{True Score} + \text{Error Score} $$

In our case *True Score* reflects the skill component while the *Error Score* reflects the luck component.

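Since we assume that the two variables Luck and Skill are independent, their variances add up to the observed variance, so we can estimate:

$$ \operatorname{Var}(\text{Skill}) = \operatorname{Var}(\text{Observed}) - \operatorname{Var}(\text{Luck}) $$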

We can estimate the contribution of skill (reliability) with:

$$ \rho_{OS}^2 = \frac{\sigma_S^2}{\sigma_O^2} = \frac{\sigma_O^2 - \sigma_L^2}{\sigma_O^2} = 1 - \frac{\sigma_L^2}{\sigma_O^2} $$ 

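and the contribution of luck with:

$$ \rho_{OL}^2 = \frac{\sigma_L^2}{\sigma_O^2} $$

Note that this ratio can only be read as a share of the observed variance when $\sigma_L^2 \le \sigma_O^2$. A value above 1 means that the simulated luck variance exceeds the observed variance, in which case the estimated skill variance would be negative.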


In [25]:
# Luck contribution per sport: average simulated luck variance divided by the
# average observed variance.
luck_contribution_nba = luck_variance_nba / observed_variance_nba
luck_contribution_nfl = luck_variance_nfl / observed_variance_nfl
luck_contribution_nhl = luck_variance_nhl / observed_variance_nhl
luck_contribution_mlb = luck_variance_mlb / observed_variance_mlb
luck_contribution_mls = luck_variance_mls / observed_variance_mls
luck_contribution_pml = luck_variance_pml / observed_variance_pml
luck_contribution_ligue1 = luck_variance_ligue1 / observed_variance_ligue1
luck_contribution_serieA = luck_variance_serieA / observed_variance_serieA
luck_contribution_laliga = luck_variance_laliga / observed_variance_laliga
luck_contribution_bundesliga = luck_variance_bundesliga / observed_variance_bundesliga
luck_contribution_climbing = luck_variance_climbing / observed_variance_climbing
luck_contribution_bouldering = luck_variance_bouldering / observed_variance_bouldering

# One line per sport, printed in the same order as the computations above.
for label, contribution in (
    ("NBA: ", luck_contribution_nba),
    ("NFL: ", luck_contribution_nfl),
    ("NHL: ", luck_contribution_nhl),
    ("MLB: ", luck_contribution_mlb),
    ("MLS: ", luck_contribution_mls),
    ("PML: ", luck_contribution_pml),
    ("Ligue1: ", luck_contribution_ligue1),
    ("SerieA: ", luck_contribution_serieA),
    ("LaLiga: ", luck_contribution_laliga),
    ("Bundesliga: ", luck_contribution_bundesliga),
    ("Climbing: ", luck_contribution_climbing),
    ("Bouldering: ", luck_contribution_bouldering),
):
    print(label, contribution)

NBA:  0.13285931642120546
NFL:  0.4201521757715654
NHL:  0.4239181528677859
MLB:  0.29666808360761265
MLS:  0.7871272254607004
PML:  0.28235624759502476
Ligue1:  0.4346010495085048
SerieA:  0.29858557095860105
LaLiga:  0.323813770557511
Bundesliga:  0.38020653760007445
Climbing:  1.8511709557695513
Bouldering:  1.2639937555349532


In [24]:
# Add the luck share (luck variance over observed variance) to the summary
# table and show the sports ordered from lowest to highest share.
results_df["Luck Contribution"] = results_df["Variance Luck"].div(results_df["Variance Observed"])
results_df.sort_values("Luck Contribution")

Unnamed: 0,Sport,#Teams (avg),#Seasons,Variance Observed,Variance Luck,Luck Contribution
0,NBA,30.0,16,614.527222,81.645667,0.132859
7,PML,20.0,19,300.569342,84.867632,0.282356
5,MLB,30.0,20,137.228333,40.711267,0.296668
9,SerieA,19.894737,19,288.607558,86.174053,0.298586
10,LaLiga,20.0,19,262.621711,85.040526,0.323814
11,Bundesliga,18.0,19,202.273717,76.905789,0.380207
3,NFL,32.0,19,9.619243,4.041546,0.420152
4,NHL,30.142857,14,187.313714,79.405684,0.423918
8,Ligue1,20.0,19,199.391579,86.655789,0.434601
6,MLS,17.388889,18,93.051406,73.243295,0.787127


In [34]:
# Take the "Wins" and "Teams" entries from the first row of the climbing frame.
wins = df_prepared_climbing["Wins"].iloc[0]
athlethes = df_prepared_climbing["Teams"].iloc[0]

In [32]:
# Number of entries in the first row's "Wins" value.
len(wins)

292

In [41]:
# Draw one random rank per entry of the athlete list and collect the ranks per
# athlete name; a name that occurs several times collects several ranks.
athletes_points = {}
athletes_in_competition = athlethes
total_starter = len(athlethes)
# Ranks run from 1 to total_starter, hence the +1 on the upper bound of arange.
ranks = np.random.choice(
    np.arange(1, total_starter + 1),
    size=len(athletes_in_competition),
    replace=False,
)
for athlete, rank in zip(athletes_in_competition, ranks):
    athletes_points.setdefault(athlete, []).append(rank)

In [42]:
# Show the ranks collected per athlete name (repeated names hold several ranks).
athletes_points

{'SCHUBERT': [283],
 'LACHAT': [6],
 'STREMFELJ': [278, 214],
 'MRÁZEK': [185],
 'VERHOEVEN': [267],
 'SARKANY': [24],
 'USOBIAGA LAKUNZA': [14],
 'EITER': [245],
 'CRESPI': [220],
 'JULIAN  PUIGBLANQUE': [92],
 'KIM': [112, 207, 197],
 'MARIN GARCIA': [186],
 'GROS': [5],
 'HARRINGTON': [65],
 'LEVET': [37],
 'KOBAYASHI': [154],
 'DURIF': [243],
 'SON': [47],
 'LAMA': [180],
 'MILLET': [191],
 'MARKOVIC': [238],
 'AMMA': [162],
 'CIAVALDINI': [227],
 'ANDA VILLANUEVA': [139],
 'MATSUSHIMA': [170],
 'HORI': [84],
 'SAURWEIN': [192],
 'EYER': [49],
 'FUSELIER': [206],
 'MIDTBOE': [41, 75],
 'MOGAKI': [69],
 'STRANIK': [63],
 'SUPPER': [261],
 'OCIEPKA': [176],
 'LAVARDA': [122],
 'BACHER': [12],
 'FRANKO': [71],
 'SHAGINA': [103],
 'DESGRANGES': [26],
 'CHERESHNEVA': [209],
 'ROMAIN': [195],
 'WINKLER': [268],
 'PINET': [169],
 'WATANABE': [172, 21],
 'FISCHHUBER': [210],
 'KIMURA': [16],
 'DUGIT': [259, 38],
 'GOSAR': [292],
 'NOGUCHI': [100],
 'BARATZADEH': [17],
 'TANAKA': [86, 269],