In [1]:
import pandas as pd
import numpy as np
from collections import Counter, OrderedDict

# Simulation of NFL regular season games with pure luck

### Goal: Simulate NFL regular season
 
#### **League format currently:**

From Wikipedia: 

*Currently, the fourteen different opponents each team faces over the 17-game regular season schedule are set using a pre-determined formula:*

*Each team plays twice against each of the other three teams in its division: once at home, and once on the road (six games).
Each team plays once against each of the four teams from a predetermined division (based on a three-year rotation) within its own conference: two at home, and two on the road (four games).
Each team plays once against one team from the remaining two divisions within its conference that finished in a similar placement in the final divisional standings in the prior season:[a] one at home, one on the road (two games).
Each team plays once against each of the four teams from a predetermined division (based on a four-year rotation) in the other conference: two at home, and two on the road (four games).
Each team also plays one game against the team from a predetermined division (based on a four-year rotation) in the other conference that finished in a similar placement in the final divisional standings in the prior season[a] (one game). Each team plays six of the teams from the other two divisions in its conference four times (24 games), and the remaining four teams three times (12 games). Finally, each team plays all the teams in the other conference twice apiece (30 games).*

#### **Winning condition:**

The probability of a tie is very low in the NFL. According to [this source](https://www.sports-king.com/odds-nfl-game-tie-2889/#:~:text=Based%20on%20the%20numbers%2C%20there,game%20ends%20in%20a%20tie.&text=The%20lowest%20scoring%20tie%20game,to%20a%207%2D7%20tie.), the probability is just 0.2%.

Therefore we assign the probabilities of winning: Team A: 49.9%, Team B: 49.9%, Draw: 0.2%


## Simulate NFL Seasons

### Use scraped schedules for simulation

Scraped schedules are available in the `prepared_data/schedules` folder. The scraping was done in `data_preparation/NFL_schedule_scrapper.ipynb`.

In [2]:
def simulate_season(df, tie_prob=0.002, rng=None):
    """Simulate every game of a season schedule as a pure-luck draw.

    Each game is decided independently: it ends in a tie with probability
    ``tie_prob``; otherwise the home and the away team are equally likely
    to win.

    Parameters
    ----------
    df : pandas.DataFrame
        Season schedule with one row per game. The columns "Home" and
        "Away" are read. The frame is modified in place: the columns
        "Winner", "Winner Team" and "Looser Team" are added/overwritten.
    tie_prob : float, default 0.002
        Probability that a single game ends in a tie (0.2 %). The two win
        probabilities are ``(1 - tie_prob) / 2`` each.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed(...)`` controls reproducibility.

    Returns
    -------
    (pandas.DataFrame, int)
        The same ``df`` object with the three result columns, and the
        largest number of games any single team plays in the schedule
        (0 for an empty schedule).

    Raises
    ------
    ValueError
        If ``tie_prob`` is not within [0, 1].
    """
    if not 0.0 <= tie_prob <= 1.0:
        raise ValueError("tie_prob must be between 0 and 1, got {}".format(tie_prob))

    # Games per team = appearances as home team + appearances as away team.
    games_per_team = Counter(df["Home"]) + Counter(df["Away"])
    # default=0 keeps an empty schedule from raising inside max().
    ngames = max(games_per_team.values(), default=0)

    # Split the non-tie probability mass evenly between both sides.
    win_prob = (1.0 - tie_prob) / 2.0
    sampler = np.random if rng is None else rng
    df["Winner"] = sampler.choice(
        ["Home", "Away", "Tie"],
        size=len(df),
        p=[win_prob, win_prob, tie_prob],
    )

    # Vectorised mapping from the outcome label to the actual team names;
    # tied games get the placeholder "Tie" in both columns.
    home_won = df["Winner"] == "Home"
    away_won = df["Winner"] == "Away"
    df["Winner Team"] = df["Home"].where(home_won, df["Away"].where(away_won, "Tie"))
    df["Looser Team"] = df["Home"].where(away_won, df["Away"].where(home_won, "Tie"))
    return df, ngames

In [3]:
# Column layout of the summary table: one row per simulated season, with the
# per-team values stored as parallel lists (same order as "Teams").
SUMMARY_COLUMNS = ["Year", "Teams", "Wins", "Losses", "Ties", "#Games"]
years_to_simulate = np.arange(2002, 2022)

# Fix the global NumPy seed so that Restart & Run All reproduces the same
# simulated seasons.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

season_records = []
for year in years_to_simulate:
    # Read the schedule for the year; skip years without usable data.
    try:
        schedule = pd.read_csv('../prepared_data/schedules/NFL_{}.csv'.format(year))
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Only a missing/empty file is skipped; any other error (parser
        # failure, interrupt, ...) should surface instead of being swallowed.
        print("No schedule data for year {}".format(year))
        continue

    simulation, ngames = simulate_season(schedule)

    win_counts = Counter(simulation["Winner Team"])
    loss_counts = Counter(simulation["Looser Team"])

    # Count ties directly per team instead of deriving them from ngames, so
    # the value stays correct even if teams played differing numbers of games.
    tied_games = simulation[simulation["Winner"] == "Tie"]
    tie_counts = Counter(tied_games["Home"]) + Counter(tied_games["Away"])

    # Take the team list from the schedule itself. Looking every team up in
    # the counters (missing -> 0) keeps wins/losses/ties aligned even when a
    # team finishes a season without a single win or without a single loss.
    teams = sorted(set(simulation["Home"]) | set(simulation["Away"]))
    wins = [win_counts[team] for team in teams]
    losses = [loss_counts[team] for team in teams]
    ties = [tie_counts[team] for team in teams]

    season_records.append({
        "Year": int(year),
        "Teams": teams,
        "Wins": wins,
        "Losses": losses,
        "Ties": ties,
        "#Games": ngames,
    })

# Build the frame once instead of concatenating inside the loop.
df_nfl_simulated = pd.DataFrame(season_records, columns=SUMMARY_COLUMNS)

In [4]:
# Persist the per-season simulation summary as a parquet file.
simulated_output_path = "../prepared_data/simulations/NFL_simulated.parquet"
df_nfl_simulated.to_parquet(simulated_output_path)