In [28]:
import numpy as np
import pandas as pd
from collections import Counter

# Simulation of NBA regular season games with pure luck

### Goal: Simulate NBA regular season
 
#### **League format:**
 - 82 games per team
 - 30 teams
 - 2 conferences
 - 15 teams per conference
 - 3 divisions per conference
 - 5 teams per division

#### **Regular Season rules:**

From Wikipedia:

*During the regular season, each team plays 82 games, 41 each home and away. A team faces opponents in its own division four times a year (16 games). Each team plays six of the teams from the other two divisions in its conference four times (24 games), and the remaining four teams three times (12 games). Finally, each team plays all the teams in the other conference twice apiece (30 games).*

#### **Winning condition:**

As there are no ties and every game is decided by pure luck, each team wins any given game with probability 0.5.




### Simulate NBA Seasons

Luckily, the format of the NBA season has not changed drastically since 1990. Therefore, it is sufficient to take one example schedule (e.g. 2016) and use that schedule for every simulated season. The actual team names do not matter; only the result of the regular season is important.


#### Simulate NBA Seasons 2004-2022

Schedule from https://www.basketball-reference.com/leagues/NBA_2016_games.html

Excluding the 2020-2021 season (Covid-19) and the 2011-2012 lockout season.

In [60]:
# Get an example schedule for the years
df = pd.read_csv("../prepared_data/schedules/NBA_schedule_2016.csv")
# Look only at the regular season: the first 1230 rows
# (30 teams * 82 games / 2 teams per game = 82 * 15 games).
df = df.iloc[:1230]
# Keep only the two team columns. "Visitor/Neutral" is the away side and
# "Home/Neutral" is the home side (the original mapping had them swapped).
df = df[["Visitor/Neutral", "Home/Neutral"]].rename(
    columns={"Visitor/Neutral": "away", "Home/Neutral": "home"}
)


In [59]:
def simulate_season(df, rng=None):
    """Simulate one season in which every game is decided by a fair coin flip.

    Parameters
    ----------
    df : pandas.DataFrame
        Schedule with one row per game and the columns ``"home"`` and
        ``"away"`` holding the two team names. The frame is NOT modified.
    rng : object with a NumPy-style ``choice`` method, optional
        Source of randomness, e.g. ``np.random.default_rng(seed)``.
        Defaults to NumPy's global random state, so ``np.random.seed``
        still controls the outcome when no generator is passed.

    Returns
    -------
    collections.Counter
        Mapping ``team name -> number of wins``. Every team that appears in
        the schedule is present, including teams that did not win any game.
    """
    if rng is None:
        rng = np.random

    # Assign a winner 50/50 for every scheduled game.
    picks = rng.choice(["home", "away"], size=len(df), p=[0.5, 0.5])

    # Work on local arrays instead of writing helper columns into the
    # caller's DataFrame, so repeated calls never touch the schedule.
    home_teams = np.asarray(df["home"])
    away_teams = np.asarray(df["away"])
    winning_teams = np.where(picks == "home", home_teams, away_teams)

    counts = Counter(winning_teams.tolist())

    # A Counter only contains keys it has seen; register winless teams with
    # 0 wins so that callers get one entry per team in the schedule.
    for team in pd.unique(np.concatenate([home_teams, away_teams])):
        counts.setdefault(team, 0)
    return counts

In [66]:
# Seasons 2004..2022 (inclusive) minus the two irregular ones:
#   2011 -> lockout season
#   2020 -> lockdown season
years_to_simulate = np.setdiff1d(np.arange(2004, 2023), [2011, 2020])


In [74]:
# Run one pure-luck simulation per season and collect one record per year.
# Each record stores per-team lists ("Teams", "Wins", "Losses", "Ties") that
# are aligned by position, plus the scalar number of games per team.
#
# NOTE(review): no random seed is set in the visible cells, so every run of
# this cell produces different results; seed NumPy beforehand if the saved
# output has to be reproducible.
ngames = 82  # regular-season games played by each team
records = []
for year in years_to_simulate:
    simulation = simulate_season(df)
    teams = list(simulation.keys())
    wins = list(simulation.values())
    # No ties are possible, so every game that was not won was lost.
    losses = [ngames - x for x in wins]
    ties = [0 for x in wins]
    records.append(
        {"Year": year, "Teams": teams, "Wins": wins, "Losses": losses, "Ties": ties, "#Games": ngames}
    )

# Build the frame once instead of concatenating inside the loop: this avoids
# quadratic copying, avoids concatenating with an empty frame, and yields a
# unique 0..n-1 index rather than the label 0 repeated on every row.
df_nba_simulated = pd.DataFrame(
    records, columns=["Year", "Teams", "Wins", "Losses", "Ties", "#Games"]
)

In [76]:
# Write the simulated seasons to a parquet file; the path is relative to the
# notebook's working directory.
df_nba_simulated.to_parquet("../prepared_data/simulations/NBA_simulated.parquet")