# High-Level Plan

## Game-level sim
- Kenpom implied wp for each team
- Give players avg reg season pts for each

## Tournament-level sim

- Treat each region & final four as trees
- Simulate whole tournament n times
- For each game, use kenpom implied wp to determine winner
- Keep track of each player's estimated pts

## Best combo selection
- For each player, calculate a distribution of pts
- Naively use greedy algorithm selected the top 15 players
- More sophisticated approach: select best *set* of 15 players

## Backtesting
- Use historical data to see how well the model would have performed

---

In [84]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from collections import deque

## Game-level (kenpom)

In [197]:
def scrape_kenpom_to_df():
    """
    Scrape KenPom data and return it as a pandas DataFrame.
    
    Returns:
        pd.DataFrame: DataFrame containing KenPom data.
    """
    url = "https://kenpom.com/index.php?y=2024"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    
    if response.status_code != 200:
        print(response)
        raise Exception(f"Failed to fetch data from {url}")
    
    soup = BeautifulSoup(response.text, 'html.parser')
    # Parse the HTML content using BeautifulSoup
    table = soup.find("table", id ="ratings-table")

    if table is None:
        raise Exception("Failed to find the ratings table on the page.")
    
    kenpom_df = pd.read_html(str(table))[0]
    
    return kenpom_df

In [230]:
kenpom_df = scrape_kenpom_to_df()
kenpom_df

  kenpom_df = pd.read_html(str(table))[0]


Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,...,Unnamed: 11_level_0,Unnamed: 12_level_0,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_1,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_2,Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,...,Unnamed: 11_level_2,Unnamed: 12_level_2,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_3,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_4,Unnamed: 0_level_4,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,...,Unnamed: 11_level_4,Unnamed: 12_level_4,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_5,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_6,Unnamed: 0_level_6,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,...,Unnamed: 11_level_6,Unnamed: 12_level_6,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_7,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_8,Unnamed: 0_level_8,Unnamed: 1_level_8,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,...,Unnamed: 11_level_8,Unnamed: 12_level_8,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_9,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_10,Unnamed: 0_level_10,Unnamed: 1_level_10,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,...,Unnamed: 11_level_10,Unnamed: 12_level_10,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_11,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_12,Unnamed: 0_level_12,Unnamed: 1_level_12,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,...,Unnamed: 11_level_12,Unnamed: 12_level_12,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_13,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_14,Unnamed: 0_level_14,Unnamed: 1_level_14,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,...,Unnamed: 11_level_14,Unnamed: 12_level_14,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_15,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_16,Unnamed: 0_level_16,Unnamed: 1_level_16,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16,Unnamed: 6_level_16,Unnamed: 7_level_16,Unnamed: 8_level_16,Unnamed: 9_level_16,...,Unnamed: 11_level_16,Unnamed: 12_level_16,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_17,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
Unnamed: 0_level_18,Unnamed: 0_level_18,Unnamed: 1_level_18,Unnamed: 2_level_18,Unnamed: 3_level_18,Unnamed: 4_level_18,Unnamed: 5_level_18,Unnamed: 6_level_18,Unnamed: 7_level_18,Unnamed: 8_level_18,Unnamed: 9_level_18,...,Unnamed: 11_level_18,Unnamed: 12_level_18,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
Unnamed: 0_level_19,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg.1,ORtg,ORtg.1,DRtg,DRtg.1,NetRtg,NetRtg.1
0,1,Connecticut 1,BE,37-3,+36.43,127.5,1,91.1,4,64.6,...,+.037,95,+12.42,12,113.2,11,100.8,24,-3.40,283
1,2,Houston 1,B12,32-5,+31.17,118.9,19,87.7,2,63.5,...,+.042,86,+11.57,23,111.9,37,100.3,13,-1.02,226
2,3,Purdue 1,B10,34-5,+30.62,125.2,4,94.6,12,67.0,...,+.048,70,+14.65,2,114.4,3,99.8,4,+10.58,9
3,4,Auburn 4,SEC,27-8,+27.99,120.4,10,92.4,6,70.0,...,-.080,338,+9.49,60,111.9,38,102.4,72,+1.47,147
4,5,Tennessee 2,SEC,27-9,+26.61,116.8,28,90.2,3,69.3,...,-.026,257,+13.35,8,114.6,2,101.2,40,+8.97,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,360,IUPUI,Horz,6-26,-25.61,92.3,353,117.9,358,67.5,...,-.019,244,-3.06,247,106.1,179,109.2,338,-4.33,303
376,,,,,,,,,,,...,,,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,Strength of Schedule,NCSOS,NCSOS
377,Rk,Team,Conf,W-L,NetRtg,ORtg,ORtg,DRtg,DRtg,AdjT,...,Luck,Luck,NetRtg,NetRtg,ORtg,ORtg,DRtg,DRtg,NetRtg,NetRtg
378,361,Coppin St.,MEAC,2-27,-25.73,85.7,361,111.4,285,66.2,...,-.057,311,-5.12,305,102.9,302,108.0,272,+0.66,169


In [231]:
kenpom_df.columns = kenpom_df.columns.get_level_values(1)

In [233]:
# select only the relevant columns
kenpom_df = kenpom_df.iloc[:, :5]

# get rid of duplicate rows of header
kenpom_df = kenpom_df.drop_duplicates()

# drop extra row with header
kenpom_df.dropna(subset=['Team'], inplace=True)

# reset index after drops
kenpom_df.reset_index(drop=True, inplace=True)

# drop extra copy of column labels
kenpom_df.drop(kenpom_df[kenpom_df['Team'] == 'Team'].index, inplace=True)

# remove seed numbers
kenpom_df['Team'] = kenpom_df['Team'].str.replace(r' \d{1,2}', '', regex=True)

kenpom_df

Unnamed: 0,Rk,Team,Conf,W-L,NetRtg
0,1,Connecticut,BE,37-3,+36.43
1,2,Houston,B12,32-5,+31.17
2,3,Purdue,B10,34-5,+30.62
3,4,Auburn,SEC,27-8,+27.99
4,5,Tennessee,SEC,27-9,+26.61
...,...,...,...,...,...
357,358,Pacific,WCC,6-26,-22.84
358,359,Stonehill,NEC,4-27,-22.93
359,360,IUPUI,Horz,6-26,-25.61
360,361,Coppin St.,MEAC,2-27,-25.73


In [None]:
def wp_kenpom(team1, team2, ratings_df, sd=11):
    # Extract ratings for each team in the matchup
    # First, check if team name is in the kenpom teams
    if team1.team_name not in kenpom_df['Team'].values:
        team1_kp = sr_to_kenpom[team1.team_name]
    else:
        team1_kp = team1.team_name
    
    if team2.team_name not in kenpom_df['Team'].values:
        team2_kp = sr_to_kenpom[team2.team_name]
    else:
        team2_kp = team2.team_name
        
    rating1 = ratings_df[ratings_df['Team'] == team1_kp]['NetRtg'].values[0]
    rating2 = ratings_df[ratings_df['Team'] == team2_kp]['NetRtg'].values[0]
    rating_diff = float(rating1) - float(rating2)

    # Calculate the probability of team1 winning
    prob_team1_wins = 1 / (1 + np.exp(-rating_diff / sd))

    return prob_team1_wins

def simulate_game_kenpom(team1, team2, ratings_df):
    # Calculate the probability of team1 winning
    prob_team1_wins = wp_kenpom(team1, team2, ratings_df)

    # Simulate the game based on the probability
    if np.random.rand() < prob_team1_wins:
        return team1
    else:
        return team2


## Parse Old Results from SportsReference

Need to come up with a score for a certain strategy. Would also be nice to represent the uncertainty somehow.

## Scraping

In [72]:
def read_unplayed_tournament(url, region):
    # Use requests to get the content of the webpage
    response = requests.get(url)
    html_content = response.text

    soup = BeautifulSoup(html_content, 'html.parser')

    # Find the container for the east region
    east_round_64 = soup.find(id=region).find(class_='team16').find(class_='round', recursive=False)
    matchups = []

    games = east_round_64.find_all('div', recursive=False)  # Each game is contained in a div directly under the round div
    for game in games:
        teams = game.find_all('div', recursive=False)
        if teams[0].find('span', recursive=False) is not None:
            if game.find('span', recursive=False) is not None: # regular games
                location = game.find('span', recursive=False).text.strip()[3:]
                team1_seed, team1_name, team1_link = teams[0].find('span').text, teams[0].find('a').text, teams[0].find('a')['href']
                team2_seed, team2_name, team2_link = teams[1].find('span').text, teams[1].find('a').text, teams[1].find('a')['href']
            else: # play-in games
                location = "TBD"
                team1_seed, team1_name, team1_link = teams[0].find('span').text, teams[0].find('a').text, teams[0].find('a')['href']
                team2_seed = 16 - (int(team1_seed) - 1)
                team2_name = "Play-In"
                team2_link = None

            matchups.append({
                'team_1': {'seed': team1_seed, 'name': team1_name, 'link': team1_link},
                'team_2': {'seed': team2_seed, 'name': team2_name, 'link': team2_link},
                'location': location
            })

    return matchups

In [73]:
BASE_URL = "https://www.sports-reference.com/cbb/postseason/men/{}-ncaa.html"

In [74]:
year = 2024
url = BASE_URL.format(year)

regions = ["east", "west", "south", "midwest"]
matchups = read_unplayed_tournament(url, "west")
for matchup in matchups:
    print(matchup)

{'team_1': {'seed': '1', 'name': 'UNC', 'link': '/cbb/schools/north-carolina/men/2024.html'}, 'team_2': {'seed': '16', 'name': 'Wagner', 'link': '/cbb/schools/wagner/men/2024.html'}, 'location': 'Charlotte, NC'}
{'team_1': {'seed': '8', 'name': 'Mississippi State', 'link': '/cbb/schools/mississippi-state/men/2024.html'}, 'team_2': {'seed': '9', 'name': 'Michigan State', 'link': '/cbb/schools/michigan-state/men/2024.html'}, 'location': 'Charlotte, NC'}
{'team_1': {'seed': '5', 'name': "Saint Mary's", 'link': '/cbb/schools/saint-marys-ca/men/2024.html'}, 'team_2': {'seed': '12', 'name': 'Grand Canyon', 'link': '/cbb/schools/grand-canyon/men/2024.html'}, 'location': 'Spokane, WA'}
{'team_1': {'seed': '4', 'name': 'Alabama', 'link': '/cbb/schools/alabama/men/2024.html'}, 'team_2': {'seed': '13', 'name': 'College of Charleston', 'link': '/cbb/schools/college-of-charleston/men/2024.html'}, 'location': 'Spokane, WA'}
{'team_1': {'seed': '6', 'name': 'Clemson', 'link': '/cbb/schools/clemson/me

In [None]:
print(read_unplayed_tournament(BASE_URL.format(year), "east"))
print(read_unplayed_tournament(BASE_URL.format(year), "west"))
print(read_unplayed_tournament(BASE_URL.format(year), "south"))
print(read_unplayed_tournament(BASE_URL.format(year), "midwest"))

## Data Structures for Simulation

The bracket is a big binary tree of games (the championship game is root node).

We populate the tree by taking a list of initial matchups and creating unique nodes (i.e. TournamentGame objects) for them. Matchups that play each other next will have the same parent TournamentGame node. We can create a queue of simulated games to process the games to be simulated. When a game gets both opponents filled, it will be popped off the queue and simulated.

Could model this with classes or just with a giant dataframe. Classes seems cleaner.

In [270]:
class Team:
    def __init__(self, team_name, seed, roster):
        self.team_name = team_name
        self.seed = seed
        self.roster = roster

    def __str__(self):
        return f"{self.team_name} ({self.seed})"

class Matchup:
    def __init__(self, team1, team2, location=None):
        self.team1 = team1
        self.team2 = team2
        self.location = location

    @classmethod
    def parse_matchup(cls, matchup_dict):
        """
        Parses a matchup string in the format "Team1 vs Team2" and returns a Matchup object.
        """
        t1 = Team(matchup_dict['team_1']['name'], matchup_dict['team_1']['seed'], matchup_dict['team_1']['link'])
        t2 = Team(matchup_dict['team_2']['name'], matchup_dict['team_2']['seed'], matchup_dict['team_2']['link'])
        
        return cls(t1, t2, matchup_dict['location'])

class Node:
    def __init__(self, matchup=None, left=None, right=None, parent=None):
        self.matchup = matchup
        self.left = left
        self.right = right
        self.parent = parent
        self.winner = None  # Winning team from this matchup

    def is_leaf(self):
        return self.left is None and self.right is None

class Region:
    def __init__(self, matchups):
        self.matchup_q = deque(self.parse_matchups_into_nodes(matchups))
        self.championship = None

    def parse_matchups_into_nodes(self, matchups_list):
        '''
        Takes a list of matchups and creates a tree of matchups.
        '''
        nodes = []
        for matchup in matchups_list:
            # Parse the matchup and create a Matchup object
            Matchup_obj = Matchup.parse_matchup(matchup)
            # Create a Node object for the matchup
            node = Node(matchup=Matchup_obj)
            nodes.append(node)
        
        return nodes

    def sim_region(self):
        '''
        Starting from initial matchups, sims a region of the tournament.
        '''
        # east plays west and south plays midwest in final four, so process east, west, south, midwest in that order
        # could start simulations in parallel when you add games to the queue
        while len(self.matchup_q) >= 2:
            game1 = self.matchup_q.popleft()  # pop the first game
            game2 = self.matchup_q.popleft()  # pop the second game

            # simulate those two games and do appropriate calculation and bookkeeping
            # uncertainty could be here in terms of player score (use reg. season to make distribution?)
            # simulation will lead to uncertainty in game outcomes

            
            # game1winner = np.random.choice([game1.matchup.team1, game1.matchup.team2], p=[0.5, 0.5])
            game1winner = simulate_game_kenpom(game1.matchup.team1, game1.matchup.team2, kenpom_df)
            game1.winner = game1winner

            # game2winner = np.random.choice([game2.matchup.team1, game2.matchup.team2], p=[0.5, 0.5])
            game2winner = simulate_game_kenpom(game2.matchup.team1, game2.matchup.team2, kenpom_df)
            game2.winner = game2winner
                
            # create new Game object with the winner of those two games
            new_game = Node(matchup=Matchup(game1winner, game2winner))
            
            # set the left and right children of the new game
            new_game.left = game1
            new_game.right = game2
            
            # mark the game as the parent of the other two games
            game1.parent = new_game
            game2.parent = new_game

            # add the new game to queue
            self.matchup_q.append(new_game)
        
        # sim championship
        if len(self.matchup_q) == 1:
            final_game = self.matchup_q.popleft()
            champ = np.random.choice([final_game.matchup.team1, final_game.matchup.team2], p=[0.5, 0.5])
            
            final_game.winner = champ
        
            self.championship = final_game
        else:
            raise Exception("Error: Region simulation did not end with a single championship game.")
    
    def print_region(self):
        '''
        Prints the bracket in a readable format.
        '''
        def print_node(node, indent=""):
            if node is None:
                return
            
            green_color = "\033[92m"
            reset_color = "\033[0m"
                
            # Assume each node has a 'winner' attribute. Fall back to a placeholder if not.
            winner = getattr(node, "winner", "TBD")
            
            # If the node has children, print them in a bracket-like structure.
            if node.is_leaf():
                # Print the matchup details
                t1 = node.matchup.team1
                t2 = node.matchup.team2

                if node.winner == node.matchup.team1:
                    print(indent + "├──" + f"{green_color}{node.matchup.team1}{reset_color}")
                    print(indent + "└──" + str(node.matchup.team2))
                else:
                    print(indent + "├──" + str(node.matchup.team1))
                    print(indent + "└──" + f"{green_color}{node.matchup.team2}{reset_color}")
           
            else:
                # find winner of the game
                if node.winner == node.matchup.team1:
                    print(indent + "├──" + f"{green_color}{node.matchup.team1}{reset_color}")
                    print_node(node.left, indent + "│   ")
                    print(indent + "└──" + str(node.matchup.team2))
                    print_node(node.right, indent + "    ")
                else:
                    print(indent + "├──" + str(node.matchup.team1))
                    print_node(node.left, indent + "│   ")
                    print(indent + "└──" + f"{green_color}{node.matchup.team2}{reset_color}")
                    print_node(node.right, indent + "    ")
                
        print(self.championship.winner)
        print_node(self.championship)

class Tournament:        
    def __init__(self, east_matchups, west_matchups, south_matchups, midwest_matchups):
        self.east = east_matchups
        self.west = west_matchups
        self.south = south_matchups
        self.midwest = midwest_matchups
        self.players_bookkeeping = None

    def simulate_tournament(self):
       for region in [self.east, self.west, self.south, self.midwest]:
            region.championship = region.sim_region()
            region.print_region()

    def bracket_distance(team1, team2):
        '''
        How many games it will take before team1 plays team2 if they keep winning.
        '''
        pass

In [278]:
from collections import defaultdict

east_2024_list = read_unplayed_tournament(BASE_URL.format(2024), "east")

def simulate_n_tournaments(N=100):
    champions = defaultdict(int)

    for _ in range(N):
        east_2024 = Region(east_2024_list)
        east_2024.sim_region()  # Assume you have this method
        champ = east_2024.championship.winner # winner of championship
        champions[str(champ)] += 1

    # Convert to probabilities
    champion_probs = {team: count / N for team, count in champions.items()}
    return champion_probs

In [279]:
simulate_n_tournaments(1000)

{'San Diego State (5)': 0.04,
 'UConn (1)': 0.314,
 'Iowa State (2)': 0.185,
 'Auburn (4)': 0.109,
 'Illinois (3)': 0.17,
 'Florida Atlantic (8)': 0.014,
 'Northwestern (9)': 0.017,
 'BYU (6)': 0.08,
 'Washington State (7)': 0.036,
 'Drake (10)': 0.02,
 'UAB (12)': 0.002,
 'South Dakota State (15)': 0.003,
 'Morehead State (14)': 0.002,
 'Duquesne (11)': 0.006,
 'Yale (13)': 0.002}

In [None]:
east_2024.sim_region()
east_2024.print_region()

San Diego State (5)
├──[92mSan Diego State (5)[0m
│   ├──Florida Atlantic (8)
│   │   ├──Stetson (16)
│   │   │   ├──UConn (1)
│   │   │   └──[92mStetson (16)[0m
│   │   └──[92mFlorida Atlantic (8)[0m
│   │       ├──[92mFlorida Atlantic (8)[0m
│   │       └──Northwestern (9)
│   └──[92mSan Diego State (5)[0m
│       ├──[92mSan Diego State (5)[0m
│       │   ├──[92mSan Diego State (5)[0m
│       │   └──UAB (12)
│       └──Yale (13)
│           ├──Auburn (4)
│           └──[92mYale (13)[0m
└──Illinois (3)
    ├──[92mIllinois (3)[0m
    │   ├──BYU (6)
    │   │   ├──[92mBYU (6)[0m
    │   │   └──Duquesne (11)
    │   └──[92mIllinois (3)[0m
    │       ├──[92mIllinois (3)[0m
    │       └──Morehead State (14)
    └──Iowa State (2)
        ├──Washington State (7)
        │   ├──[92mWashington State (7)[0m
        │   └──Drake (10)
        └──[92mIowa State (2)[0m
            ├──[92mIowa State (2)[0m
            └──South Dakota State (15)


In [249]:
sr_to_kenpom = {
    'Brigham Young': 'BYU',
    'Saint Mary\'s (CA)': 'Saint Mary\'s',
    'St. John\'s (NY)': 'St. John\'s',
    'Pittsburgh': 'Pittsburgh',
    'Mississippi State': 'Mississippi St.',
    'Central Florida': 'UCF',
    'Virginia Commonwealth': 'VCU',
    'Southern California': 'USC',
    'Mississippi': 'Ole Miss',
    'Massachusetts': 'UMass',
    'Miami (FL)': 'Miami FL',
    'Nevada-Las Vegas': 'UNLV',
    'North Carolina State': 'N.C. State',
    'Louisiana State': 'LSU',
    'Connecticut': 'Connecticut',
    'North Carolina': 'North Carolina',
    'Pittsburgh': 'Pittsburgh',
    'St. John\'s (NY)': 'St. John\'s',
    'Louisiana State': 'LSU',
    'Central Florida': 'UCF',
    'Texas A&M-Corpus Christi': 'Texas A&M Corpus Chris',
    'Purdue Fort Wayne': 'Purdue Fort Wayne',
    'Loyola (IL)': 'Loyola Chicago',
    'College of Charleston': 'Charleston',
    'Loyola (IL)': 'Loyola Chicago',
    'Saint Joseph\'s': 'Saint Joseph\'s',
    'Saint Francis (PA)': 'Saint Francis',
    'Arkansas-Pine Bluff': 'Arkansas Pine Bluff',
    'SIU Edwardsville': 'SIU Edwardsville',
    'Maryland-Baltimore County': 'UMBC',
    'Maryland-Eastern Shore': 'Maryland Eastern Shore',
    'Massachusetts-Lowell': 'UMass Lowell',
    'Charleston Southern': 'Charleston Southern',
    'East Tennessee State': 'East Tennessee St.',
    'Florida International': 'FIU',
    'Loyola (IL)': 'Loyola Chicago',
    'Albany (NY)': 'Albany',
    'College of Charleston': 'Charleston',
    'Detroit Mercy': 'Detroit Mercy',
    'LIU': 'LIU',
    'North Carolina State': 'N.C. State',
    'Southeast Missouri State': 'Southeast Missouri St.',
    'Louisiana-Monroe': 'Louisiana Monroe',
    'Nicholls State': 'Nicholls St.',
    'Middle Tennessee': 'Middle Tennessee',
    'UMass Lowell': 'UMass Lowell',
    'UNC Asheville': 'UNC Asheville',
    'UNC Greensboro': 'UNC Greensboro',
    'UNC Wilmington': 'UNC Wilmington',
    'Texas-Rio Grande Valley': 'UT Rio Grande Valley',
    'Western Carolina': 'Western Carolina',
    'Northern Illinois': 'Northern Illinois',
    'Southern Illinois': 'Southern Illinois',
    'Fairleigh Dickinson': 'Fairleigh Dickinson',
    'Alcorn State': 'Alcorn St.',
    'Cal State Bakersfield': 'Cal St. Bakersfield',
    'Cal State Fullerton': 'Cal St. Fullerton',
    'Cal State Northridge': 'Cal St. Northridge',
    'Prairie View': 'Prairie View A&M',
    'Southern Methodist': 'SMU',
    'Southern Mississippi': 'Southern Miss',
    'East Tennessee State': 'East Tennessee St.',
    'Eastern Illinois': 'Eastern Illinois',
    'Eastern Kentucky': 'Eastern Kentucky',
    'Eastern Michigan': 'Eastern Michigan',
    'Eastern Washington': 'Eastern Washington',
    'Illinois-Chicago': 'Illinois Chicago',
    'Kansas City': 'UMKC',
    'Louisiana-Monroe': 'Louisiana Monroe',
    'Maryland-Eastern Shore': 'Maryland Eastern Shore',
    'Tennessee-Martin': 'Tennessee Martin',
    'Texas Southern': 'Texas Southern',
    'North Dakota State': 'North Dakota St.',
    'Southeast Missouri State': 'Southeast Missouri St.',
    'Virginia Commonwealth': 'VCU',
    'UConn': 'Connecticut',
    'San Diego State': 'San Diego St.',
    'Morehead State': 'Morehead St.',
    'Washington State': 'Washington St.',
    'Iowa State': 'Iowa St.',
    'South Dakota State': 'South Dakota St.'
}

In [None]:
east_2024 = Region(read_unplayed_tournament(BASE_URL.format(2024), "east"))
west_2024 = Region(read_unplayed_tournament(BASE_URL.format(2024), "west"))
south_2024 = Region(read_unplayed_tournament(BASE_URL.format(2024), "south"))
midwest_2024 = Region(read_unplayed_tournament(BASE_URL.format(2024), "midwest"))

# Create a Tournament object
tournament = Tournament(east_2024, west_2024, south_2024, midwest_2024)
# Simulate the tournament
tournament.simulate_tournament()

In [166]:
def create_tournament_tree(year):
    q = []
    # add initial games to queue (probably will need special case for play-in games)
    # east plays west and south plays midwest in final four, so process east, west, south, midwest in that order
    print(read_unplayed_tournament(BASE_URL.format(year), "east"))
    print(read_unplayed_tournament(BASE_URL.format(year), "west"))
    print(read_unplayed_tournament(BASE_URL.format(year), "south"))
    print(read_unplayed_tournament(BASE_URL.format(year), "midwest"))
    

    # could start simulations in parallel when you add games to the queue
    while len(q) >= 2:
        # pop off two games from the queue
        # simulate those two games and do appropriate calculation and bookkeeping
            # uncertainty could be here in terms of player score (use reg. season to make distribution?)
            # simulation will lead to uncertainty in game outcomes
        # create new Game object with the winner of those two games
        # mark the game as the parent of the other two games
        # add the new game to queue
        pass

In [None]:
create_tournament_tree(2024)

[{'team_1': {'seed': '1', 'name': 'UConn', 'link': '/cbb/schools/connecticut/men/2024.html'}, 'team_2': {'seed': '16', 'name': 'Stetson', 'link': '/cbb/schools/stetson/men/2024.html'}, 'location': 'Brooklyn, NY'}, {'team_1': {'seed': '8', 'name': 'Florida Atlantic', 'link': '/cbb/schools/florida-atlantic/men/2024.html'}, 'team_2': {'seed': '9', 'name': 'Northwestern', 'link': '/cbb/schools/northwestern/men/2024.html'}, 'location': 'Brooklyn, NY'}, {'team_1': {'seed': '5', 'name': 'San Diego State', 'link': '/cbb/schools/san-diego-state/men/2024.html'}, 'team_2': {'seed': '12', 'name': 'UAB', 'link': '/cbb/schools/alabama-birmingham/men/2024.html'}, 'location': 'Spokane, WA'}, {'team_1': {'seed': '4', 'name': 'Auburn', 'link': '/cbb/schools/auburn/men/2024.html'}, 'team_2': {'seed': '13', 'name': 'Yale', 'link': '/cbb/schools/yale/men/2024.html'}, 'location': 'Spokane, WA'}, {'team_1': {'seed': '6', 'name': 'BYU', 'link': '/cbb/schools/brigham-young/men/2024.html'}, 'team_2': {'seed': '