### Score Calculation and Game Simulation Functions

In [1034]:
import pandas as pd
import copy
import numpy as np

In [1035]:
df = pd.read_csv('final_df.csv')

In [1036]:
df.columns

Index(['team', 'conference', 'tempo_adj', 'off_eff_adj', 'def_eff_adj',
       'Off-eFG%', 'Off-TO%', 'Off-OR%', 'Off-FTRate', 'Def-eFG%', 'Def-TO%',
       'Def-OR%', 'Def-FTRate', 'EffHgt', 'C-Hgt', 'Bench', 'Continuity',
       'Experience', '3P%', 'FT%', '3PA%', 'SOS'],
      dtype='object')

In [1037]:
from sklearn.preprocessing import MinMaxScaler

# List of stats we want to normalize
stats_to_normalize = [
    'tempo_adj', 'off_eff_adj', 'def_eff_adj',
       'Off-eFG%', 'Off-TO%', 'Off-OR%', 'Off-FTRate', 'Def-eFG%', 'Def-TO%',
       'Def-OR%', 'Def-FTRate', 'EffHgt', 'C-Hgt', 'Bench', 'Continuity',
       'Experience', '3P%', 'FT%', '3PA%', 'SOS'
]

# Apply Min-Max Scaling
scaler = MinMaxScaler()
df[stats_to_normalize] = scaler.fit_transform(df[stats_to_normalize])

In [1038]:
df.to_csv('ratings.csv')

In [1039]:
def calculate_team_rating(row):
    # Calculate Adjusted Efficiency Margin (normalized)
    adj_em = row["off_eff_adj"] - row["def_eff_adj"]

    # Weights for each stat
    weights = {
        "AdjEM": 0.40,   # Overall efficiency
        "SOS": 0.12,     # Strength of Schedule 12
        "tempo_adj": 0.05,  # Tempo/Pace
        "Off-eFG%": 0.09,  # Offensive Shooting Efficiency
        "Def-eFG%": 0.07,  # Defensive Shooting Efficiency
        "Off-TO%": -0.05,  # Turnovers (negative impact)
        "Def-TO%": 0.05,   # Defensive Turnovers Forced
        "Off-OR%": 0.05,   # Offensive Rebounding
        "Def-OR%": -0.05,  # Opponent Offensive Rebounding (lower is better)
        "Off-FTRate": 0.03,  # Getting to the FT line
        "Def-FTRate": -0.03,  # Opponent getting to the FT line (lower is better)
        "Experience": 0.10,  # Veteran teams perform better
        "Continuity": 0.05,  # Returning players help chemistry
        "Bench": 0.03,  # Bench depth
        "3P%": 0.06,   # 3-point shooting
        "FT%": 0.03,   # Free throw shooting
        "3PA%": 0.04,  # Volume of 3-pointers taken
        "EffHgt": 0.03,  # Effective height
        "C-Hgt": 0.00  # Center height
    }

    # Compute team rating as a weighted sum
    rating = (
        weights["AdjEM"] * adj_em +
        weights["SOS"] * row["SOS"] +
        weights["tempo_adj"] * row["tempo_adj"] +
        weights["Off-eFG%"] * row["Off-eFG%"] +
        weights["Def-eFG%"] * row["Def-eFG%"] +
        weights["Off-TO%"] * row["Off-TO%"] +
        weights["Def-TO%"] * row["Def-TO%"] +
        weights["Off-OR%"] * row["Off-OR%"] +
        weights["Def-OR%"] * row["Def-OR%"] +
        weights["Off-FTRate"] * row["Off-FTRate"] +
        weights["Def-FTRate"] * row["Def-FTRate"] +
        weights["Experience"] * row["Experience"] +
        weights["Continuity"] * row["Continuity"] +
        weights["Bench"] * row["Bench"] +
        weights["3P%"] * row["3P%"] +
        weights["FT%"] * row["FT%"] +
        weights["3PA%"] * row["3PA%"] +
        weights["EffHgt"] * row["EffHgt"] +
        weights["C-Hgt"] * row["C-Hgt"]
    )

    return rating

In [1040]:
df["team_rating"] = df.apply(calculate_team_rating, axis=1)

Now that we have the final rating for all kenpom stats we can add rating boosts and matchup boosts

In [1042]:
df.head(20)

Unnamed: 0,team,conference,tempo_adj,off_eff_adj,def_eff_adj,Off-eFG%,Off-TO%,Off-OR%,Off-FTRate,Def-eFG%,...,EffHgt,C-Hgt,Bench,Continuity,Experience,3P%,FT%,3PA%,SOS,team_rating
0,Abilene Christian,WAC,0.658385,0.326087,0.391421,0.277108,0.650794,0.508264,0.588235,0.514706,...,0.35,0.346667,0.614198,0.467202,0.368132,0.165414,0.542017,0.138138,0.338123,0.231129
1,Air Force,MWC,0.347826,0.354348,0.640751,0.481928,0.650794,0.202479,0.517647,0.75,...,0.516667,0.546667,0.45679,0.65328,0.491758,0.481203,0.189076,0.834835,0.537676,0.216976
2,Akron,MAC,0.782609,0.676087,0.525469,0.801205,0.325397,0.628099,0.180392,0.375,...,0.0,0.146667,0.768519,0.319946,0.384615,0.744361,0.668067,0.741742,0.237867,0.407039
3,Alabama,SEC,1.0,0.954348,0.243968,0.861446,0.333333,0.698347,0.701961,0.257353,...,0.85,0.613333,0.753086,0.586345,0.821429,0.62406,0.529412,0.765766,1.0,0.806767
4,Alabama A&M,SWAC,0.757764,0.204348,0.75067,0.180723,0.746032,0.665289,0.643137,0.727941,...,0.483333,0.48,0.916667,0.452477,0.236264,0.308271,0.323529,0.627628,0.066731,0.027015
5,Alabama St.,SWAC,0.552795,0.408696,0.619303,0.301205,0.055556,0.433884,0.301961,0.433824,...,0.383333,0.533333,0.682099,0.773762,0.445055,0.466165,0.453782,0.666667,0.099936,0.207834
6,Albany,AE,0.540373,0.447826,0.664879,0.433735,0.253968,0.603306,0.286275,0.779412,...,0.15,0.186667,0.314815,0.227577,0.631868,0.37594,0.563025,0.459459,0.139847,0.222112
7,Alcorn St.,SWAC,0.360248,0.326087,0.662198,0.240964,0.563492,0.652893,0.321569,0.632353,...,0.183333,0.16,0.611111,0.31593,0.260989,0.383459,0.546218,0.267267,0.222861,0.090994
8,American,PL,0.291925,0.423913,0.549598,0.584337,0.34127,0.247934,0.286275,0.566176,...,0.366667,0.32,0.567901,1.0,0.697802,0.631579,0.710084,0.723724,0.151341,0.308938
9,Appalachian St.,SB,0.267081,0.402174,0.380697,0.457831,0.349206,0.400826,0.509804,0.125,...,0.716667,0.586667,0.358025,0.305221,0.612637,0.548872,0.273109,0.612613,0.273308,0.285019


#### Rating and Matchup Boosts

In [1044]:
def rating_boosts (row, opponent):
    boost_rating = 0

    #Momentum Boost
    momentum_boost_set = {""}
    if row['team'] in momentum_boost_set:
        boost_rating += 0.1
        
    return boost_rating
    

In [1045]:
#Sim Games
def simulate_game (team_name1, team_name2, df):
    
    team1_row = df[df["team"] == team_name1].iloc[0]
    team2_row = df[df['team'] == team_name2].iloc[0]

    team1_score = calculate_team_rating(team1_row)
    team2_score = calculate_team_rating(team2_row)
    
    team1_score += rating_boosts(team1_row,team2_row)
    team2_score += rating_boosts(team2_row,team1_row)
    
    rating_diff = (team1_score - team2_score) * 7.5  # Scale factor to adjust win probability distribution

    win_prob_team1 = 1 / (1 + np.exp(-rating_diff))  

    # Simulate game outcome
    random_number = np.random.rand()
    
    if random_number < win_prob_team1:
        winner = team_name1
        winning_prob = win_prob_team1
    else: 
        winner = team_name2
        winning_prob = 1-win_prob_team1
        
    return winner, winning_prob
    

#### Bracket Dictionary (2023 for now)

In [1047]:
def load_bracket():
    bracket = {
        "South": [
            (1, "Auburn"), (16, "Alabama St."),
            (8, "Louisville"), (9, "Creighton"),
            (5, "Michigan"), (12, "UC San Diego"),
            (4, "Texas A&M"), (13, "Yale"),
            (6, "Mississippi"), (11, "North Carolina"),
            (3, "Iowa St."), (14, "Lipscomb"),
            (7, "Marquette"), (10, "New Mexico"),
            (2, "Michigan St."), (15, "Bryant")
        ],
        "West": [
            (1, "Florida"), (16, "Norfolk St."),
            (8, "Connecticut"), (9, "Oklahoma"),
            (5, "Memphis"), (12, "Colorado St."),
            (4, "Maryland"), (13, "Grand Canyon"),
            (6, "Missouri"), (11, "Drake"),
            (3, "Texas Tech"), (14, "UNC Wilmington"),
            (7, "Kansas"), (10, "Arkansas"),
            (2, "St. John's"), (15, "Nebraska Omaha")
        ],
        "East": [
            (1, "Duke") , (16, "American"),
            (8, "Mississippi St."), (9, "Baylor"),
            (5, "Oregon"), (12, "Liberty"),
            (4, "Arizona"), (13, "Akron"),
            (6, "BYU"), (11, "VCU"),
            (3, "Wisconsin"), (14, "Montana"),
            (7, "Saint Mary's"), (10, "Vanderbilt"),
            (2, "Alabama"), (15, "Robert Morris")
        ],
        "Midwest": [
            (1, "Houston"), (16, "SIUE"),
            (8, "Gonzaga"), (9, "Georgia"),
            (5, "Clemson"), (12, "McNeese"),
            (4, "Purdue"), (13, "High Point"),
            (6, "Illinois"), (11, "Texas"),
            (3, "Kentucky"), (14, "Troy"),
            (7, "UCLA"), (10, "Utah St."),
            (2, "Tennessee"), (15, "Wofford")
        ]
    }
    return bracket

In [1048]:
def print_bracket(bracket, old_bracket, game_prob):
    for region in old_bracket.keys():
        print(f"🌎 {region} Region:\n")

        # Extract winners from the current bracket
        current_winners = {team[1] for team in bracket[region]}  # Store only the team names in a set

        for idx, matchup in enumerate(old_bracket[region]):# Loop through the previous round's matchups
            
            spacing = (idx % 2 != 0) and (idx < len(old_bracket[region]) - 1)
            
            seed, team = matchup

            if team in current_winners:  
                winning_prob = game_prob[team]
                print(f"{seed} {team} ✅ --- {winning_prob}%")
            else: 
                print(f"{seed} {team} ❌")

            if spacing:
                print("\n")
        
        print("-"*30)

In [1049]:
def print_final_four(teams, winners, game_prob):
    print("\n🏆 Final Four 🏆\n")
    if teams[0][0][1] == winners[0][1]:
        winning_prob = game_prob[teams[0][0][1]]
        print(f"{teams[0][0][0]} {teams[0][0][1]} ✅ ---{winning_prob}%")
        print(f"{teams[1][0][0]} {teams[1][0][1]} ❌")
    else:
        winning_prob = game_prob[teams[1][0][1]]
        print(f"{teams[0][0][0]} {teams[0][0][1]} ❌")
        print(f"{teams[1][0][0]} {teams[1][0][1]} ✅ ---{winning_prob}%")
    print('\n')
    if teams[2][0][1] == winners[1][1]:
        winning_prob = game_prob[teams[2][0][1]]
        print(f"{teams[2][0][0]} {teams[2][0][1]} ✅ ---{winning_prob}%")
        print(f"{teams[3][0][0]} {teams[3][0][1]} ❌")
    else:
        winning_prob = game_prob[teams[3][0][1]]
        print(f"{teams[2][0][0]} {teams[2][0][1]} ❌")
        print(f"{teams[3][0][0]} {teams[3][0][1]} ✅ ---{winning_prob}%")
    print("\n" + "-"*30)

In [1050]:
def print_chip(teams, champ, game_prob):
    print("\n🏆 National Championship 🏆\n")
    if teams[0][1] == champ:
        winning_prob = game_prob[teams[0][1]]
        print(f"{teams[0][0]} {teams[0][1]} ✅ --- {winning_prob}%")
        print(f"{teams[1][0]} {teams[1][1]} ❌")
    else:
        winning_prob = game_prob[teams[1][1]]
        print(f"{teams[0][0]} {teams[0][1]} ❌")
        print(f"{teams[1][0]} {teams[1][1]} ✅ --- {winning_prob}%")
    print("\n" + "-"*30)
    print("\n🏀🏆 Champion 🏆🏀\n")
    print(champ)

In [1051]:
def madness_sim(bracket, df):
    print("🏀 NCAA March Madness Bracket 🏀\n")
    regions = ['West','East' , 'Midwest', 'South']
    
    round_names = ["First Round", "Round of 32", "Sweet 16", "Elite 8"]
    round_winners_list = []
    for round_index, round_name in enumerate(round_names):
        game_prob = {}
        print(f"\n🏀 {round_name} 🏀\n")
        old_bracket = copy.deepcopy(bracket)
        for region in regions:
            winners_list = []
            step = 16 // (2 ** round_index)
    
            for i in range(0, step, 2):
                team1 = bracket[region][i][1]
                team2 = bracket[region][i+1][1]
                winner, winning_prob = simulate_game(team1, team2, df)

                game_prob[winner] = round(winning_prob*100) #game probability dictionary - team:prob
                if team1 == winner:
                    winners_list.append(bracket[region][i])
                else:
                    winners_list.append(bracket[region][i+1])
            
            bracket[region] = winners_list  # Update the bracket
            round_winners_list.append(winners_list)
        print_bracket(bracket, old_bracket, game_prob)

    game_prob = {}
    final_four = []
    for region in regions:
        final_four.append(bracket[region])
        
    #Final Four
    winners_list = []
    for i in range(0,4,2):
        team1 = final_four[i][0][1]
        team2 = final_four[i+1][0][1]
        winner, winning_prob = simulate_game(team1,team2, df)
        game_prob[winner] = round(winning_prob*100) #game probability dictionary - team:prob
        if team1 == winner:
            winners_list.append(final_four[i][0])
        else:
            winners_list.append(final_four[i+1][0])
    print_final_four(final_four, winners_list, game_prob)

    #Championship
    game_prob = {}
    champion, winning_prob = simulate_game(winners_list[0][1], winners_list[1][1], df)
    game_prob[champion] = round(winning_prob*100) #game probability dictionary - team:prob
    print_chip(winners_list, champion, game_prob)

    return champion

In [1052]:
madness_sim(load_bracket(),df)

🏀 NCAA March Madness Bracket 🏀


🏀 First Round 🏀

🌎 South Region:

1 Auburn ✅ --- 99%
16 Alabama St. ❌


8 Louisville ❌
9 Creighton ✅ --- 41%


5 Michigan ❌
12 UC San Diego ✅ --- 35%


4 Texas A&M ✅ --- 82%
13 Yale ❌


6 Mississippi ✅ --- 59%
11 North Carolina ❌


3 Iowa St. ✅ --- 88%
14 Lipscomb ❌


7 Marquette ✅ --- 59%
10 New Mexico ❌


2 Michigan St. ✅ --- 92%
15 Bryant ❌
------------------------------
🌎 West Region:

1 Florida ✅ --- 98%
16 Norfolk St. ❌


8 Connecticut ❌
9 Oklahoma ✅ --- 58%


5 Memphis ✅ --- 47%
12 Colorado St. ❌


4 Maryland ✅ --- 86%
13 Grand Canyon ❌


6 Missouri ✅ --- 89%
11 Drake ❌


3 Texas Tech ✅ --- 90%
14 UNC Wilmington ❌


7 Kansas ❌
10 Arkansas ✅ --- 30%


2 St. John's ✅ --- 87%
15 Nebraska Omaha ❌
------------------------------
🌎 East Region:

1 Duke ✅ --- 98%
16 American ❌


8 Mississippi St. ❌
9 Baylor ✅ --- 50%


5 Oregon ✅ --- 74%
12 Liberty ❌


4 Arizona ✅ --- 90%
13 Akron ❌


6 BYU ✅ --- 65%
11 VCU ❌


3 Wisconsin ✅ --- 93%
14 Montana ❌


7 Sain

'Oklahoma'

In [1053]:
def tournament_sim_no_print(bracket, df):
    regions = ['East', 'West', 'South', 'Midwest']
    
    round_names = ["First Round", "Round of 32", "Sweet 16", "Elite 8"]
    round_winners_list = []
    for round_index, round_name in enumerate(round_names):
        game_prob = {}
        old_bracket = copy.deepcopy(bracket)
        for region in regions:
            winners_list = []
            step = 16 // (2 ** round_index)
    
            for i in range(0, step, 2):
                team1 = bracket[region][i][1]
                team2 = bracket[region][i+1][1]
                winner, winning_prob = simulate_game(team1, team2, df)

                game_prob[winner] = round(winning_prob*100) #game probability dictionary - team:prob
                if team1 == winner:
                    winners_list.append(bracket[region][i])
                else:
                    winners_list.append(bracket[region][i+1])
            
            bracket[region] = winners_list  # Update the bracket
            round_winners_list.append(winners_list)

    game_prob = {}
    final_four = []
    for region in regions:
        final_four.append(bracket[region])
        
    #Final Four
    winners_list = []
    for i in range(0,4,2):
        team1 = final_four[i][0][1]
        team2 = final_four[i+1][0][1]
        winner, winning_prob = simulate_game(team1,team2, df)
        game_prob[winner] = round(winning_prob*100) #game probability dictionary - team:prob
        if team1 == winner:
            winners_list.append(final_four[i][0])
        else:
            winners_list.append(final_four[i+1][0])

    #Championship
    game_prob = {}
    champion, winning_prob = simulate_game(winners_list[0][1], winners_list[1][1], df)
    game_prob[champion] = round(winning_prob*100) #game probability dictionary - team:prob

    return champion

In [1054]:
def winner_probabilities():
    n = 1000
    end_string = ''
    winners_count = {}
    for i in range(n):
        team_champ = tournament_sim_no_print(load_bracket(),df)
        winners_count[team_champ] = winners_count.get(team_champ, 0) + 1 #adds one
        
    for team in sorted(winners_count, key=winners_count.get, reverse=True):
        prob = round(((winners_count[team]/n)*100),1)
        end_string += f"{team} -- {prob}%\n"

    return end_string