In [200]:
import polars as pl
import pathlib
import dataclasses
import pandas as pd
import functools
import sys

In [194]:
# Working directory of the kernel. Later cells use `cwd.parent` as the base for
# both the import path and the data file locations.
cwd = pathlib.Path.cwd()

In [206]:
# Put the parent of the working directory on the import path (the `uro_cbb`
# import in the next cell presumably resolves from there).
# Guarded with a membership check so that re-running this cell does not keep
# appending duplicate entries to sys.path.
project_root = str(cwd.parent)
if project_root not in sys.path:
    sys.path.append(project_root)

In [208]:
import uro_cbb.bracket as bracket

In [209]:
# Load the men's submission CSV from the data tree under the parent of the
# working directory. The displayed frame has `ID` and `Pred` columns.
# NOTE(review): the output also shows an "Unnamed: 0" column, which looks like a
# saved index -- consider `index_col=0` if nothing depends on that column.
submission_csv_path = cwd.parent / 'data/kaggle_2025/cleaned/submissions/submission_base_mens_with_kenpom.csv'
submission_probs = pd.read_csv(submission_csv_path)
submission_probs

Unnamed: 0,ID,Pred
0,2025_1101_1102,0.632365
1,2025_1101_1103,0.325837
2,2025_1101_1104,0.079927
3,2025_1101_1105,0.793165
4,2025_1101_1106,0.609938
...,...,...
131402,2025_3477_3479,0.910224
131403,2025_3477_3480,0.782747
131404,2025_3478_3479,0.401514
131405,2025_3478_3480,0.192509


In [210]:
# Raw men's team tables. `mens_team` is the lookup table handed to
# `bracket.Team.from_id` further down; the spellings table is loaded alongside it.
teams_csv_path = cwd.parent / 'data/kaggle_2025/raw/MTeams.csv'
mens_team = pd.read_csv(teams_csv_path)

spellings_csv_path = cwd.parent / 'data/kaggle_2025/raw/MTeamSpellings.csv'
mens_team_spellings = pd.read_csv(spellings_csv_path)

In [211]:
# First-round pairings, one [team_id, team_id] entry per game (32 games). The
# IDs are resolved against `mens_team` when the Team objects are built.
#
# Order matters: later cells pair adjacent entries (0 with 1, 2 with 3, ...) to
# form each following round, and the four 8-game regional groups are combined
# as group 0 vs group 2 and group 1 vs group 3 in the fifth round.
#
# NOTE(review): this list was LLM-generated (Claude). The seed/name comments are
# not checked by any code here -- verify the IDs against MTeams.csv.
matchups_raw = [
    # SOUTH Region (Left side, top to bottom)
    [1120, 1106],    # Auburn (1) vs. Alabama State (16)
    [1257, 1166],    # Louisville (8) vs. Creighton (9)
    [1276, 1471],    # Michigan (5) vs. UC San Diego (12)
    [1401, 1463],    # Texas A&M (4) vs. Yale (13)
    [1279, 1314],    # Ole Miss (6) vs. North Carolina (11)
    [1235, 1252],    # Iowa State (3) vs. Lipscomb (14)
    [1266, 1307],    # Marquette (7) vs. New Mexico (10)
    [1277, 1136],    # Michigan State (2) vs. Bryant (15)
    
    # EAST Region (Right side, top to bottom)
    [1181, 1291],    # Duke (1) vs. Mount St. Mary's (16)
    [1280, 1124],    # Mississippi State (8) vs. Baylor (9)
    [1332, 1251],    # Oregon (5) vs. Liberty (12)
    [1112, 1103],    # Arizona (4) vs. Akron (13)
    [1140, 1433],    # BYU (6) vs. VCU (11)
    [1458, 1285],    # Wisconsin (3) vs. Montana (14)
    [1388, 1435],    # Saint Mary's (7) vs. Vanderbilt (10)
    [1104, 1352],    # Alabama (2) vs. Robert Morris (15)
    
    # WEST Region (Bottom left, top to bottom)
    [1196, 1313],    # Florida (1) vs. Norfolk State (16)
    [1163, 1328],    # UConn (8) vs. Oklahoma (9)
    [1272, 1161],    # Memphis (5) vs. Colorado State (12)
    [1268, 1213],    # Maryland (4) vs. Grand Canyon (13)
    [1281, 1179],    # Missouri (6) vs. Drake (11)
    [1403, 1423],    # Texas Tech (3) vs. UNC Wilmington (14)
    [1242, 1116],    # Kansas (7) vs. Arkansas (10)
    [1385, 1303],    # St. John's (2) vs. Omaha (15)
    
    # MIDWEST Region (Bottom right, top to bottom)
    [1222, 1188],    # Houston (1) vs. SIU Edwardsville (16)
    [1211, 1208],    # Gonzaga (8) vs. Georgia (9)
    [1155, 1270],    # Clemson (5) vs. McNeese (12)
    [1345, 1219],    # Purdue (4) vs. High Point (13)
    [1228, 1462],    # Illinois (6) vs. Xavier (11)
    [1246, 1407],    # Kentucky (3) vs. Troy (14)
    [1417, 1429],    # UCLA (7) vs. Utah State (10)
    [1397, 1459]     # Tennessee (2) vs. Wofford (15)
]

In [213]:
# Turn each raw ID pair into a pair of Team objects, lower team ID first.
# NOTE(review): presumably the low-ID-first order mirrors the submission IDs
# (`2025_<id>_<id>` in the frame shown above) -- confirm against `bracket`.
matchups = [
    [bracket.Team.from_id(team_id, mens_team) for team_id in sorted(pair)]
    for pair in matchups_raw
]

In [217]:
# NOTE(review): `Game` and `HyperGame` are not imported or defined in any cell
# shown here (only `uro_cbb.bracket` is imported, as `bracket`), so this cell
# relies on names already present in the kernel -- confirm where they come from
# before a Restart & Run All.


def _pair_adjacent(previous_round):
    """Combine neighbours (0 with 1, 2 with 3, ...) of a round into HyperGames."""
    next_round = []
    for left in range(0, len(previous_round), 2):
        next_round.append(HyperGame(previous_round[left], previous_round[left + 1]))
    return next_round


# Round 1: one Game per ordered team pair.
first_round = [Game(pair[0], pair[1]) for pair in matchups]

# Rounds 2-4: each entry is fed by two neighbouring entries of the previous round.
second_round = _pair_adjacent(first_round)
third_round = _pair_adjacent(second_round)
fourth_round = _pair_adjacent(third_round)

# Round 5: the four remaining entries are crossed 0-vs-2 and 1-vs-3 rather than
# adjacently. With the region order used in `matchups_raw` that is
# South vs West and East vs Midwest.
semifinal_a = HyperGame(fourth_round[0], fourth_round[2])
semifinal_b = HyperGame(fourth_round[1], fourth_round[3])
fifth_round = [semifinal_a, semifinal_b]

# Round 6: the single final game.
sixth_round = [HyperGame(fifth_round[0], fifth_round[1])]


In [218]:
# List every team in the final game's `all_win_probs`, highest value first.
print("PROB OF WINNING THE WHOLE TOURNAMENT")
title_game = sixth_round[0]
title_odds = sorted(
    title_game.all_win_probs.items(),
    key=lambda entry: -entry[1],
)
for team, prob in title_odds:
    print(team, prob)

PROB OF WINNING THE WHOLE TOURNAMENT
Team(_id=1120, name='Auburn') 0.1509411608831387
Team(_id=1181, name='Duke') 0.14696783129013896
Team(_id=1222, name='Houston') 0.14372633912922572
Team(_id=1196, name='Florida') 0.0963725771068435
Team(_id=1104, name='Alabama') 0.054247083932949414
Team(_id=1403, name='Texas Tech') 0.0407470391149578
Team(_id=1397, name='Tennessee') 0.03908034519325122
Team(_id=1235, name='Iowa St') 0.0296718524507985
Team(_id=1112, name='Arizona') 0.024828204027356988
Team(_id=1268, name='Maryland') 0.024175978633879568
Team(_id=1277, name='Michigan St') 0.020551690365418658
Team(_id=1458, name='Wisconsin') 0.017750191797743304
Team(_id=1246, name='Kentucky') 0.017239072957106916
Team(_id=1281, name='Missouri') 0.015272873223619868
Team(_id=1385, name="St John's") 0.01456541851338203
Team(_id=1345, name='Purdue') 0.01432953715242205
Team(_id=1211, name='Gonzaga') 0.014180877446131755
Team(_id=1140, name='BYU') 0.01243036424121394
Team(_id=1242, name='Kansas') 0.01

In [226]:
# For each first-round game, show only the entry with the highest value in
# `all_win_probs`, followed by a blank separator line.
print("PROB OF MAKING IT TO SECOND ROUND")
for game in first_round:
    ranked = sorted(game.all_win_probs.items(), key=lambda entry: -entry[1])
    # The [:1] slice keeps just the top entry and prints nothing for an empty dict.
    for team, prob in ranked[:1]:
        print(team, prob)
    print()

PROB OF MAKING IT TO SECOND ROUND
Team(_id=1120, name='Auburn') 0.9637880157310014

Team(_id=1257, name='Louisville') 0.5795817844931614

Team(_id=1276, name='Michigan') 0.6203590595649396

Team(_id=1401, name='Texas A&M') 0.7135302169085129

Team(_id=1279, name='Mississippi') 0.520322990091003

Team(_id=1235, name='Iowa St') 0.8042768248663442

Team(_id=1266, name='Marquette') 0.5784697017777151

Team(_id=1277, name='Michigan St') 0.8498342924657177

Team(_id=1181, name='Duke') 0.960376676385308

Team(_id=1124, name='Baylor') 0.5240621770168928

Team(_id=1332, name='Oregon') 0.564107353704447

Team(_id=1112, name='Arizona') 0.8095769278562722

Team(_id=1140, name='BYU') 0.6054862847500599

Team(_id=1458, name='Wisconsin') 0.8467626904494284

Team(_id=1388, name="St Mary's CA") 0.5849392133388314

Team(_id=1104, name='Alabama') 0.903389228423506

Team(_id=1196, name='Florida') 0.9257892460913566

Team(_id=1163, name='Connecticut') 0.551842001179024

Team(_id=1161, name='Colorado St') 0

In [227]:
# One group per second-round game: every team in that game's `all_win_probs`,
# highest value first, with a blank line between groups.
print("PROB OF MAKING IT TO SWEET 16")
for game in second_round:
    ranked = sorted(game.all_win_probs.items(), key=lambda entry: -entry[1])
    for team, prob in ranked:
        print(team, prob)
    print()

PROB OF MAKING IT TO SWEET 16
Team(_id=1120, name='Auburn') 0.7185654394717582
Team(_id=1257, name='Louisville') 0.17533373023591453
Team(_id=1166, name='Creighton') 0.10245629147898011
Team(_id=1106, name='Alabama St') 0.003644538813347026

Team(_id=1401, name='Texas A&M') 0.40252294641153286
Team(_id=1276, name='Michigan') 0.33696209818812656
Team(_id=1471, name='UC San Diego') 0.16174442524307014
Team(_id=1463, name='Yale') 0.09877053015727044

Team(_id=1235, name='Iowa St') 0.500099178127317
Team(_id=1279, name='Mississippi') 0.23564536065297143
Team(_id=1314, name='North Carolina') 0.2083026766576644
Team(_id=1252, name='Lipscomb') 0.055952784562047164

Team(_id=1277, name='Michigan St') 0.5068877499795769
Team(_id=1266, name='Marquette') 0.28473035503781874
Team(_id=1307, name='New Mexico') 0.17704830914490763
Team(_id=1136, name='Bryant') 0.03133358583769675

Team(_id=1181, name='Duke') 0.7329103660118221
Team(_id=1124, name='Baylor') 0.1417946167913299
Team(_id=1280, name='Miss

In [228]:
# One group per third-round game: every team in that game's `all_win_probs`,
# highest value first, with a blank line between groups.
print("PROB OF MAKING IT TO ELITE 8")
for game in third_round:
    ranked = sorted(game.all_win_probs.items(), key=lambda entry: -entry[1])
    for team, prob in ranked:
        print(team, prob)
    print()

PROB OF MAKING IT TO ELITE 8
Team(_id=1120, name='Auburn') 0.5448759931670526
Team(_id=1401, name='Texas A&M') 0.14021317021638113
Team(_id=1276, name='Michigan') 0.11200348054275354
Team(_id=1257, name='Louisville') 0.09698618303474817
Team(_id=1166, name='Creighton') 0.04865305429391674
Team(_id=1471, name='UC San Diego') 0.038632397012282424
Team(_id=1463, name='Yale') 0.018229390564824255
Team(_id=1106, name='Alabama St') 0.00040633116804106106

Team(_id=1235, name='Iowa St') 0.29734302004893653
Team(_id=1277, name='Michigan St') 0.26884990261285713
Team(_id=1266, name='Marquette') 0.1331035715729973
Team(_id=1279, name='Mississippi') 0.11399897234058154
Team(_id=1314, name='North Carolina') 0.09665496767818689
Team(_id=1307, name='New Mexico') 0.0694858256825474
Team(_id=1252, name='Lipscomb') 0.015122566702444096
Team(_id=1136, name='Bryant') 0.005441173361449077

Team(_id=1181, name='Duke') 0.541113290800723
Team(_id=1112, name='Arizona') 0.22226836794433893
Team(_id=1124, name=

In [230]:
# One group per fourth-round game: every team in that game's `all_win_probs`,
# highest value first, with a blank line between groups.
print("PROB OF MAKING IT TO FINAL 4")
for game in fourth_round:
    ranked = sorted(game.all_win_probs.items(), key=lambda entry: -entry[1])
    for team, prob in ranked:
        print(team, prob)
    print()

PROB OF MAKING IT TO FINAL 4
Team(_id=1120, name='Auburn') 0.39102110960596503
Team(_id=1235, name='Iowa St') 0.13698221288615023
Team(_id=1277, name='Michigan St') 0.1134973088767251
Team(_id=1401, name='Texas A&M') 0.06972588137255453
Team(_id=1276, name='Michigan') 0.05362230000352747
Team(_id=1266, name='Marquette') 0.048561483824679647
Team(_id=1257, name='Louisville') 0.04856003819033881
Team(_id=1279, name='Mississippi') 0.04086884226213464
Team(_id=1314, name='North Carolina') 0.03299189457667313
Team(_id=1307, name='New Mexico') 0.020827958332707768
Team(_id=1166, name='Creighton') 0.02056499451544005
Team(_id=1471, name='UC San Diego') 0.013991621032021053
Team(_id=1463, name='Yale') 0.005251827727941076
Team(_id=1252, name='Lipscomb') 0.002813155732998821
Team(_id=1136, name='Bryant') 0.0006820107339635391
Team(_id=1106, name='Alabama St') 3.736032617894552e-05

Team(_id=1181, name='Duke') 0.36863218214729876
Team(_id=1104, name='Alabama') 0.1937801758812485
Team(_id=1112, n

In [232]:
# One group per fifth-round game: every team in that game's `all_win_probs`,
# highest value first, with a blank line between groups.
print("PROB OF MAKING IT TO NATIONAL CHAMPIONSHIP")
for game in fifth_round:
    ranked = sorted(game.all_win_probs.items(), key=lambda entry: -entry[1])
    for team, prob in ranked:
        print(team, prob)
    print()

PROB OF MAKING IT TO NATIONAL CHAMPIONSHIP
Team(_id=1120, name='Auburn') 0.25689434218558166
Team(_id=1196, name='Florida') 0.1810752853571993
Team(_id=1403, name='Texas Tech') 0.08873893107775566
Team(_id=1235, name='Iowa St') 0.06884439194610596
Team(_id=1268, name='Maryland') 0.05876042923260125
Team(_id=1277, name='Michigan St') 0.05242967251446799
Team(_id=1385, name="St John's") 0.03918447095827576
Team(_id=1281, name='Missouri') 0.03864470156692355
Team(_id=1242, name='Kansas') 0.03269491118976948
Team(_id=1401, name='Texas A&M') 0.030065384063990782
Team(_id=1276, name='Michigan') 0.022168325017022796
Team(_id=1257, name='Louisville') 0.021098338747893564
Team(_id=1266, name='Marquette') 0.01945753974337282
Team(_id=1279, name='Mississippi') 0.016096227881871986
Team(_id=1163, name='Connecticut') 0.012990089870224215
Team(_id=1314, name='North Carolina') 0.012382273258865463
Team(_id=1166, name='Creighton') 0.007415549713494374
Team(_id=1161, name='Colorado St') 0.0072386771877