In [808]:
# BASIC SETUP

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import PoissonRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Team-level table; the code below relies on a 'Year' column plus every
# column named in `features`.
data = pd.read_csv("/Users/willfriel/March/march(4-30).csv")

features = [
    'AdjO', 'AdjD', 'BARTHAG',
    'EFG%', 'EFG%D',
    'FT_Off_Eff', 'FT_Def_Eff',
    'TOV%', 'TOV%D',
    'OREB%', 'DREB%',
    '2pt_Off_Eff', '2pt_Def_Eff',
    '3pt_Off_Eff', '3pt_Def_Eff',
    'AST%', 'Elite_SOS', 'WAB',
]

# Standardise each feature within its own season: subtract that year's mean
# and divide by that year's standard deviation, storing the result as '<col>_z'.
for col in features:
    by_year = data.groupby('Year')[col]
    data[col + '_z'] = (data[col] - by_year.transform('mean')) / by_year.transform('std')

# Hold out one season as the prediction target; every other season is training data.
year = 2025
data_train = data[data['Year'] != year]
data_test0 = data[data['Year'] == year].copy()

# NOTE(review): get_ranks is defined in a later cell, so that cell must be
# executed before this line runs on a fresh kernel.
data_test = get_ranks(data_train, data_test0)

In [809]:
# Columns fed to every round classifier and to the projected-wins regressor.
_MODEL_COLUMNS = [
    'Won_ConfT', 'Experience',
    'AdjO_z', 'AdjD_z', 'BARTHAG_z', 'EFG%_z', 'EFG%D_z',
    'FT_Off_Eff_z', 'FT_Def_Eff_z', 'TOV%_z', 'TOV%D_z',
    'OREB%_z', 'DREB%_z', '2pt_Off_Eff_z', '2pt_Def_Eff_z',
    '3pt_Off_Eff_z', '3pt_Def_Eff_z', 'AST%_z',
    'Elite_SOS_z', 'WAB_z',
]

# (probability column, rank column, training label column), listed in the
# order the output columns are appended to the test frame.
_ROUND_TARGETS = [
    ('Win_Prob', 'Natty_Rank', 'Won_Natty'),
    ('CG_Prob', 'CG_Rank', 'Champ_Game'),
    ('F4_Prob', 'F4_Rank', 'Final_4'),
    ('E8_Prob', 'E8_Rank', 'Elite_8'),
    ('S16_Prob', 'S16_Rank', 'Sweet_16'),
    ('R32_Prob', 'R32_Rank', 'Round_32'),
]


def get_ranks(data_train, data_test):
    """Score every team in ``data_test`` for each tournament round.

    For each round a logistic model is fitted on ``data_train`` and used to
    predict a probability for every row of ``data_test``; the probabilities
    are then ranked in descending order (ties share the smallest rank).
    A 'Proj_Wins' column from ``get_proj_wins`` is added last.

    Args:
        data_train: frame holding the model columns, the round label columns
            ('Won_Natty', 'Champ_Game', 'Final_4', 'Elite_8', 'Sweet_16',
            'Round_32') and 'Wins'.
        data_test: frame holding the model columns. It is modified in place.

    Returns:
        ``data_test`` itself, with the '*_Prob', '*_Rank' and 'Proj_Wins'
        columns added.
    """
    x_train = data_train[_MODEL_COLUMNS]
    x_test = data_test[_MODEL_COLUMNS]

    for prob_col, rank_col, label_col in _ROUND_TARGETS:
        # Hand over the labels from the frame we were given, so features and
        # labels always come from the same rows (not from a global).
        data_test.loc[:, prob_col] = logreg_ridge(x_train, x_test, data_train[label_col])
        data_test.loc[:, rank_col] = data_test[prob_col].rank(ascending=False, method='min').astype(int)

    data_test.loc[:, 'Proj_Wins'] = get_proj_wins(x_train, x_test, data_train['Wins'])

    return data_test


def logreg_ridge(x_train, x_test, y_train):
    """Fit a cross-validated L2 logistic regression and score ``x_test``.

    Args:
        x_train: training feature matrix.
        x_test: feature matrix to score; same columns as ``x_train``.
        y_train: the training labels (array-like aligned with ``x_train``).
            For backward compatibility a string is also accepted and is
            treated as a column name of the module-level ``data_train``.

    Returns:
        1-D array with, for each row of ``x_test``, the predicted probability
        of the second class in the fitted model's ``classes_`` (the positive
        class when labels are 0/1).
    """
    if isinstance(y_train, str):
        # Legacy call style: look the label column up in the global frame.
        y_train = data_train[y_train]

    # The scaler is fitted on the training rows only and re-used for the test rows.
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x_train)

    ridge = LogisticRegressionCV(
        cv=5,
        penalty='l2',
        solver='lbfgs',
        scoring='roc_auc',
        class_weight='balanced',
        max_iter=10000
    )
    ridge.fit(x_scaled, y_train)

    x_test_scaled = scaler.transform(x_test)
    predicted_probs = ridge.predict_proba(x_test_scaled)[:, 1]

    return predicted_probs

def get_proj_wins(x_train, x_test, y_train_series, power=1.2, total_wins=63):
    """Project a win count for every row of ``x_test``.

    A standardised Poisson regression is fitted on the training data. Its
    predictions are raised to ``power`` and then rescaled so that they sum
    to ``total_wins``.

    Args:
        x_train: training feature matrix.
        x_test: feature matrix to score; same columns as ``x_train``.
        y_train_series: training win counts aligned with ``x_train``.
        power: exponent applied to the raw predictions before rescaling;
            values above 1 widen the gap between high and low predictions.
        total_wins: value the returned projections sum to. The default of 63
            is presumably the number of games in a 64-team bracket - confirm.

    Returns:
        1-D array of projected wins, one per row of ``x_test``.
    """
    model = make_pipeline(StandardScaler(), PoissonRegressor(alpha=0.5, max_iter=10000))
    model.fit(x_train, y_train_series)
    raw_wins = model.predict(x_test)

    sharpened = np.power(raw_wins, power)

    # Rescale so the field as a whole accounts for exactly `total_wins`.
    scale = total_wins / sharpened.sum()

    return sharpened * scale

In [810]:
# MATCHUP FUNCTION

def team_matchup(team1, team2):
    """Print the complete head-to-head report for two teams.

    Looks both teams up by 'Team' in the module-level ``data_test``, prints a
    header with their seeds, runs every category comparison in turn, and
    finishes with the win-percentage summary. The tempo comparison decides
    whether the shooting categories contribute their impact-based or their
    efficiency-based head-to-head scores.
    """
    seeds = [data_test[data_test['Team'] == name]['Seed'].iloc[0] for name in (team1, team2)]
    print("\t" * 4 + f"{team1}({seeds[0]}) vs. {team2}({seeds[1]})\n")

    # Each call prints its own section; the call order is the report order.
    sos = sos_matchup(team1, team2)
    tempo = tempo_matchup(team1, team2)
    off_def = off_def_matchup(team1, team2)
    twopt = twopt_matchup(team1, team2)
    threept = threept_matchup(team1, team2)
    ft = ft_matchup(team1, team2)
    turnovers = to_matchup(team1, team2)
    rebounds = reb_matchup(team1, team2)

    wab1, wab2 = sos[0], sos[1]
    use_imp = tempo[2]
    odnet1, odnet2 = off_def[8], off_def[9]
    to1net, to2net = turnovers[4], turnovers[5]
    rb1net, rb2net = rebounds[6], rebounds[7]

    # Shooting results are laid out as
    # (net A, net B, net A, net B, impact h2h A, impact h2h B, eff h2h A, eff h2h B),
    # so the head-to-head pair starts at index 4 (impact) or 6 (efficiency).
    start = 4 if use_imp else 6
    shooting_nets = []
    for result in (twopt, threept, ft):
        shooting_nets.extend(result[start:start + 2])

    get_win_percentages(team1, team2, wab1, wab2, odnet1, odnet2, *shooting_nets, to1net, to2net, rb1net, rb2net)
        
def get_win_percentages(team1, team2, wab1, wab2, odnet1, odnet2, p2net1, p2net2, p3net1, p3net2, ftnet1, ftnet2, to1net, to2net, rb1net, rb2net):
    """Print a win percentage for each team plus their round-by-round ranks.

    The ``*net*`` arguments are head-to-head scores on a [-1, 1] scale; the
    ``wab*`` arguments are raw 'WAB' values, min-max scaled here against the
    module-level ``data_test``. Every category is mapped onto [0, 1], the
    weighted sum of the team1-minus-team2 differences is normalised against
    the best and worst possible totals, and the result is passed through a
    logistic curve to produce team1's percentage (team2 gets the remainder).
    """
    row1 = data_test[data_test['Team'] == team1]
    row2 = data_test[data_test['Team'] == team2]

    # Read every displayed figure before printing anything. 'CG_Rank' is read
    # as well, although its section of the report is currently switched off.
    stats1, stats2 = {}, {}
    for column in ('Natty_Rank', 'CG_Rank', 'F4_Rank', 'E8_Rank', 'S16_Rank', 'R32_Rank', 'Proj_Wins'):
        stats1[column] = row1[column].iloc[0]
        stats2[column] = row2[column].iloc[0]

    def to_unit(value):
        # Map a [-1, 1] score onto [0, 1].
        return (value + 1) / 2

    wab_max = data_test['WAB'].max()
    wab_min = data_test['WAB'].min()

    def wab_to_unit(wab):
        # Min-max scale WAB to [-1, 1] first, then onto [0, 1] like the rest.
        signed = 2 * (wab - wab_min) / (wab_max - wab_min) - 1
        return to_unit(signed)

    # (weight, team1 value, team2 value) for each category.
    weighted = [
        (30, to_unit(odnet1), to_unit(odnet2)),
        (5, to_unit(p2net1), to_unit(p2net2)),
        (6, to_unit(p3net1), to_unit(p3net2)),
        (5, to_unit(ftnet1), to_unit(ftnet2)),
        (6, to_unit(to1net), to_unit(to2net)),
        (4, to_unit(rb1net), to_unit(rb2net)),
        (10, wab_to_unit(wab1), wab_to_unit(wab2)),
    ]

    # Extremes: every difference at +1 (best) or at -1 (worst).
    best = sum(weight for weight, _, _ in weighted)
    worst = -best

    score = sum(weight * (mine - theirs) for weight, mine, theirs in weighted)

    normalized_score = (score - worst) / (best - worst)
    scaled_score = 12 * (normalized_score - 0.5)

    percentage1 = 100 / (1 + math.exp(-scaled_score))
    percentage2 = 100 - percentage1

    print("\n\n\nWIN PERCENTAGE")
    print(f"{team1 + ':':<12} {percentage1:.1f} {'%'}\t{team2 + ':':<12} {percentage2:.1f} {'%'}")
    print()

    # The championship-game rank is deliberately left out of the printed report.
    rank_sections = [
        ("ROUND OF 32 RANK", 'R32_Rank'),
        ("SWEET 16 RANK", 'S16_Rank'),
        ("ELITE 8 RANK", 'E8_Rank'),
        ("FINAL 4 RANK", 'F4_Rank'),
        ("NATIONAL CHAMPION RANK", 'Natty_Rank'),
    ]
    for heading, column in rank_sections:
        print(heading)
        print(f"{team1 + ':':<12} {stats1[column]}/68\t{team2 + ':':<12} {stats2[column]}/68")
        print()

    print("PROJECTED TOURNAMENT WINS")
    print(f"{team1 + ':':<12} {stats1['Proj_Wins']:.2f}\t{team2 + ':':<12} {stats2['Proj_Wins']:.2f}")
    
def sos_matchup(team1, team2):
    """Print both teams' 'Elite_SOS' and 'WAB' values from ``data_test``.

    Returns:
        (team1 WAB, team2 WAB, team1 Elite_SOS, team2 Elite_SOS)
    """
    row1 = data_test[data_test['Team'] == team1]
    row2 = data_test[data_test['Team'] == team2]

    wab = (row1['WAB'].iloc[0], row2['WAB'].iloc[0])
    elite_sos = (row1['Elite_SOS'].iloc[0], row2['Elite_SOS'].iloc[0])

    report = [
        "STRENGTH OF SCHEDULE COMPARISON",
        "",
        "ELITE SOS",
        f"{team1 + ':':<12} {elite_sos[0]}\t{team2 + ':':<12} {elite_sos[1]}",
        "",
        "WINS ABOVE BUBBLE",
        f"{team1 + ':':<12} {wab[0]}\t{team2 + ':':<12} {wab[1]}",
    ]
    print("\n".join(report))

    return wab[0], wab[1], elite_sos[0], elite_sos[1]
    
def check_tempo(aT, bT, threshold=5):
    """Tell whether two tempo values are close to each other.

    Args:
        aT: first team's tempo.
        bT: second team's tempo.
        threshold: largest absolute gap (inclusive) still counted as close.

    Returns:
        True when ``abs(aT - bT) <= threshold``, otherwise False. The result
        is always a plain ``bool``, even for NumPy scalar inputs.
    """
    return bool(abs(aT - bT) <= threshold)
    
def tempo_matchup(team1, team2):
    """Print both teams' 'AdjT' and suggest which score family to read.

    ``check_tempo`` decides whether the two tempos are close; if they are the
    report points at the Impact Scores, otherwise at the Efficiency Scores.

    Returns:
        (team1 tempo, team2 tempo, flag returned by check_tempo)
    """
    tempo1 = data_test[data_test['Team'] == team1]['AdjT'].iloc[0]
    tempo2 = data_test[data_test['Team'] == team2]['AdjT'].iloc[0]

    use_impact = check_tempo(tempo1, tempo2)

    suggestion = (
        "Suggestion: Look at the teams' respective Impact Scores"
        if use_impact
        else "Suggestion: Look at the teams' respective Efficiency Scores"
    )

    print("\n\n\nTEMPO COMPARISON")
    print()
    print("ADJUSTED TEMPO")
    print(f"{team1 + ':':<12} {tempo1:.2f}\t{team2 + ':':<12} {tempo2:.2f}")
    print(suggestion)

    return tempo1, tempo2, use_impact
    
    
def off_def_matchup(team1, team2):
    """Print the offence/defence comparison and head-to-head scores.

    Each head-to-head score is log(offence 'AdjO' * opposing 'AdjD'),
    min-max scaled to [-1, 1] using the smallest and largest such product
    possible from the 'AdjO' and 'AdjD' extremes found in ``data_test``.

    Returns:
        (team1 AdjO, team2 AdjO, team1 AdjD, team2 AdjD,
         team1 EFG%, team2 EFG%, team1 EFG%D, team2 EFG%D,
         team1 head-to-head score, team2 head-to-head score)
    """
    row1 = data_test[data_test['Team'] == team1]
    row2 = data_test[data_test['Team'] == team2]

    floor = np.log(data_test['AdjO'].min() * data_test['AdjD'].min())
    ceiling = np.log(data_test['AdjO'].max() * data_test['AdjD'].max())

    columns = ('AdjO', 'AdjD', 'EFG%', 'EFG%D')
    stats1 = {column: row1[column].iloc[0] for column in columns}
    stats2 = {column: row2[column].iloc[0] for column in columns}

    def rescale(offense, defense):
        # Log of the offence x defence product, mapped onto [-1, 1].
        raw = np.log(offense * defense)
        return 2 * ((raw - floor) / (ceiling - floor)) - 1

    AB_net = rescale(stats1['AdjO'], stats2['AdjD'])
    BA_net = rescale(stats2['AdjO'], stats1['AdjD'])

    print("\n\n\nOFFENSE VS DEFENSE COMPARISON")
    print()
    # (heading, column, format spec); an empty spec prints the value as-is.
    sections = [
        ("ADJUSTED OFFENSIVE EFFICIENCY", 'AdjO', '.2f'),
        ("ADJUSTED DEFENSIVE EFFICIENCY", 'AdjD', '.2f'),
        ("OFFENSIVE EFFECTIVE FG%", 'EFG%', ''),
        ("DEFENSIVE EFFECTIVE FG%", 'EFG%D', ''),
    ]
    for heading, column, spec in sections:
        print(heading)
        print(f"{team1 + ':':<12} {stats1[column]:{spec}}\t{team2 + ':':<12} {stats2[column]:{spec}}")
        print()
    print("HEAD TO HEAD MATCHUP")
    print("OFFENSE VS DEFENSE (Closer to -1: Disadvantage, Closer to 0: Even, Closer to 1: Advantage)")
    print(f"{team1 + ':':<12} {AB_net:.2f}\t{team2 + ':':<12} {BA_net:.2f}")

    return (stats1['AdjO'], stats2['AdjO'], stats1['AdjD'], stats2['AdjD'],
            stats1['EFG%'], stats2['EFG%'], stats1['EFG%D'], stats2['EFG%D'],
            AB_net, BA_net)
    
def _shot_matchup(team1, team2, prefix, title):
    """Shared implementation of the 2pt / 3pt / FT comparison sections.

    Args:
        team1, team2: values of the 'Team' column in ``data_test``.
        prefix: column prefix of the shot type ('2pt', '3pt' or 'FT'); the
            columns read are '<prefix>_{Off,Def,Net}_{Impact,Eff}'.
        title: wording used in the section banner (e.g. 'TWO POINT').

    Returns:
        (team1 net impact, team2 net impact, team1 net eff, team2 net eff,
         team1 impact head-to-head, team2 impact head-to-head,
         team1 eff head-to-head, team2 eff head-to-head)
    """
    teamA = data_test[data_test['Team'] == team1]
    teamB = data_test[data_test['Team'] == team2]

    def head_to_head(kind):
        # Team A's offence vs team B's defence and vice versa, as the log of
        # the offence x defence product min-max scaled to [-1, 1] using the
        # column extremes found in data_test.
        # Closer to -1: advantage defence, 0: even, 1: advantage offence.
        off_col = f'{prefix}_Off_{kind}'
        def_col = f'{prefix}_Def_{kind}'
        low = np.log(data_test[off_col].min() * data_test[def_col].min())
        high = np.log(data_test[off_col].max() * data_test[def_col].max())

        a_off, a_def = teamA[off_col].iloc[0], teamA[def_col].iloc[0]
        b_off, b_def = teamB[off_col].iloc[0], teamB[def_col].iloc[0]

        ab = 2 * ((np.log(a_off * b_def) - low) / (high - low)) - 1
        ba = 2 * ((np.log(b_off * a_def) - low) / (high - low)) - 1
        return ab, ba

    teamA_net_impact = teamA[f'{prefix}_Net_Impact'].iloc[0]
    teamA_net_eff = teamA[f'{prefix}_Net_Eff'].iloc[0]
    teamB_net_impact = teamB[f'{prefix}_Net_Impact'].iloc[0]
    teamB_net_eff = teamB[f'{prefix}_Net_Eff'].iloc[0]

    AB_net_impact, BA_net_impact = head_to_head('Impact')
    AB_net_eff, BA_net_eff = head_to_head('Eff')

    tag = prefix.upper()
    scale_note = "(Closer to -1: Disadvantage, Closer to 0: Even, Closer to 1: Advantage)"

    print(f"\n\n\n{title} COMPARISON")
    print()
    print(f"NET {tag} IMPACT SCORE (Shows estimated {prefix} margin for team given adjusted tempo)")
    print(f"{team1 + ':':<12} {teamA_net_impact:.2f}\t{team2 + ':':<12} {teamB_net_impact:.2f}")
    print()
    print(f"NET {tag} EFFICIENCY SCORE (Shows estimated {prefix} margin for team given a fixed tempo)")
    print(f"{team1 + ':':<12} {teamA_net_eff:.2f}\t{team2 + ':':<12} {teamB_net_eff:.2f}")
    print()
    print("HEAD TO HEAD MATCHUP")
    print(f"OFFENSE VS DEFENSE IMPACT SCORE {scale_note}")
    print(f"{team1 + ':':<12} {AB_net_impact:.2f}\t{team2 + ':':<12} {BA_net_impact:.2f}")
    print()
    print(f"OFFENSE VS DEFENSE EFFICIENCY SCORE {scale_note}")
    print(f"{team1 + ':':<12} {AB_net_eff:.2f}\t{team2 + ':':<12} {BA_net_eff:.2f}")

    return teamA_net_impact, teamB_net_impact, teamA_net_eff, teamB_net_eff, AB_net_impact, BA_net_impact, AB_net_eff, BA_net_eff


def twopt_matchup(team1, team2):
    """Print the two-point comparison and return its scores.

    Author's definitions of the underlying columns:
        Impact: 2pt% * 2PR * AdjT * (1 - TOV%)
        Efficiency: 2pt% * 2PR * 66 * (1 - TOV%)

    Returns:
        (team1 net impact, team2 net impact, team1 net eff, team2 net eff,
         team1 impact head-to-head, team2 impact head-to-head,
         team1 eff head-to-head, team2 eff head-to-head)
    """
    return _shot_matchup(team1, team2, '2pt', 'TWO POINT')


def threept_matchup(team1, team2):
    """Print the three-point comparison and return its scores.

    Author's definitions of the underlying columns:
        Impact: 3pt% * 3PR * AdjT * (1 - TOV%)
        Efficiency: 3pt% * 3PR * 66 * (1 - TOV%)

    Returns:
        The same 8-tuple layout as ``twopt_matchup``, for the '3pt' columns.
    """
    return _shot_matchup(team1, team2, '3pt', 'THREE POINT')


def ft_matchup(team1, team2):
    """Print the free-throw comparison and return its scores.

    Author's definitions of the underlying columns:
        Impact: FT% * FTR * AdjT * (1 - TOV%)
        Efficiency: FT% * FTR * 66 * (1 - TOV%)

    Returns:
        The same 8-tuple layout as ``twopt_matchup``, for the 'FT' columns.
    """
    return _shot_matchup(team1, team2, 'FT', 'FREE THROW')
    
def to_matchup(team1, team2):
    """Print the turnover comparison and head-to-head scores.

    Each head-to-head score is log(offence 'TOV%' * opposing 'TOV%D'),
    scaled to [-1, 1] so that the largest possible product in ``data_test``
    maps to -1 and the smallest to 1.

    Returns:
        (team1 TOV%, team2 TOV%, team1 TOV%D, team2 TOV%D,
         team1 head-to-head score, team2 head-to-head score)
    """
    row1 = data_test[data_test['Team'] == team1]
    row2 = data_test[data_test['Team'] == team2]

    # The scale is inverted relative to the other sections: the biggest
    # product is the "worst" end of the range.
    worst = np.log(data_test['TOV%'].max() * data_test['TOV%D'].max())
    best = np.log(data_test['TOV%'].min() * data_test['TOV%D'].min())

    tov1, tovd1 = row1['TOV%'].iloc[0], row1['TOV%D'].iloc[0]
    _ = row1['TOV%_Diff'].iloc[0]  # read but not used in the report
    tov2, tovd2 = row2['TOV%'].iloc[0], row2['TOV%D'].iloc[0]
    _ = row2['TOV%_Diff'].iloc[0]  # read but not used in the report

    def rescale(offense_tov, defense_tovd):
        raw = np.log(offense_tov * defense_tovd)
        return 2 * ((raw - worst) / (best - worst)) - 1

    AB_net = rescale(tov1, tovd2)
    BA_net = rescale(tov2, tovd1)

    report = [
        "\n\n\nTURNOVER COMPARISON",
        "",
        "OFFENSIVE TURNOVER PERCENTAGE",
        f"{team1 + ':':<12} {tov1}\t{team2 + ':':<12} {tov2}",
        "",
        "DEFENSIVE TURNOVER PERCENTAGE",
        f"{team1 + ':':<12} {tovd1}\t{team2 + ':':<12} {tovd2}",
        "",
        "HEAD TO HEAD MATCHUP",
        "OFFENSE VS DEFENSE TURNOVER PERCENTAGE (Closer to -1: Disadvantage, Closer to 0: Even, Closer to 1: Advantage)",
        f"{team1 + ':':<12} {AB_net:.2f}\t{team2 + ':':<12} {BA_net:.2f}",
    ]
    print("\n".join(report))

    return tov1, tov2, tovd1, tovd2, AB_net, BA_net
    
def reb_matchup(team1, team2):
    """Print the rebounding comparison and head-to-head scores.

    Each head-to-head score is log(offence 'OREB%' / opposing 'DREB%'),
    min-max scaled to [-1, 1] between log(min OREB% / max DREB%) and
    log(max OREB% / min DREB%) taken over ``data_test``.

    Returns:
        (team1 Height, team2 Height, team1 OREB%, team2 OREB%,
         team1 DREB%, team2 DREB%,
         team1 head-to-head score, team2 head-to-head score)
    """
    row1 = data_test[data_test['Team'] == team1]
    row2 = data_test[data_test['Team'] == team2]

    dreb_all = data_test['DREB%']
    oreb_all = data_test['OREB%']
    low = np.log(oreb_all.min() / dreb_all.max())
    high = np.log(oreb_all.max() / dreb_all.min())

    dreb1, oreb1 = row1['DREB%'].iloc[0], row1['OREB%'].iloc[0]
    dreb2, oreb2 = row2['DREB%'].iloc[0], row2['OREB%'].iloc[0]
    height1 = row1['Height'].iloc[0]
    height2 = row2['Height'].iloc[0]

    def rescale(offense_oreb, defense_dreb):
        raw = np.log(offense_oreb / defense_dreb)
        return 2 * ((raw - low) / (high - low)) - 1

    AB_net = rescale(oreb1, dreb2)
    BA_net = rescale(oreb2, dreb1)

    report = [
        "\n\n\nREBOUNDING COMPARISON",
        "",
        "EFFECTIVE HEIGHT",
        f"{team1 + ':':<12} {height1:.2f}\t{team2 + ':':<12} {height2:.2f}",
        "",
        "OFFENSIVE REBOUNDING PERCENTAGE",
        f"{team1 + ':':<12} {oreb1}\t{team2 + ':':<12} {oreb2}",
        "",
        "DEFENSIVE REBOUND PERCENTAGE",
        f"{team1 + ':':<12} {dreb1}\t{team2 + ':':<12} {dreb2}",
        "",
        "HEAD TO HEAD MATCHUP",
        "OFFENSIVE VS DEFENSIVE REBOUNDING (Closer to -1: Disadvantage, Closer to 0: Even, Closer to 1: Advantage)",
        f"{team1 + ':':<12} {AB_net:.2f}\t{team2 + ':':<12} {BA_net:.2f}",
    ]
    print("\n".join(report))

    return height1, height2, oreb1, oreb2, dreb1, dreb2, AB_net, BA_net
    
    
    


In [811]:
# Example: print the full head-to-head report for two teams found in data_test.
team_matchup("Memphis", "Kansas")

				Memphis(5) vs. Kansas(7)

STRENGTH OF SCHEDULE COMPARISON

ELITE SOS
Memphis:     23.012	Kansas:      41.748

WINS ABOVE BUBBLE
Memphis:     4.9	Kansas:      4.1



TEMPO COMPARISON

ADJUSTED TEMPO
Memphis:     70.84	Kansas:      68.43
Suggestion: Look at the teams' respective Impact Scores



OFFENSE VS DEFENSE COMPARISON

ADJUSTED OFFENSIVE EFFICIENCY
Memphis:     113.30	Kansas:      116.02

ADJUSTED DEFENSIVE EFFICIENCY
Memphis:     98.16	Kansas:      92.76

OFFENSIVE EFFECTIVE FG%
Memphis:     0.536	Kansas:      0.531

DEFENSIVE EFFECTIVE FG%
Memphis:     0.485	Kansas:      0.462

HEAD TO HEAD MATCHUP
OFFENSE VS DEFENSE (Closer to -1: Disadvantage, Closer to 0: Even, Closer to 1: Advantage)
Memphis:     -0.26	Kansas:      0.05



TWO POINT COMPARISON

NET 2PT IMPACT SCORE (Shows estimated 2pt margin for team given adjusted tempo)
Memphis:     3.40	Kansas:      4.21

NET 2PT EFFICIENCY SCORE (Shows estimated 2pt margin for team given a fixed tempo)
Memphis:     3.16	Kansas:     

In [812]:
# Leaderboard: every team in data_test, highest projected win total first.
sorted_teams = data_test.sort_values(by='Proj_Wins', ascending=False)

for team, seed, proj_wins in zip(sorted_teams['Team'], sorted_teams['Seed'], sorted_teams['Proj_Wins']):
    print(f"{team}({seed}): {proj_wins:.2f}")

Houston(1): 4.67
Duke(1): 4.52
Florida(1): 4.03
Auburn(1): 3.16
Tennessee(2): 2.05
St. John's(2): 1.92
Alabama(2): 1.88
Texas Tech(3): 1.52
Maryland(4): 1.48
Iowa St.(3): 1.44
Arizona(4): 1.38
Michigan St.(2): 1.25
Gonzaga(8): 1.23
Michigan(5): 1.15
Missouri(6): 1.14
Texas A&M(4): 1.14
Clemson(5): 1.12
Wisconsin(3): 1.12
BYU(6): 1.12
Saint Mary's(7): 1.11
Illinois(6): 1.08
Purdue(4): 1.08
Kentucky(3): 1.07
Marquette(7): 1.06
Louisville(8): 1.02
Kansas(7): 0.99
UCLA(7): 0.97
Baylor(9): 0.90
Mississippi St.(8): 0.85
Georgia(9): 0.84
Mississippi(6): 0.83
Oregon(5): 0.81
VCU(11): 0.71
North Carolina(11): 0.71
Connecticut(8): 0.66
UC San Diego(12): 0.66
Creighton(9): 0.62
New Mexico(10): 0.62
Arkansas(10): 0.62
Vanderbilt(10): 0.61
Utah St.(10): 0.60
Texas(11): 0.57
Memphis(5): 0.56
Oklahoma(9): 0.55
San Diego St.(11): 0.53
McNeese St.(12): 0.49
Drake(11): 0.45
Colorado St.(12): 0.44
Xavier(11): 0.43
Yale(13): 0.38
Liberty(12): 0.37
High Point(13): 0.31
Akron(13): 0.25
Lipscomb(14): 0.24
Gr

In [807]:
# Sanity check: get_proj_wins rescales its output to sum to 63, so this total should be 63.
data_test['Proj_Wins'].sum()

63.0

In [None]:
def get_proj_wins_negbin(x_train, x_test, y_train_series):
    """Alternative win projection using a Negative Binomial GLM.

    This cell previously held only a function body (indented statements and
    a bare ``return``), which cannot execute on its own. It is wrapped here
    under its own name so it does not shadow ``get_proj_wins``.

    Args:
        x_train: training feature matrix.
        x_test: feature matrix to score; same columns as ``x_train``.
        y_train_series: training win counts aligned with ``x_train``.

    Returns:
        The GLM's predictions for ``x_test``, without any rescaling.
    """
    # has_constant='add' always inserts the intercept column. With the
    # default ('skip') a frame that happens to contain a constant-valued
    # column gets no intercept, leaving train and test with different shapes.
    X_train_const = sm.add_constant(x_train, has_constant='add')
    model = sm.GLM(y_train_series, X_train_const, family=sm.families.NegativeBinomial())
    results = model.fit()

    X_test_const = sm.add_constant(x_test, has_constant='add')
    y_pred = results.predict(X_test_const)

    return y_pred

In [None]:
# NOTE(review): scratch fragment - the same three statements that open
# get_proj_wins, but with alpha=1.0 instead of 0.5. The indented lines make
# the cell unrunnable as written, and x_train / y_train_series / x_test are
# not defined at notebook level in the cells shown; confirm whether this
# cell is still needed.
model = make_pipeline(StandardScaler(), PoissonRegressor(alpha=1.0, max_iter=10000))
    model.fit(x_train, y_train_series)
    y_pred = model.predict(x_test)