In [2]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
import xgboost
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import scipy
import random
import pickle
import joblib

In [3]:
# Import model and data
#
# NOTE(review): pickle.load can execute arbitrary code while deserialising,
# so only point these paths at model files you created or otherwise trust.
# The files are opened with context managers so the handles are closed as
# soon as each model has been loaded.

with open("/log_xrebounds.pkl", 'rb') as xreb_file:
    xreb = pickle.load(xreb_file)
with open("/xgboost_xg.pkl", 'rb') as xg_model_file:
    xg_model = pickle.load(xg_model_file)
shots_sorted = pd.read_csv(r'/shots_2022.csv')

In [None]:
# Display configuration: cap long frames at 15 rows but never hide columns.
pd.set_option('display.max_rows', 15)
pd.set_option('display.max_columns', None)

In [None]:
def conditions(shots_sorted):
    """Return the team code of the side opposing the shooting team.

    Intended for row-wise use (``DataFrame.apply(..., axis=1)``), so
    ``shots_sorted`` is a single row here, not the whole frame.

    Parameters
    ----------
    shots_sorted : mapping or pandas.Series
        One shot row with the keys ``team``, ``homeTeamCode`` and
        ``awayTeamCode``.

    Returns
    -------
    The value of ``awayTeamCode`` when ``team`` equals ``'HOME'``, otherwise
    the value of ``homeTeamCode``.
    """
    shooter_is_home = shots_sorted['team'] == 'HOME'
    return shots_sorted['awayTeamCode'] if shooter_is_home else shots_sorted['homeTeamCode']

In [None]:
def new_conditions(shots_sorted):
    """Return the team code of the game's winner for a single shot row.

    Intended for row-wise use (``DataFrame.apply(..., axis=1)``), so
    ``shots_sorted`` is a single row here, not the whole frame.

    Parameters
    ----------
    shots_sorted : mapping or pandas.Series
        One shot row with the keys ``homeTeamWon``, ``homeTeamCode`` and
        ``awayTeamCode``.

    Returns
    -------
    The value of ``homeTeamCode`` when ``homeTeamWon`` equals 1, otherwise
    the value of ``awayTeamCode``.
    """
    winner_column = 'homeTeamCode' if shots_sorted['homeTeamWon'] == 1 else 'awayTeamCode'
    return shots_sorted[winner_column]

In [None]:
# Create function to clean dataset

def clean_data(shots_sorted):
    """
    Clean the shot-level data and add the engineered columns used downstream.

    The input frame is copied first, so the caller's DataFrame is left
    untouched. The steps are:

    1. add 0/1 indicator columns for shot type, on-ice strength, forward
       shooters, previous-event context and score state
    2. drop playoff games, events other than GOAL/SHOT/MISS, and rows that
       are missing shot or previous-event coordinates
    3. add the goalie's team, the winning team and the ``goalieWin`` column

    Parameters
    ----------
    shots_sorted : pandas.DataFrame
        One row per shot event. It must contain every column read below
        (``shotType``, ``homeSkatersOnIce``, ``awaySkatersOnIce``, ``event``,
        ``team``, ``lastEventCategory``, ...).

    Returns
    -------
    pandas.DataFrame
        A new, filtered frame with the extra columns. The surviving rows keep
        their original index labels (the index is not reset).
    """
    # Work on a copy: every assignment below would otherwise write into the
    # caller's frame.
    shots_sorted = shots_sorted.copy()

    ### add shot types (one 0/1 indicator per shotType code)
    shot_type_codes = {
        'type_backhand': 'BACK',
        'type_wrist': 'WRIST',
        'type_slap': 'SLAP',
        'type_tipin': 'TIP',
        'type_deflect': 'DEFL',
        'type_wrap': 'WRAP',
        'type_snap': 'SNAP',
    }
    for indicator_column, type_code in shot_type_codes.items():
        shots_sorted[indicator_column] = np.where(shots_sorted['shotType'] == type_code, 1, 0)
    # Assign the result instead of calling fillna(inplace=True) on the column:
    # the chained in-place form is not guaranteed to write back to the frame.
    shots_sorted['shotType'] = shots_sorted['shotType'].fillna("NA")

    # add strengths: strength_<home>x<away> for 3-6 skaters a side (no 6x6)
    for home_skaters in range(3, 7):
        for away_skaters in range(3, 7):
            if home_skaters == 6 and away_skaters == 6:
                continue
            shots_sorted[f'strength_{home_skaters}x{away_skaters}'] = np.where(
                (shots_sorted['homeSkatersOnIce'] == home_skaters)
                & (shots_sorted['awaySkatersOnIce'] == away_skaters),
                1, 0)

    ### add column for forward
    shots_sorted['isforward'] = np.where(shots_sorted['playerPositionThatDidEvent'].isin(["LW", "RW", "C"]), 1, 0)

    ### remove playoff games
    shots_sorted = shots_sorted[shots_sorted.isPlayoffGame != 1].copy()

    ### label shots that have no goalie name as "Empty" (filtered out later)
    shots_sorted['goalieNameForShot'] = shots_sorted['goalieNameForShot'].fillna("Empty")

    ### add outcomes: GOAL -> 2, SHOT -> 1, MISS -> 0, anything else -> 3 (dropped)
    shots_sorted['Outcome'] = np.where(
        shots_sorted['event'] == "GOAL", 2,
        np.where(shots_sorted['event'] == "SHOT", 1,
                 np.where(shots_sorted['event'] == "MISS", 0, 3)))
    shots_sorted = shots_sorted[shots_sorted['Outcome'] != 3].copy()

    # Change giveaway to takeaway for other team
    was_giveaway = shots_sorted['lastEventCategory'] == "GIVE"
    opposite_team = np.where(shots_sorted['lastEventTeam'] == 'HOME', 'AWAY', 'HOME')
    shots_sorted['lastEventTeam'] = np.where(was_giveaway, opposite_team, shots_sorted['lastEventTeam'])
    shots_sorted['lastEventCategory'] = np.where(was_giveaway, "TAKE", shots_sorted['lastEventCategory'])

    ### add column for same team events
    shots_sorted['if_prev_ev_team'] = np.where(shots_sorted['team'] == shots_sorted['lastEventTeam'], 1, 0)

    # Flag the previous event by category, first for events by the shooting
    # team (prev_evTeam_*), then for events by the other team (prev_non_evTeam_*).
    previous_event_groups = [
        ('Fac', ["FAC"]),
        ('NonSog', ["MISS", "BLOCK"]),
        ('NonShot', ["TAKE", "HIT"]),
        ('Sog', ["SHOT"]),
    ]
    for same_team_flag, column_prefix in ((1, 'prev_evTeam_'), (0, 'prev_non_evTeam_')):
        for suffix, categories in previous_event_groups:
            shots_sorted[column_prefix + suffix] = np.where(
                (shots_sorted['if_prev_ev_team'] == same_team_flag)
                & (shots_sorted['lastEventCategory'].isin(categories)),
                1, 0)

    ### add column for non-SOG rebounds
    # NOTE(review): this tests timeUntilNextEvent although the rest of the
    # condition describes the previous event -- confirm timeSinceLastEvent
    # was not the intended column.
    shots_sorted['non_sog_rebound'] = np.where(
        (shots_sorted['lastEventCategory'].isin(["MISS", "BLOCK"]))
        & (shots_sorted['timeUntilNextEvent'] <= 2.0)
        & (shots_sorted['team'] == shots_sorted['lastEventTeam']),
        1, 0)

    ### add goalie categories
    # Vectorised equivalents of the row-wise helpers conditions() and
    # new_conditions(): the goalie plays for the side opposing the shooter,
    # and the winner is the home team when homeTeamWon == 1.
    shots_sorted['goalieTeam'] = np.where(shots_sorted['team'] == 'HOME', shots_sorted['awayTeamCode'], shots_sorted['homeTeamCode'])
    shots_sorted['teamWin'] = np.where(shots_sorted['homeTeamWon'] == 1, shots_sorted['homeTeamCode'], shots_sorted['awayTeamCode'])
    # NOTE(review): goalie_win is not defined in the cells visible here; it
    # must exist in the kernel before clean_data is called.
    shots_sorted['goalieWin'] = shots_sorted.apply(goalie_win, axis=1)

    ### add score categories
    ### stop at +3 and -3
    goal_diff = shots_sorted['homeTeamGoals'] - shots_sorted['awayTeamGoals']
    shots_sorted['score_cat'] = np.where(goal_diff >= 3, 3, np.where(goal_diff <= -3, -3, goal_diff))
    # NOTE(review): teamCode is used elsewhere as a team identifier and
    # isHomeTeam as a numeric model feature, so this comparison is presumably
    # never true and the sign is always flipped. `isHomeTeam == 1` was
    # probably intended. Left unchanged on purpose: the pickled models may
    # have been trained on features built exactly this way -- confirm before
    # changing.
    shots_sorted['score_cat'] = np.where(shots_sorted['teamCode'] == shots_sorted['isHomeTeam'], shots_sorted['score_cat'], -shots_sorted['score_cat'])
    shots_sorted['score_cat_3'] = np.where((shots_sorted['score_cat'] >= 3), 1, 0)
    for margin in (2, 1, 0, -1, -2):
        shots_sorted[f'score_cat_{margin}'] = np.where((shots_sorted['score_cat'] == margin), 1, 0)
    shots_sorted['score_cat_-3'] = np.where((shots_sorted['score_cat'] <= -3), 1, 0)

    ### drop rows that are missing shot or previous-event coordinates
    coordinate_columns = [
        "arenaAdjustedXCord",
        "arenaAdjustedYCord",
        "lastEventxCord_adjusted",
        "lastEventyCord_adjusted",
    ]
    shots_sorted = shots_sorted[shots_sorted[coordinate_columns].notnull().all(axis=1)]

    return shots_sorted

shots_sorted = clean_data(shots_sorted)
display(shots_sorted)

In [None]:
# Convert data to model format

# Binary target for the xGoal model: 1 = goal, 0 = saved or missed shot.
# It is derived from `event` instead of re-mapping the current Outcome values
# (0->0, 1->0, 2->1): that in-place re-mapping erased every goal when the cell
# was executed a second time. clean_data keeps only GOAL/SHOT/MISS events, so
# the result of a first run is unchanged. As before, shots_sorted['Outcome']
# is overwritten with the binary target.
shots_sorted['Outcome'] = np.where(shots_sorted['event'] == "GOAL", 1, 0)
# No row can carry the old "drop me" code 3 any more; take an independent
# copy so `data` is a separate object from shots_sorted, as it was before.
data = shots_sorted.copy()

####Convert the data to use in model
# The order of this list is the order of the columns handed to the model.
all_variables = [
    'arenaAdjustedShotDistance', 'arenaAdjustedXCord', 'lastEventxCord_adjusted',
    'arenaAdjustedYCord', 'lastEventyCord_adjusted', 'shotAngleAdjusted',
    'awayEmptyNet', 'homeEmptyNet', 'speedFromLastEvent', 'shotAnglePlusReboundSpeed',
    'distanceFromLastEvent', 'timeSinceLastEvent',
    'type_backhand', 'type_deflect', 'type_slap', 'type_snap', 'type_tipin', 'type_wrap', 'type_wrist',
    'strength_3x3', 'strength_3x4', 'strength_3x5', 'strength_3x6',
    'strength_4x3', 'strength_4x4', 'strength_4x5', 'strength_4x6',
    'strength_5x3', 'strength_5x4', 'strength_5x5', 'strength_5x6',
    'strength_6x3', 'strength_6x4', 'strength_6x5',
    'score_cat_-3', 'score_cat_-2', 'score_cat_-1', 'score_cat_0', 'score_cat_1', 'score_cat_2', 'score_cat_3',
    'isforward', 'isHomeTeam',
    'prev_evTeam_Fac', 'prev_evTeam_NonSog', 'prev_evTeam_NonShot', 'prev_evTeam_Sog',
    'prev_non_evTeam_Fac', 'prev_non_evTeam_NonSog', 'prev_non_evTeam_NonShot', 'prev_non_evTeam_Sog',
]

categorical_variables = ['shotType', 'score_cat', 'lastEventCategory']
labels = ['Outcome']

# NOTE(review): get_dummies on 'score_cat' appears to create columns named
# score_cat_<k>, the same names clean_data already added by hand, so selecting
# those names below would return both copies. Left untouched because the
# feature layout the pickled model expects is not visible here -- confirm.
df_dummies = pd.get_dummies(data, columns=categorical_variables)
model_df = df_dummies[all_variables + ["Outcome"]]

model_features = model_df[all_variables].values.tolist()
model_labels = model_df[labels].values.tolist()

# Convert to lists
features = model_features
labels = model_labels

# Split into training and testing sets -> 80/20
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=.2, random_state=42)

# Fix Data
features_train, labels_train = np.array(features_train), np.array(labels_train).ravel()

In [None]:
# Use model to calculate goal probability of all shots
# `features` must still be the list built by the xGoal feature cell: the
# rebound feature cell further down rebinds the same name, so run this cell
# before that one.

xgboost_predict=xg_model.predict_proba(features)

In [None]:
# Create dataframe of probabilities

xgb = data[['Outcome', 'shotID']]
# xgboost_predict has one row per row of `data`, in the same order, because
# `features` was built from `data`. Give the probabilities data's index:
# `data` keeps the (gappy) row labels left over from filtering, so a default
# 0..n-1 index would make the index join below attach each probability to
# the wrong shot and silently drop the rest.
xgpredict = pd.DataFrame(xgboost_predict, columns=['0', '1'], index=data.index)
xgb = xgb.join(xgpredict, how='right')
xgb = xgb.dropna()
display(xgb)

In [None]:
# Merge xGoal probabilities into main dataset

# Keep only the goal probability. errors="ignore" lets the cell be re-run:
# on a second execution the helper columns are already gone and a plain
# drop() would raise KeyError.
xgb = xgb.rename(columns={"0": "xNotGoal", "1": "xGoal"})
xgb = xgb.drop(columns=["Outcome", "shotID", "xNotGoal"], errors="ignore")
# Index join: attaches every shot column to its probability row.
xgboost_xg = xgb.join(shots_sorted, how='left')
# Expected goals minus actual goals, per shot.
xgboost_xg['GSAx'] = xgboost_xg['xGoal'] - xgboost_xg['goal']
# Constant 1 so that summing over a group counts shot attempts.
xgboost_xg['shot_attempt'] = 1
display(xgboost_xg)

In [None]:
# Build the feature matrix for the rebound model.
# `data` starts as an alias of shots_sorted, so the re-coding below also
# rewrites shots_sorted['shotGeneratedRebound'].
data = shots_sorted

# Keep 0 and 1 as they are, mark every other value with 2, then drop the 2s.
rebound_flag = data['shotGeneratedRebound']
data['shotGeneratedRebound'] = np.where(rebound_flag == 0, 0, np.where(rebound_flag == 1, 1, 2))
data = data[data['shotGeneratedRebound'] != 2]

####Convert the data to use in model
# The order of this list is the order of the columns handed to the model.
all_variables = [
    'arenaAdjustedShotDistance', 'arenaAdjustedXCord', 'lastEventxCord_adjusted',
    'arenaAdjustedYCord', 'lastEventyCord_adjusted', 'shotAngleAdjusted',
    'awayEmptyNet', 'homeEmptyNet', 'speedFromLastEvent', 'shotAnglePlusReboundSpeed',
    'distanceFromLastEvent', 'timeSinceLastEvent',
    'type_backhand', 'type_deflect', 'type_slap', 'type_snap', 'type_tipin', 'type_wrap', 'type_wrist',
    'strength_3x3', 'strength_3x4', 'strength_3x5', 'strength_3x6',
    'strength_4x3', 'strength_4x4', 'strength_4x5', 'strength_4x6',
    'strength_5x3', 'strength_5x4', 'strength_5x5', 'strength_5x6',
    'strength_6x3', 'strength_6x4', 'strength_6x5',
    'score_cat_-3', 'score_cat_-2', 'score_cat_-1', 'score_cat_0', 'score_cat_1', 'score_cat_2', 'score_cat_3',
    'isforward', 'isHomeTeam',
    'prev_evTeam_Fac', 'prev_evTeam_NonSog', 'prev_evTeam_NonShot', 'prev_evTeam_Sog',
    'prev_non_evTeam_Fac', 'prev_non_evTeam_NonSog', 'prev_non_evTeam_NonShot', 'prev_non_evTeam_Sog',
]

categorical_variables = ['shotType', 'score_cat', 'lastEventCategory']
labels = ['shotGeneratedRebound']

df_dummies = pd.get_dummies(data, columns=categorical_variables)
model_df = df_dummies[all_variables + ["shotGeneratedRebound"]]

# Plain Python lists: one inner list per shot
model_features = model_df[all_variables].values.tolist()
model_labels = model_df[labels].values.tolist()
features, labels = model_features, model_labels

# 80/20 train/test split with a fixed seed
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=.2, random_state=42
)

# Training data as arrays; labels flattened to one dimension
features_train = np.array(features_train)
labels_train = np.array(labels_train).ravel()

In [None]:
# Rebound probability for every shot. `features` must be the list built by
# the rebound feature cell directly above, not the xGoal one.
xreb_predict = xreb.predict_proba(features)

In [None]:
# Create dataframe of probabilities

xrebs = data[['shotGeneratedRebound', 'shotID']]
# xreb_predict has one row per row of `data`, in the same order, because
# `features` was built from `data`. Reuse data's index: `data` keeps the
# (gappy) row labels left over from filtering, so a default 0..n-1 index
# would make the index joins below pair probabilities with the wrong shots.
xrebounds = pd.DataFrame(xreb_predict, columns=['0', '1'], index=data.index)
xrebs = xrebs.join(xrebounds, how='right')
xrebs = xrebs.dropna()
xrebs = xrebs.rename(columns={"0": "xNotRebound", "1": "xRebound"})
xrebs = xrebs.drop(columns=["shotGeneratedRebound", "xNotRebound", "shotID"])
# Drop the results of a previous run first, otherwise re-running this cell
# fails because xRebound would exist on both sides of the join.
xgboost_xg = xrebs.join(xgboost_xg.drop(columns=['xRebound', 'RSAx'], errors='ignore'), how='left')
# Expected rebounds minus actual rebounds, per shot.
xgboost_xg['RSAx'] = xgboost_xg['xRebound'] - xgboost_xg['shotGeneratedRebound']
display(xgboost_xg)

In [None]:
# Create a GameScore for goalies
# One row per goalie / goalie team / shooting team / game / season, with the
# per-shot numbers summed and the rate stats computed from those sums.
# Shots whose goalie name is "Empty" are excluded.

game_score_xgb = (
    xgboost_xg[['goalieNameForShot', 'goalieTeam', 'teamCode', 'xGoal', 'goal', 'GSAx', "xRebound", 'RSAx',
                'season', 'game_id', 'shotWasOnGoal', 'shotGoalieFroze', 'shotRebound', 'shotRush']]
    .loc[lambda shots: shots['goalieNameForShot'] != 'Empty']
    .groupby(['goalieNameForShot', 'goalieTeam', 'teamCode', 'game_id', 'season'])
    .sum()
    .assign(
        shotSaved=lambda g: g['shotWasOnGoal'] - g['goal'],
        savePCT=lambda g: g['shotSaved'] / g['shotWasOnGoal'],
        xSvPCT=lambda g: (g['shotWasOnGoal'] - g['xGoal']) / g['shotWasOnGoal'],
        # save percentage above expected
        SPAx=lambda g: g['savePCT'] - g['xSvPCT'],
        gameScore=lambda g: (-0.75*g['goal']) + (0.1*g['shotSaved']) + (0.3*g['GSAx'] + (0.05*g['RSAx'])),
        # one game played per row, so summing GP later counts games
        GP=1,
    )
    .reset_index()
    .astype({'season': 'int64', 'game_id': 'int64', 'goal': 'int64', 'shotSaved': 'int64', 'shotWasOnGoal': 'int64'})
)
display(game_score_xgb.style.format({pct_column: "{:.2%}".format for pct_column in ('savePCT', 'xSvPCT', 'SPAx')}))

In [None]:
# Game log for a single goalie, oldest game first.
# The empty string is a placeholder: put the goalie's name between the quotes.
individual = game_score_xgb.loc[game_score_xgb['goalieNameForShot'] == ""]
display(
    individual
    .sort_values(by='game_id')
    .style.format({'savePCT': "{:.2%}", 'xSvPCT': "{:.2%}", 'SPAx': "{:.2%}"})
)

In [None]:
# Shots on goal against rebound shots, one point per row of game_score_xgb
sns.scatterplot(
    x="shotWasOnGoal",
    y="shotRebound",
    data=game_score_xgb,
    markers=True,
    sizes=(20, 250),
    lw=3,
)

In [None]:
# Create dataset grouped by game & shot ID's
# One row per goalie / shooter / game / shot / shot type; shots whose goalie
# name is "Empty" are removed after grouping.

goalie_shot = (
    xgboost_xg[['goalieNameForShot', 'shooterName', 'xGoal', 'goal', 'GSAx', 'game_id', 'shotID', 'shotType', 'shotWasOnGoal']]
    .groupby(['goalieNameForShot', 'shooterName', 'game_id', 'shotID', 'shotType'])
    .sum()
    .reset_index()
    .loc[lambda shots: shots['goalieNameForShot'] != "Empty"]
    .astype({'shotID': 'int64', 'game_id': 'int64', 'goal': 'int64'})
)

# Show the shots with the highest goal probability first
goalie_shot_by_xgoal = (
    goalie_shot
    .groupby(['game_id', 'shotID', 'goalieNameForShot', 'shooterName', 'shotType'])
    .sum()
    .sort_values(by='xGoal', ascending=False)
)
display(goalie_shot_by_xgoal)

In [None]:
# Create season-level dataset for shooter xGoals

# Select only the grouping keys and the numeric columns to be summed.
# The text columns goalieNameForShot and shotType used to be selected too;
# they are not grouping keys, and groupby().sum() on pandas >= 2.0 no longer
# drops such columns but concatenates their strings. GSAx was selected and
# then dropped again, so it is simply left out.
shooter_xgb = xgboost_xg[['teamCode', 'shooterName', 'xGoal', 'goal', 'season', 'shot_attempt',
                          'shotWasOnGoal', 'shotGeneratedRebound', 'shotRush', 'shotRebound']]
shooter_xgb = shooter_xgb.groupby(['teamCode', 'season', 'shooterName']).sum()
shooter_xgb['GoalsAboveExpected'] = shooter_xgb['goal'] - shooter_xgb['xGoal']
shooter_xgb['ShootingPCT'] = shooter_xgb['goal'] / shooter_xgb['shot_attempt']
shooter_xgb['xShootingPCT'] = shooter_xgb['xGoal'] / shooter_xgb['shot_attempt']
shooter_xgb['sogPCT'] = shooter_xgb['shotWasOnGoal'] / shooter_xgb['shot_attempt']
# Guard against infinities from a division by zero
shooter_xgb = shooter_xgb.replace([np.inf, -np.inf], 0)
# Best finishers first; one sort is enough (the cell used to sort twice)
shooter_xgb = shooter_xgb.sort_values(by=['GoalsAboveExpected'], ascending=False)
shooter_xgb = shooter_xgb.reset_index()
shooter_xgb = shooter_xgb.astype({'season': 'int64', 'goal': 'int64', 'shotWasOnGoal': 'int64'})
display(shooter_xgb)

In [None]:
# Season xGoal against rebound shots per shooter, coloured by xGoal
sns.scatterplot(
    x="xGoal",
    y="shotRebound",
    hue="xGoal",
    data=shooter_xgb,
    sizes=(20, 250),
)

In [None]:
# Create game-level dataset for shooters
# One row per team / game / shooter with summed xGoal and goals.

shooter_game = (
    xgboost_xg[['teamCode', 'shooterName', 'game_id', 'xGoal', 'goal', 'GSAx']]
    .groupby(['teamCode', 'game_id', 'shooterName'])
    .sum()
    .drop(columns=['GSAx'])
    .assign(GoalsAboveExpected=lambda g: g['goal'] - g['xGoal'])
    .reset_index()
    .astype({'game_id': 'int64', 'goal': 'int64'})
)
display(shooter_game.groupby(['game_id', 'teamCode', 'shooterName']).sum())

In [None]:
# Create game-level shooter v. goalie dataset
# One row per team / game / shooter / goalie / shot type.

shooter_shot = (
    xgboost_xg[['teamCode', 'shooterName', 'goalieNameForShot', 'game_id', 'xGoal', 'goal', 'GSAx', 'shotType', 'shotWasOnGoal']]
    .groupby(['teamCode', 'game_id', 'shooterName', 'goalieNameForShot', 'shotType'])
    .sum()
    .reset_index()
    .astype({'game_id': 'int64', 'goal': 'int64', 'shotWasOnGoal': 'int64'})
)
display(shooter_shot.groupby(['game_id', 'shooterName', 'goalieNameForShot', 'shotType']).sum())

In [None]:
# Create season-level GameScore dataset for goalies
# Game rows are summed per goalie / team / season; the rate stats are then
# recomputed from the season totals.

season_score = (
    game_score_xgb[['goalieNameForShot', 'goalieTeam', 'season', 'xGoal', 'goal', 'GSAx', 'xRebound', 'RSAx',
                    'gameScore', 'GP', 'shotSaved', 'shotWasOnGoal', 'shotGoalieFroze', 'shotRush', 'shotRebound']]
    .groupby(['goalieNameForShot', 'goalieTeam', 'season'])
    .sum()
    # average game score per game played, replacing the summed gameScore
    .assign(gameScoreAVG=lambda s: s['gameScore'] / s['GP'])
    .drop(columns=['gameScore'])
    .assign(
        savePCT=lambda s: s['shotSaved'] / s['shotWasOnGoal'],
        xSvPCT=lambda s: (s['shotWasOnGoal'] - s['xGoal']) / s['shotWasOnGoal'],
        SPAx=lambda s: s['savePCT'] - s['xSvPCT'],
    )
    .astype({'shotSaved': 'int64', 'shotWasOnGoal': 'int64'})
    .sort_values(by='GSAx', ascending=False)
    .reset_index()
)
percent_columns = ('savePCT', 'xSvPCT', 'SPAx')
display(season_score.style.format({column: "{:.2%}" for column in percent_columns}))

In [None]:
# team-level season stats

# Defensive side: goalie totals per team, renamed to "against" columns.
team_xg = season_score[['goalieTeam', 'xGoal', 'goal', 'GSAx', 'season', 'shotWasOnGoal', 'shotSaved', 'shotGoalieFroze','shotRush', 'shotRebound']]
team_xg = team_xg.rename(columns={'goalieTeam':'teamCode', 'goal':'GA', 'xGoal':'xGA', 'shotWasOnGoal':'sogAgainst', 'shotRush':'shotRushAgainst', 'shotRebound':'shotReboundAgainst'})
team_xg = team_xg.groupby(['teamCode', 'season']).sum()
# Offensive side: shooter totals per team.
team_xg1 = shooter_xgb[['teamCode', 'xGoal', 'goal', 'GoalsAboveExpected', 'season', 'shot_attempt','shotWasOnGoal', 'shotGeneratedRebound','shotRush', 'shotRebound']]
team_xg1 = team_xg1.groupby(['teamCode', 'season']).sum()
# Index join on (teamCode, season); every team/season from the offensive table is kept.
team_xg = team_xg.join(team_xg1, how='right')
# Share of expected / actual goals that belong to the team.
team_xg['xGPCT'] = team_xg['xGoal']/(team_xg['xGoal']+team_xg['xGA'])
team_xg['aGPCT'] = team_xg['goal']/(team_xg['goal']+team_xg['GA'])
team_xg['gPCTdiff'] = team_xg['aGPCT'] - team_xg['xGPCT']
team_xg['shootingPCT'] = team_xg['goal'] / team_xg['shot_attempt']
team_xg['xShootingPCT'] = team_xg['xGoal'] / team_xg['shot_attempt']
team_xg['savePCT']=team_xg['shotSaved']/team_xg['sogAgainst']
# Expected save percentage must use expected goals AGAINST (xGA). After the
# join, 'xGoal' is the team's own offensive expected goals, which was being
# subtracted from shots against here.
team_xg['xSvPCT'] = (team_xg['sogAgainst']-team_xg['xGA']) / team_xg['sogAgainst']
team_xg=team_xg.sort_values(by=['xGPCT'], ascending=False)
team_xg=team_xg.astype({'shotSaved':'int64','sogAgainst':'int64'})
team_xg=team_xg.reset_index()
display(team_xg.style.format({'savePCT': "{:.2%}",'xSvPCT': "{:.2%}", 'xGPCT':"{:.2%}", 'aGPCT':"{:.2%}", 'gPCTdiff':"{:.2%}", 'shootingPCT':"{:.2%}", 'xShootingPCT':"{:.2%}"}))

In [None]:
# Season save percentage against GSAx, one point per row of season_score
sns.scatterplot(
    x="savePCT",
    y="GSAx",
    data=season_score,
    sizes=(20, 250),
)

In [None]:
pip install hockey_rink

In [None]:
from hockey_rink import NHLRink

In [None]:
# Two offensive-zone rinks side by side: summed goals as a contour plot
# (left) and summed xGoal as a heatmap (right).
fig, axs = plt.subplots(1, 2, figsize=(14, 8))
rink = NHLRink(rotation=270)
for rink_ax in axs:
    rink.draw(ax=rink_ax, display_range="ozone")
goal_ax, xgoal_ax = axs

shot_x = xgboost_xg.arenaAdjustedXCordABS
shot_y = xgboost_xg.arenaAdjustedYCord

contour_img = rink.contourf(
    shot_x, shot_y, values=xgboost_xg.goal, ax=goal_ax, cmap="bwr",
    plot_range="ozone", binsize=8, levels=75, statistic="sum",
)
plt.colorbar(contour_img, ax=goal_ax, orientation="horizontal")
rink.heatmap(
    shot_x, shot_y, values=xgboost_xg.xGoal, ax=xgoal_ax, cmap="magma",
    plot_xlim=(25, 89), statistic="sum", vmax=50, binsize=5,
)