In [1]:
# Pandas
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np

# Polars (Arrow)
from pyarrow.dataset import dataset
import polars as pl
pl.Config.set_tbl_rows(n=-1)
pl.Config.set_tbl_cols(n=-1)


# Hockey Specific
import sportsdataverse as sdv

# Hit API
import requests

# Load Expected Goals Data

In [2]:
model_path = "FinalExpectedGoalsPredictions.parquet"
PBP_xG = pl.read_parquet(model_path)

# Create Standings Table

In [3]:
pythag_exp = 1.86
Def = (
    PBP_xG
    .with_columns((pl.when(pl.col('event_team_abbr') == pl.col('home_abbreviation'))
                     .then(pl.col('away_abbreviation'))
                     .otherwise(pl.col('home_abbreviation'))).alias('team'))
     .filter(((pl.col('season')==2024)) & (pl.col('season_type') == 'R') & (pl.col('model_type') != 'EN'))
     .groupby('team')
     .agg([
         (pl.col('xG').sum()).alias('xGoals_Against'),
         (pl.col('is_goal').sum()).alias('Goals_Against')
          ])
     .with_columns((pl.col('Goals_Against') - pl.col('xGoals_Against')).alias('D_Diff_Over_Under'))
     .sort('xGoals_Against', descending = True)
)

Off = (
    PBP_xG
    .with_columns(pl.col('event_team_abbr').alias('team'))
    .filter(((pl.col('season')==2024)) & (pl.col('season_type') == 'R') & (pl.col('model_type') != 'EN'))
    .groupby('team')
    .agg([(pl.col('xG').sum()).alias('xGoals_For'),
          (pl.col('is_goal').sum().alias('Goals_For'))
          ])
     .with_columns((pl.col('Goals_For') - pl.col('xGoals_For')).alias('O_Diff_Over_Under'))
     .sort('xGoals_For', descending = True)
)

NHL_Stats = (
    Off
    .join(Def, on = 'team', how = "inner")
    .with_columns([
    (pl.col('xGoals_For') - pl.col('xGoals_Against')).alias('xG_Difference'),
    (pl.col('Goals_For') - pl.col('Goals_Against')).alias('G_Difference')
     ])
     .with_columns(
         (pl.col('G_Difference') - (pl.col('xG_Difference') )).alias('Diff_Over_Under'),
         ((pl.col('xGoals_For').pow(pythag_exp)) / ((pl.col('xGoals_For').pow(pythag_exp)) + (pl.col('xGoals_Against').pow(pythag_exp)))).alias('xGWin_Pct'),
         ((pl.col('Goals_For').pow(pythag_exp)) / ((pl.col('Goals_For').pow(pythag_exp)) + (pl.col('Goals_Against').pow(pythag_exp)))).alias('GWin_Pct'),
         ((((pl.col('Goals_For') + pl.col('xGoals_For'))/2).pow(pythag_exp)) / ((((pl.col('Goals_For') + pl.col('xGoals_For'))/2).pow(pythag_exp)) + (((pl.col('Goals_Against') + pl.col('xGoals_Against'))/2).pow(pythag_exp)))).alias('Weighted_Win_Pct')
         )
     .sort('Weighted_Win_Pct', descending=True)
)

NHL_Stats.head(5) #.filter(pl.col('team').is_in(bet_teams))

team,xGoals_For,Goals_For,O_Diff_Over_Under,xGoals_Against,Goals_Against,D_Diff_Over_Under,xG_Difference,G_Difference,Diff_Over_Under,xGWin_Pct,GWin_Pct,Weighted_Win_Pct
str,f32,i32,f64,f32,i32,f64,f32,i32,f64,f32,f64,f64
"""VAN""",35.697395,52,16.302605,38.260727,23,-15.260727,-2.563332,29,31.563332,0.467799,0.82014,0.660894
"""LAK""",36.181,44,7.819,27.379593,30,2.620407,8.801407,14,5.198593,0.626785,0.670924,0.650753
"""PIT""",40.25325,35,-5.25325,27.973797,29,1.026203,12.279453,6,-6.279453,0.663046,0.586564,0.626581
"""NSH""",39.331017,31,-8.331017,28.797981,28,-0.797981,10.533035,3,-7.533035,0.641016,0.547188,0.598088
"""VGK""",35.357235,44,8.642765,37.139286,27,-10.139286,-1.782051,17,18.782051,0.477151,0.71266,0.597726


# Get Today's Games

In [75]:
# Variables
date_str = "2023-11-27"

# Create Predictions
sched_link = "https://api-web.nhle.com/v1/schedule/"+date_str
response = requests.get(sched_link)

# Parse the JSON content of the response
raw_data = pd.json_normalize(response.json())
sched_data = pd.json_normalize(raw_data['gameWeek'][0])

# Get Odds
odds_data = pd.json_normalize(raw_data['oddsPartners'][0])
US_Prov_ID = int(odds_data[odds_data['country'] == 'US']['partnerId'].iloc[0])
sched_data = pd.json_normalize(sched_data['games'][0])
#print(sched_data)
game_df = sched_data[['id', 'season', 'gameType', 'gameState', 'awayTeam.abbrev', 'homeTeam.abbrev', 'homeTeam.odds', 'awayTeam.odds', 'gameCenterLink']]

In [72]:
game_dfs = []
idx = 0

## Existing Game Odds
exist_id = []
exist_home_odds = []
exist_away_odds = []

exist_df = pd.DataFrame({
    'id': exist_id,
    'home_odds': exist_home_odds,
    'away_odds': exist_away_odds
})


for i in game_df['id']:

    raw_df = game_df[game_df['id'] == i]

    try:
        # Load Game Data
        home_odds_df = pd.json_normalize(raw_df['homeTeam.odds'][idx])
        away_odds_df = pd.json_normalize(raw_df['awayTeam.odds'][idx])

        home_odd = home_odds_df[home_odds_df['providerId'] == US_Prov_ID]['value'].iloc[0]
        away_odd = away_odds_df[away_odds_df['providerId'] == US_Prov_ID]['value'].iloc[0]

        df = raw_df[['id', 'gameType', 'season', 'awayTeam.abbrev', 'homeTeam.abbrev', 'gameState']]
        df = df.assign(home_odds=home_odd, away_odds=away_odd, game_date=date_str)

        game_dfs.append(df)

    except Exception as e:

        # Load Game Data
        df = raw_df[raw_df['id'] == i]

        home_odd = exist_df[exist_df['id'] == i]['home_odds'].iloc[0]
        away_odd = exist_df[exist_df['id'] == i]['away_odds'].iloc[0]

        df = raw_df[['id', 'gameType', 'season', 'awayTeam.abbrev', 'homeTeam.abbrev','gameState']]
        df = df.assign(home_odds=home_odd, away_odds=away_odd, game_date=date_str).astype({'home_odds': 'float64', 'away_odds': 'float64'})

        gme_lab = str(df['awayTeam.abbrev'].iloc[0])+ ' ('+ str(round(away_odd, 0)) + ') @ '+str(df['homeTeam.abbrev'].iloc[0]+ ' ('+ str(round(home_odd, 0)) + ')')

        print(f"{gme_lab} Has Already Started")

        game_dfs.append(df)

    # Move To Next Game
    idx += 1

# Concatenate all DataFrames in the game_dfs list into a single polarsDataFrame
result_df = pd.concat(game_dfs, ignore_index=True).astype({'home_odds': 'float64', 'away_odds': 'float64'})
result_df = pl.DataFrame(result_df)

In [73]:
Bet_DF = (
    result_df
    .join(NHL_Stats.select([(pl.col(col).alias(f'away_{col}')) for col in NHL_Stats.columns]), left_on=['awayTeam.abbrev'], right_on=['away_team'])
    .join(NHL_Stats.select([(pl.col(col).alias(f'home_{col}')) for col in NHL_Stats.columns]), left_on=['homeTeam.abbrev'], right_on=['home_team'])
    .with_columns([
        ((pl.col('home_Weighted_Win_Pct')) * (1 - pl.col('away_Weighted_Win_Pct'))).alias('home_win'),
        ((pl.col('away_Weighted_Win_Pct')) * (1 - pl.col('home_Weighted_Win_Pct'))).alias('away_win'),
        (pl.when(pl.col('home_odds') < 0).then((-1*(pl.col('home_odds'))) / ((-1*(pl.col('home_odds')) + 100))).otherwise(100 / (pl.col('home_odds') + 100))).alias('home_imp_prob'),
        (pl.when(pl.col('away_odds') < 0).then((-1*(pl.col('away_odds'))) / ((-1*(pl.col('away_odds')) + 100))).otherwise(100 / (pl.col('away_odds') + 100))).alias('away_imp_prob')
    ])
    .with_columns([
        (pl.when(pl.col('gameType') == 2).then(((pl.col('home_win')) / (pl.col('home_win') + pl.col('away_win'))) + pl.lit(0.045)).otherwise(((pl.col('home_win')) / (pl.col('home_win') + pl.col('away_win'))) + pl.lit(0.049))).alias('home_win'),
        (pl.when(pl.col('gameType') == 2).then(((pl.col('away_win')) / (pl.col('home_win') + pl.col('away_win'))) + pl.lit(0.045)).otherwise(((pl.col('away_win')) / (pl.col('home_win') + pl.col('away_win'))) + pl.lit(0.049))).alias('away_win')
        
    ])
    .with_columns([
        (pl.col('home_win') - pl.col('home_imp_prob')).alias('home_xAdvantage'),
        (pl.col('away_win') - pl.col('away_imp_prob')).alias('away_xAdvantage')
    ])
    .with_columns([
        (pl.when(pl.col('home_xAdvantage') > pl.col('away_xAdvantage')).then(pl.col('home_xAdvantage')).otherwise(pl.col('away_xAdvantage'))).alias('Advantage'),
        (pl.when(pl.col('home_xAdvantage') > pl.col('away_xAdvantage')).then(pl.col('homeTeam.abbrev')).otherwise(pl.col('awayTeam.abbrev'))).alias('Bet_Team')
    ])
    .rename({"awayTeam.abbrev":"away_team", "homeTeam.abbrev":"home_team"})
    .select('id', "game_date", 'away_team', 'away_odds', 'away_imp_prob', 'away_win', 'home_team','home_odds', 'home_imp_prob', 'home_win', 'Bet_Team', 'Advantage')
)

Pretty_Bet_DF = (
    Bet_DF
    .select([
        pl.col("game_date").alias('Date'),
        pl.col("id").alias("Game ID"),
        pl.col("away_team").alias('Away Team'),
        pl.col('away_odds').round(0).alias('Away Odds'),
        pl.format("{}%", (pl.col('away_imp_prob')*100).round(2)).alias('Away ImpProb'),
        pl.format("{}%", (pl.col('away_win')*100).round(2)).alias('Away ExpWin'),
        pl.col("home_team").alias('Home Team'),
        pl.col('home_odds').round(0).alias('Home Odds'),
        pl.format("{}%", (pl.col('home_imp_prob')*100).round(2)).alias('Home ImpProb'),
        pl.format("{}%", (pl.col('home_win')*100).round(2)).alias('Home ExpWin'),
        pl.col("Bet_Team").alias("Bet Team"),
        pl.col("Advantage").alias('AdvNum'),
    ])
    .with_columns(pl.format("{}%", (pl.col('AdvNum')*100).round(2)).alias('Advantage'))
    .sort("AdvNum", descending=True)
    .drop('AdvNum')
)


with pl.Config(tbl_formatting="ASCII_FULL", tbl_hide_column_data_types=True, tbl_hide_dataframe_shape=True) as cfg:
    cfg.set_tbl_width_chars(200)
    print(Pretty_Bet_DF)

+------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------+
| Date       | Game ID    | Away Team | Away Odds | Away ImpProb | Away ExpWin | Home Team | Home Odds | Home ImpProb | Home ExpWin | Bet Team | Advantage |
| 2023-11-27 | 2023020325 | VGK       | -115.0    | 53.49%       | 72.92%      | CGY       | -105.0    | 51.22%       | 36.08%      | VGK      | 19.43%    |
|------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------|
| 2023-11-27 | 2023020326 | WSH       | -218.0    | 68.55%       | 81.68%      | SJS       | 180.0     | 35.71%       | 27.32%      | WSH      | 13.13%    |
|------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------|
| 2023-11-27 | 2023020321 | BOS       | -258.0    | 72.07%

# Adjust For Goalies

In [None]:
PBP_xG

In [48]:
Goalies = (
    PBP_xG
    .with_columns([(pl.when(pl.col('event_team_abbr') == pl.col('home_abbreviation'))
                     .then(pl.col('away_goalie'))
                     .otherwise(pl.col('home_goalie'))).alias('goalie'),
                     (pl.when(pl.col('event_team_abbr') == pl.col('home_abbreviation'))
                     .then(pl.col('away_abbreviation'))
                     .otherwise(pl.col('home_abbreviation'))).alias('team')
                     ])
     .filter(((pl.col('season')==2024)) & (pl.col('season_type') == 'R') & (pl.col('model_type') != 'EN'))
     .groupby('team', 'goalie')
     .agg([
         (pl.col('event_id').unique().count()).alias('Shots_Faced'),
         (pl.col('xG').sum()).alias('xGoals_Against'),
         (pl.col('is_goal').sum()).alias('Goals_Against')
          ])
     .with_columns([
        (1 - (pl.col('Goals_Against') / pl.col('Shots_Faced'))).alias('Save_Pct'),
        (1 - (pl.col('xGoals_Against') / pl.col('Shots_Faced'))).alias('xSave_Percent')
                    ])
    .with_columns([
        (pl.col('xGoals_Against') - pl.col('Goals_Against')).alias('GSAX'),
        (pl.col('Save_Pct') - pl.col('xSave_Percent')).alias('SPAX')
    ])
     .sort('SPAX', descending = False)
)

Goalies.filter((~pl.col('goalie').is_null()) & (pl.col('team') == 'LAK')).head(10)

team,goalie,Shots_Faced,xGoals_Against,Goals_Against,Save_Pct,xSave_Percent,GSAX,SPAX
str,str,u32,f32,i32,f64,f64,f64,f64
"""LAK""","""Pheonix.Copley…",73,5.938986,11,0.849315,0.918644,-5.061014,-0.069329
"""LAK""","""Cam.Talbot""",351,21.440603,19,0.945869,0.938916,2.440603,0.006953


In [56]:
Today_Goalies = pl.read_csv('TodaysGoalies.csv')
# Rename
Today_Goalies.columns = ['Time', 'Team', 'Goalie', 'Status', 'Location']
Today_Goalies.head()

Time,Team,Goalie,Status,Location
str,str,str,str,str
"""7:00 PM""","""EDM""","""Stuart.Skinner…","""Expected""","""Away"""
"""7:00 PM""","""BOS""","""Linus.Ullmark""","""Expected""","""Away"""
"""8:00 PM""","""NYR""","""Igor.Shesterki…","""Expected""","""Away"""
"""8:00 PM""","""COL""","""Alexandar.Geor…","""Expected""","""Away"""
"""9:00 PM""","""LAK""","""Cam.Talbot""","""Expected""","""Away"""


In [57]:
Today_Goalies = (
    Today_Goalies
    .join(Goalies, left_on = ['Team','Goalie'], right_on = ['team', 'goalie'], how = 'left')
)

Today_Goalies

Time,Team,Goalie,Status,Location,Shots_Faced,xGoals_Against,Goals_Against,Save_Pct,xSave_Percent,GSAX,SPAX
str,str,str,str,str,u32,f32,i32,f64,f64,f64,f64
"""7:00 PM""","""EDM""","""Stuart.Skinner…","""Expected""","""Away""",245,20.451857,26,0.893878,0.916523,-5.548143,-0.022645
"""7:00 PM""","""BOS""","""Linus.Ullmark""","""Expected""","""Away""",278,17.904852,14,0.94964,0.935594,3.904852,0.014046
"""8:00 PM""","""NYR""","""Igor.Shesterki…","""Expected""","""Away""",315,20.972584,18,0.942857,0.93342,2.972584,0.009437
"""8:00 PM""","""COL""","""Alexandar.Geor…","""Expected""","""Away""",360,23.921452,26,0.927778,0.933552,-2.078548,-0.005774
"""9:00 PM""","""LAK""","""Cam.Talbot""","""Expected""","""Away""",351,21.440603,19,0.945869,0.938916,2.440603,0.006953
"""10:00 PM""","""CGY""","""Jacob.Markstro…","""Expected""","""Away""",339,28.264456,24,0.929204,0.916624,4.264456,0.01258
"""10:00 PM ""","""SJS""","""Mackenzie.Blac…","""Expected ""","""Away""",348,26.488951,31,0.91092,0.923882,-4.511049,-0.012963
"""7:00 PM""","""FLA""","""Sergei.Bobrovs…","""Expected""","""Home""",349,22.977184,24,0.931232,0.934163,-1.022816,-0.002931
"""7:00 PM""","""TBL""","""Jonas.Johansso…","""Expected""","""Home""",465,31.963608,32,0.931183,0.931261,-0.036392,-7.8e-05
"""8:00 PM""","""DAL""","""Jake.Oettinger…","""Expected""","""Home""",358,23.987486,17,0.952514,0.932996,6.987486,0.019518


### *Thoughts - How to Adjust For Goalies*

- Option 1: Prorated Save Percentage Above Expected Adjustment:
    - Predict the likely value of shots and xG from opposing team, then adjust the xG value based on Goalie's SPAX (Save % Above Expected)
        - EX: FLA v EDM (11/20/23 @ 7:00 PM ET)
            - EDM expected to give up 3.4 xG on 68 shots.
            - Stuart Skinner (EDM) SPAX = -2.2%.
                - ***Calc: 3.4 * (1 + abs((SPAX)))***
                - FLA xG == 3.47 (a 0.07 xG Increase When Skinner In Goal)
        - Potentially this could be a model where we try to predict total shots per game and layer xG + Goalie adjustments on top.
            - This is the option I like best at the moment.

            
- Option 2: Raw xG Adjustment:
    - Unsure of how we want this to work.
    - Since Save % Above/Below Expected is a telling stat for a goalie, rather than predicitve measure. 

# NHL API Play By Play

- 1) Collect Game ID's From This Season
- 2) Collect PBP data to apply model

In [177]:
from itertools import chain

# Variables
season_dates = pd.date_range(start='20151001', end='20231126', freq='D')

# Initalize List
game_ids = []

# Loop
for i in season_dates.strftime('%Y-%m-%d').tolist():
    sched_link = "https://api-web.nhle.com/v1/schedule/"+i
    response = requests.get(sched_link)

    # Parse the JSON content of the response
    raw_data = pd.json_normalize(response.json())
    sched_data = pd.json_normalize(raw_data['gameWeek'][0])
    sched_data = pd.json_normalize(sched_data['games'][0])

    if len(sched_data) == 0:
        pass
    else:
        game_ids.append(sched_data['id'].tolist())

game_ids = list(chain(*game_ids))

In [178]:
print(game_ids)

[2015010085, 2015010087, 2015010084, 2015010086, 2015010088, 2015010083, 2015010082, 2015010090, 2015010091, 2015010093, 2015010094, 2015010092, 2015010089, 2015010098, 2015010099, 2015010101, 2015010102, 2015010097, 2015010095, 2015010096, 2015010100, 2015010103, 2015010104, 2015020001, 2015020002, 2015020003, 2015020004, 2015020005, 2015020006, 2015020007, 2015020008, 2015020009, 2015020010, 2015020011, 2015020012, 2015020013, 2015020014, 2015020015, 2015020016, 2015020017, 2015020018, 2015020019, 2015020020, 2015020021, 2015020022, 2015020023, 2015020024, 2015020025, 2015020026, 2015020027, 2015020028, 2015020029, 2015020030, 2015020031, 2015020032, 2015020033, 2015020034, 2015020035, 2015020036, 2015020037, 2015020038, 2015020039, 2015020040, 2015020041, 2015020042, 2015020043, 2015020044, 2015020045, 2015020046, 2015020047, 2015020048, 2015020049, 2015020050, 2015020051, 2015020052, 2015020053, 2015020054, 2015020055, 2015020056, 2015020057, 2015020058, 2015020059, 2015020060, 201

In [179]:
df_list = []
for i in game_ids:
    pbp_link = 'https://api-web.nhle.com/v1/gamecenter/'+str(i)+'/play-by-play'

    pbp_response = requests.get(pbp_link)
    pbp_data = pd.json_normalize(pbp_response.json())

    ## GAME DATA
    game_data = pbp_data[['id', 'season', 'gameDate', 'gameType', 'awayTeam.id', 'awayTeam.abbrev', 'homeTeam.id', 'homeTeam.abbrev']]

    # PLAYS DATA
    plays_1 = pd.json_normalize(pbp_data['plays'])

    # Create an empty DataFrame to store the normalized plays
    normalized_plays = pd.DataFrame()

    # Iterate over each row in plays_1 and normalize the JSON data
    for _, row in plays_1.iterrows():
        # Normalize the JSON data in the current row
        normalized_row = pd.json_normalize(row)

        # Concatenate the normalized row to the result DataFrame
        normalized_plays = pd.concat([normalized_plays, normalized_row], ignore_index=True)

    result_df = pd.merge(game_data.assign(key=1), normalized_plays.assign(key=1), on='key').drop('key', axis=1)

    df_list.append(result_df)

PBP_RAW = pd.concat(df_list)

ConnectionError: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))

In [174]:
PBP_RAW.groupby(['typeDescKey', 'typeCode']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,id,season,gameDate,gameType,awayTeam.id,awayTeam.abbrev,homeTeam.id,homeTeam.abbrev,eventId,period,timeInPeriod,timeRemaining,situationCode,homeTeamDefendingSide,sortOrder,periodDescriptor.number,periodDescriptor.periodType,details.eventOwnerTeamId,details.losingPlayerId,details.winningPlayerId,details.xCoord,details.yCoord,details.zoneCode,details.shotType,details.shootingPlayerId,details.goalieInNetId,details.awaySOG,details.homeSOG,details.playerId,details.blockingPlayerId,details.hittingPlayerId,details.hitteePlayerId,details.reason,details.secondaryReason,details.typeCode,details.descKey,details.duration,details.committedByPlayerId,details.drawnByPlayerId,details.scoringPlayerId,details.scoringPlayerTotal,details.assist1PlayerId,details.assist1PlayerTotal,details.assist2PlayerId,details.assist2PlayerTotal,details.awayScore,details.homeScore,details.servedByPlayerId,periodDescriptor.otPeriods
typeDescKey,typeCode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
blocked-shot,508,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55433,55430,0,0,55433,55433,55433,0,55433,0,0,0,0,55430,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40
delayed-penalty,535,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,3525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
faceoff,502,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,103796,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,83
failed-shot-attempt,537,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,0,0,31,31,31,0,31,31,31,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
game-end,524,1818,1818,1818,1818,1818,1818,1818,1818,1818,1818,1818,1818,102,1818,1818,1818,1818,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
giveaway,504,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,30890,0,0,30890,30890,30890,0,0,0,0,0,30890,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16
goal,505,11774,11774,11774,11774,11774,11774,11774,11774,11774,11774,11774,11774,11774,11689,11774,11774,11774,11774,0,0,11689,11689,11689,11665,0,11182,0,0,0,0,0,0,0,0,0,0,0,0,0,11774,11774,10762,10762,8667,8667,11774,11774,0,3
hit,503,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,79162,0,0,79161,79161,79161,0,0,0,0,0,0,0,79162,79162,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34
missed-shot,507,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,45463,0,0,45463,45463,45463,45462,45463,45051,0,0,0,0,0,0,45458,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50
penalty,509,14631,14631,14631,14631,14631,14631,14631,14631,14631,14631,14631,14631,14631,14465,14631,14631,14631,14631,0,0,14463,14463,14463,0,0,0,0,0,0,0,0,0,0,0,14631,14631,14631,14224,13200,0,0,0,0,0,0,0,0,802,3


In [175]:
PBP_RAW.head()

Unnamed: 0,id,season,gameDate,gameType,awayTeam.id,awayTeam.abbrev,homeTeam.id,homeTeam.abbrev,eventId,period,timeInPeriod,timeRemaining,situationCode,homeTeamDefendingSide,typeCode,typeDescKey,sortOrder,periodDescriptor.number,periodDescriptor.periodType,details.eventOwnerTeamId,details.losingPlayerId,details.winningPlayerId,details.xCoord,details.yCoord,details.zoneCode,details.shotType,details.shootingPlayerId,details.goalieInNetId,details.awaySOG,details.homeSOG,details.playerId,details.blockingPlayerId,details.hittingPlayerId,details.hitteePlayerId,details.reason,details.secondaryReason,details.typeCode,details.descKey,details.duration,details.committedByPlayerId,details.drawnByPlayerId,details.scoringPlayerId,details.scoringPlayerTotal,details.assist1PlayerId,details.assist1PlayerTotal,details.assist2PlayerId,details.assist2PlayerTotal,details.awayScore,details.homeScore,details.servedByPlayerId,periodDescriptor.otPeriods
0,2022020003,20222023,2022-10-11,2,14,TBL,3,NYR,51,1,00:00,20:00,1551,right,520,period-start,10,1,REG,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2022020003,20222023,2022-10-11,2,14,TBL,3,NYR,52,1,00:00,20:00,1551,right,502,faceoff,11,1,REG,14.0,8476459.0,8478010.0,0.0,0.0,N,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2022020003,20222023,2022-10-11,2,14,TBL,3,NYR,151,1,00:14,19:46,1551,right,506,shot-on-goal,12,1,REG,14.0,,,79.0,0.0,O,tip-in,8474564.0,8478048.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,
3,2022020003,20222023,2022-10-11,2,14,TBL,3,NYR,8,1,00:20,19:40,1551,right,525,takeaway,13,1,REG,14.0,,,3.0,33.0,N,,,,,,8476453.0,,,,,,,,,,,,,,,,,,,,
4,2022020003,20222023,2022-10-11,2,14,TBL,3,NYR,152,1,00:31,19:29,1551,right,508,blocked-shot,14,1,REG,3.0,,,74.0,-13.0,D,,8479984.0,,,,,8475184.0,,,,,,,,,,,,,,,,,,,


In [None]:
rename_dict = {
    "id": "game_id",
    "gameDate": "game_date",
    "awayTeam.id": "away_id",
    "awayTeam.abbrev": "away_abbreviation",
    "homeTeam.id": "home_id",
    "homeTeam.abbrev": "home_abbreviation",
    "gameType": "season_type",
    "eventId": "event_id",
    "sortOrder": "event_idx",
    "periodDescriptor.periodType": "period_type",
    "details.eventOwnerTeamId": "event_team_id",
    "details.xCoord": "details.yCoord",
    "details.zoneCode": "event_zone",
    "details.shotType": "secondary_type",
    "details.awayScore": "away_score",
    "details.homeScore": "home_score"
}

event_dict = {
    "faceoff": "FACEOFF",
    "shot-on-goal": "SHOT",
    "stoppage": "STOPPAGE",
    "hit": "HIT",
    "blocked-shot": "BLOCKED_SHOT",
    "missed-shot": "MISSED_SHOT",
    "giveaway": "GIVEAWAY",
    "takeaway": "TAKEAWAY",
    "penalty": "PENALTY",
    "goal": "GOAL",
    "period-start": "PERIOD_START",
    "period-end": "PERIOD_END",
    "delayed-penalty": "DELAYED_PENALTY",
    "game-end": "GAME_END",
    "shootout-complete": "SHOOTOUT_COMPLETE",
    "failed-shot-attempt": "FAILED_SHOT"
}

season_type_dict = {
    2: "R",
    3: "P",
    1: "PRE",
    4: "I"
}

# Rename Columns From Dictionary

# Filter
    # No Shootouts ('period_type' != 'SO')
    # No Pre-Season ('season_type' != 1)

# Convert season_type/event_type/etc. (Stored in Dictionaries above)

# Create Game and Period Seconds Remaining from timeInPeriod, timeRemaining: 'period', 'period_seconds', 'period_seconds_remaining', 'game_seconds', 'game_seconds_remaining'

# Look into situationCode, typeCode, typeDesckey (see if it can do anything for us on strength)

# Convert homeTeamDefendingSide and x + y to create x_abs and y_abs. (Use code from model createion for logic)

# Create event_player_1_XXX col from details.winningPlayerId, details.shootingPlayerId, details.playerId, details.hittingPlayerId

# Create event_player_2_XXX cols from details.goalieInNetId, details.losingPlayerId, details.blockingPlayerId, details.hitteePlayerId, 

In [185]:
PBP_RAW['periodDescriptor.periodType'].value_counts()

periodDescriptor.periodType
REG    557450
OT       7533
SO       1372
Name: count, dtype: int64

# Things To Do

- 1) Up-To-Date NHL API PBP Data
    - a) Replicate data coming in from SDV download.
    - b) Shift Data (https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId=2023XXXX)
- 2) Save models to be able to apply to new API data.
- 3) Build Goalie scraper (similar to pitchers)
- 4) Create WP Adjustements for:
    - a) Days rest
    - b) Goalies
    - c) Injuries/Lineups?