In [5]:
# Pandas
import pandas as pd
# None removes pandas' column/row display limits, so frames are shown in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np

# Polars (Arrow)
from pyarrow.dataset import dataset
import polars as pl
# A negative n tells polars to print every row / column instead of truncating.
pl.Config.set_tbl_rows(n=-1)
pl.Config.set_tbl_cols(n=-1)


# Hockey Specific
import sportsdataverse as sdv

# Hit API
import requests

# Load Expected Goals Data

In [30]:
# Parquet file with the model output (presumably one row per play-by-play
# event with its predicted xG -- TODO confirm against the producing notebook).
model_path = "FinalExpectedGoalsPredictions.parquet"

# Eagerly read the whole file into a polars DataFrame.
PBP_xG = pl.read_parquet(source=model_path)

# Create Standings Table

In [31]:
# Exponent k of the Pythagorean expectation: win% = F^k / (F^k + A^k).
pythag_exp = 1.86

# Rows that feed the standings: season 2024, season_type 'R' and every model
# type except 'EN' (presumably regular season / empty-net -- TODO confirm).
# Shared by the offensive and defensive tables so the two cannot drift apart.
standings_rows = (
    (pl.col('season') == 2024)
    & (pl.col('season_type') == 'R')
    & (pl.col('model_type') != 'EN')
)

# 1 for every GOAL event and 0 otherwise; summing it counts goals per group.
goal_flag = (
    pl.when(pl.col('event_type') == 'GOAL')
    .then(pl.lit(1))
    .otherwise(pl.lit(0))
)


def _pythag_win_pct(scored, allowed, exponent):
    """Return the Pythagorean win-percentage expression.

    Parameters
    ----------
    scored, allowed : pl.Expr
        Goals (or expected goals) for and against.
    exponent : float
        Pythagorean exponent applied to both sides.

    Returns
    -------
    pl.Expr
        scored^exponent / (scored^exponent + allowed^exponent)
    """
    scored_pow = scored.pow(exponent)
    allowed_pow = allowed.pow(exponent)
    return scored_pow / (scored_pow + allowed_pow)


# Defensive table: each event is charged to the opponent of the team that
# generated it (event team is home -> away team, otherwise -> home team).
Def = (
    PBP_xG
    .with_columns(
        pl.when(pl.col('event_team_abbr') == pl.col('home_abbreviation'))
        .then(pl.col('away_abbreviation'))
        .otherwise(pl.col('home_abbreviation'))
        .alias('team')
    )
    .filter(standings_rows)
    # `groupby` was renamed to `group_by` in polars 0.19 and later removed.
    .group_by('team')
    .agg([
        pl.col('xG').sum().alias('xGoals_Against'),
        goal_flag.sum().alias('Goals_Against'),
    ])
    .with_columns((pl.col('Goals_Against') - pl.col('xGoals_Against')).alias('D_Diff_Over_Under'))
    .sort('xGoals_Against', descending=True)
)

# Offensive table: each event is credited to the team that generated it.
Off = (
    PBP_xG
    .with_columns(pl.col('event_team_abbr').alias('team'))
    .filter(standings_rows)
    .group_by('team')
    .agg([
        pl.col('xG').sum().alias('xGoals_For'),
        goal_flag.sum().alias('Goals_For'),
    ])
    .with_columns((pl.col('Goals_For') - pl.col('xGoals_For')).alias('O_Diff_Over_Under'))
    .sort('xGoals_For', descending=True)
)

# Blend of actual and expected goals (simple average) used for the weighted win%.
blended_for = (pl.col('Goals_For') + pl.col('xGoals_For')) / 2
blended_against = (pl.col('Goals_Against') + pl.col('xGoals_Against')) / 2

# One row per team with offensive + defensive totals and three win% estimates.
NHL_Stats = (
    Off
    .join(Def, on='team', how="inner")
    .with_columns([
        (pl.col('xGoals_For') - pl.col('xGoals_Against')).alias('xG_Difference'),
        (pl.col('Goals_For') - pl.col('Goals_Against')).alias('G_Difference'),
    ])
    .with_columns(
        (pl.col('G_Difference') - pl.col('xG_Difference')).alias('Diff_Over_Under'),
        _pythag_win_pct(pl.col('xGoals_For'), pl.col('xGoals_Against'), pythag_exp).alias('xGWin_Pct'),
        _pythag_win_pct(pl.col('Goals_For'), pl.col('Goals_Against'), pythag_exp).alias('GWin_Pct'),
        _pythag_win_pct(blended_for, blended_against, pythag_exp).alias('Weighted_Win_Pct'),
    )
    .sort('Weighted_Win_Pct', descending=True)
)

NHL_Stats.head(5)

team,xGoals_For,Goals_For,O_Diff_Over_Under,xGoals_Against,Goals_Against,D_Diff_Over_Under,xG_Difference,G_Difference,Diff_Over_Under,xGWin_Pct,GWin_Pct,Weighted_Win_Pct
str,f32,i32,f64,f32,i32,f64,f32,i32,f64,f32,f64,f64
"""VAN""",35.697395,52,16.302605,38.260727,23,-15.260727,-2.563332,29,31.563332,0.467799,0.82014,0.660894
"""LAK""",36.181,44,7.819,27.379593,30,2.620407,8.801407,14,5.198593,0.626785,0.670924,0.650753
"""PIT""",40.25325,35,-5.25325,27.973797,29,1.026203,12.279453,6,-6.279453,0.663046,0.586564,0.626581
"""NSH""",39.331017,31,-8.331017,28.797981,28,-0.797981,10.533035,3,-7.533035,0.641016,0.547188,0.598088
"""VGK""",35.357235,44,8.642765,37.139286,27,-10.139286,-1.782051,17,18.782051,0.477151,0.71266,0.597726


# Get Today's Games

In [32]:
# Variables
date_str = "2023-11-17"

# Request the schedule from the NHL web API for the chosen date
sched_link = "https://api-web.nhle.com/v1/schedule/"+date_str
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as an unrelated
# JSON/KeyError failure a few lines further down.
response = requests.get(sched_link, timeout=30)
response.raise_for_status()

# Parse the JSON content of the response
raw_data = pd.json_normalize(response.json())
# Flatten the 'gameWeek' list into one row per entry
sched_data = pd.json_normalize(raw_data['gameWeek'][0])

# Get Odds
# Look up the id of the odds partner whose country is 'US'; the per-game odds
# lists are filtered on this id later on.
odds_data = pd.json_normalize(raw_data['oddsPartners'][0])
US_Prov_ID = int(odds_data[odds_data['country'] == 'US']['partnerId'].iloc[0])

# Only the games of the first 'gameWeek' entry are used
# (presumably the day matching date_str -- TODO confirm).
sched_data = pd.json_normalize(sched_data['games'][0])
game_df = sched_data[['id', 'season', 'gameState', 'awayTeam.abbrev', 'homeTeam.abbrev', 'homeTeam.odds', 'awayTeam.odds', 'gameCenterLink']]

In [33]:
game_dfs = []

## Existing Game Odds
# Manually recorded odds, used for games whose odds cannot be read from the
# API response. The three lists are aligned by position.
exist_id = [2023020251]
exist_home_odds = [ 165]
exist_away_odds = [-180]

exist_df = pd.DataFrame({
    'id': exist_id,
    'home_odds': exist_home_odds,
    'away_odds': exist_away_odds
})


def _provider_odd(odds_records, provider_id):
    """Return the 'value' quoted by `provider_id` in one team's odds records.

    Parameters
    ----------
    odds_records : list of dict
        Content of a single 'homeTeam.odds' / 'awayTeam.odds' cell.
    provider_id : int
        Provider whose quote should be returned.

    Raises
    ------
    Exception
        Whatever pandas raises when the records are missing or malformed, or
        when the provider has no quote; the caller treats any failure as
        "no odds available from the API".
    """
    odds_df = pd.json_normalize(odds_records)
    return odds_df[odds_df['providerId'] == provider_id]['value'].iloc[0]


for i in game_df['id']:

    raw_df = game_df[game_df['id'] == i]

    try:
        # Positional access (.iloc[0]) instead of a label lookup with a manual
        # counter: the label only matched while game_df kept a 0..n-1 index in
        # loop order, and a mismatch was silently treated as "no odds".
        home_odd = _provider_odd(raw_df['homeTeam.odds'].iloc[0], US_Prov_ID)
        away_odd = _provider_odd(raw_df['awayTeam.odds'].iloc[0], US_Prov_ID)
        odds_from_api = True

    except Exception as api_err:
        # No usable odds in the API response: fall back to the manual table.
        manual_odds = exist_df[exist_df['id'] == i]
        if manual_odds.empty:
            # Same exception type as before, but with a message that says
            # what is missing and where to add it.
            raise IndexError(
                f"No odds available for game {i}: the API response has none "
                f"and the game is not listed in exist_id"
            ) from api_err

        home_odd = manual_odds['home_odds'].iloc[0]
        away_odd = manual_odds['away_odds'].iloc[0]
        odds_from_api = False

    df = raw_df[['id', 'season', 'awayTeam.abbrev', 'homeTeam.abbrev', 'gameState']]
    df = df.assign(home_odds=home_odd, away_odds=away_odd, game_date=date_str)

    if not odds_from_api:
        gme_lab = (
            f"{df['awayTeam.abbrev'].iloc[0]} ({round(away_odd, 0)}) @ "
            f"{df['homeTeam.abbrev'].iloc[0]} ({round(home_odd, 0)})"
        )
        print(f"{gme_lab} Has Already Started")

    game_dfs.append(df)

# Concatenate the per-game frames, force both odds columns to float64 and
# convert the result to a polars DataFrame
result_df = pl.DataFrame(
    pd.concat(game_dfs, ignore_index=True).astype({'home_odds': 'float64', 'away_odds': 'float64'})
)

TOR (-180) @ DET (165) Has Already Started


In [34]:
def _side_stats(side):
    """Return NHL_Stats with every column renamed to '<side>_<column>'."""
    return NHL_Stats.select([pl.col(name).alias(f'{side}_{name}') for name in NHL_Stats.columns])


def _implied_prob(side):
    """Implied probability expression built from the '<side>_odds' column.

    Negative odds use |odds| / (|odds| + 100); all other odds use
    100 / (odds + 100). The result is aliased '<side>_imp_prob'.
    """
    line = pl.col(f'{side}_odds')
    flipped = -1 * line
    return (
        pl.when(line < 0)
        .then(flipped / (flipped + 100))
        .otherwise(100 / (line + 100))
        .alias(f'{side}_imp_prob')
    )


def _pct_text(source, label):
    """Render a fraction column as a 'xx.xx%' string column named `label`."""
    return pl.format("{}%", (pl.col(source) * 100).round(2)).alias(label)


# Reusable expression fragments for the pipeline below.
home_strength = pl.col('home_Weighted_Win_Pct')
away_strength = pl.col('away_Weighted_Win_Pct')
win_total = pl.col('home_win') + pl.col('away_win')
home_has_edge = pl.col('home_xAdvantage') > pl.col('away_xAdvantage')

Bet_DF = (
    result_df
    # Attach each side's season stats under 'away_' / 'home_' prefixes.
    .join(_side_stats('away'), left_on=['awayTeam.abbrev'], right_on=['away_team'])
    .join(_side_stats('home'), left_on=['homeTeam.abbrev'], right_on=['home_team'])
    # Raw head-to-head strengths plus the bookmaker's implied probabilities.
    .with_columns([
        (home_strength * (1 - away_strength)).alias('home_win'),
        (away_strength * (1 - home_strength)).alias('away_win'),
        _implied_prob('home'),
        _implied_prob('away'),
    ])
    # Normalise so both win probabilities sum to 1; both expressions read the
    # un-normalised values because they sit in the same with_columns call.
    .with_columns([
        (pl.col('home_win') / win_total).alias('home_win'),
        (pl.col('away_win') / win_total).alias('away_win'),
    ])
    # Edge = model probability minus implied probability.
    .with_columns([
        (pl.col('home_win') - pl.col('home_imp_prob')).alias('home_xAdvantage'),
        (pl.col('away_win') - pl.col('away_imp_prob')).alias('away_xAdvantage'),
    ])
    # Pick the side with the larger edge; the away side is chosen unless the
    # home edge is strictly larger.
    .with_columns([
        pl.when(home_has_edge)
        .then(pl.col('home_xAdvantage'))
        .otherwise(pl.col('away_xAdvantage'))
        .alias('Advantage'),
        pl.when(home_has_edge)
        .then(pl.col('homeTeam.abbrev'))
        .otherwise(pl.col('awayTeam.abbrev'))
        .alias('Bet_Team'),
    ])
    .rename({"awayTeam.abbrev": "away_team", "homeTeam.abbrev": "home_team"})
    .select([
        'id', "game_date",
        'away_team', 'away_odds', 'away_imp_prob', 'away_win',
        'home_team', 'home_odds', 'home_imp_prob', 'home_win',
        'Bet_Team', 'Advantage',
    ])
)

# Presentation copy: friendly column names, percentages as text, best edge first.
Pretty_Bet_DF = (
    Bet_DF
    .select([
        pl.col("game_date").alias('Date'),
        pl.col("id").alias("Game ID"),
        pl.col("away_team").alias('Away Team'),
        pl.col('away_odds').round(0).alias('Away Odds'),
        _pct_text('away_imp_prob', 'Away ImpProb'),
        _pct_text('away_win', 'Away ExpWin'),
        pl.col("home_team").alias('Home Team'),
        pl.col('home_odds').round(0).alias('Home Odds'),
        _pct_text('home_imp_prob', 'Home ImpProb'),
        _pct_text('home_win', 'Home ExpWin'),
        pl.col("Bet_Team").alias("Bet Team"),
        # Numeric copy kept only so the rows can be sorted before it is dropped.
        pl.col("Advantage").alias('AdvNum'),
    ])
    .with_columns(_pct_text('AdvNum', 'Advantage'))
    .sort("AdvNum", descending=True)
    .drop('AdvNum')
)


# Temporary display settings: ASCII borders, no dtype row, no shape line.
with pl.Config(tbl_formatting="ASCII_FULL", tbl_hide_column_data_types=True, tbl_hide_dataframe_shape=True) as cfg:
    cfg.set_tbl_width_chars(200)
    print(Pretty_Bet_DF)

+------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------+
| Date       | Game ID    | Away Team | Away Odds | Away ImpProb | Away ExpWin | Home Team | Home Odds | Home ImpProb | Home ExpWin | Bet Team | Advantage |
| 2023-11-17 | 2023020251 | TOR       | -180.0    | 64.29%       | 47.79%      | DET       | 165.0     | 37.74%       | 52.21%      | DET      | 14.47%    |
|------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------|
| 2023-11-17 | 2023020253 | FLA       | -175.0    | 63.64%       | 53.95%      | ANA       | 145.0     | 40.82%       | 46.05%      | ANA      | 5.23%     |
|------------+------------+-----------+-----------+--------------+-------------+-----------+-----------+--------------+-------------+----------+-----------|
| 2023-11-17 | 2023020252 | BUF       | 154.0     | 39.37%