In [33]:
import pandas as pd

In [70]:
# Read the pre-cleaned 2024 player table from disk and preview its first rows.
players_2024 = pd.read_csv(filepath_or_buffer="../data/pre-cleaned/players_2024.csv")
players_2024.head(n=5)

Unnamed: 0,playerId,season,name,team,position,situation,games_played,icetime,shifts,gameScore,...,OffIce_F_xGoals,OffIce_A_xGoals,OffIce_F_shotAttempts,OffIce_A_shotAttempts,xGoalsForAfterShifts,xGoalsAgainstAfterShifts,corsiForAfterShifts,corsiAgainstAfterShifts,fenwickForAfterShifts,fenwickAgainstAfterShifts
0,8478047,2024,Michael Bunting,NSH,L,other,76,2237.0,37.0,26.19,...,7.28,10.09,72.0,87.0,0.0,0.0,0.0,0.0,0.0,0.0
1,8478047,2024,Michael Bunting,NSH,L,all,76,70819.0,1474.0,43.7,...,161.54,187.75,3221.0,3522.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8478047,2024,Michael Bunting,NSH,L,5on5,76,59813.0,1294.0,43.7,...,112.73,122.08,2661.0,2707.0,0.71,1.71,19.0,43.0,16.0,31.0
3,8478047,2024,Michael Bunting,NSH,L,4on5,76,6.0,2.0,2.58,...,0.2,0.17,4.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0
4,8478047,2024,Michael Bunting,NSH,L,5on4,76,8763.0,141.0,36.88,...,23.81,2.6,311.0,54.0,0.0,0.01,0.0,1.0,0.0,1.0


Cleaning 2024 Data

In [60]:
# Keep one row per player. The raw table carries one row per game situation
# ("other", "all", "5on5", "4on5", "5on4"); only the "all" rows are retained.
# When the file has no `situation` column, fall back to the unfiltered table
# so this cell never depends on a `players_2024_filtered` left over from an
# earlier run (the original raised NameError on a fresh kernel in that case).
if "situation" in players_2024.columns:
    players_2024_filtered = players_2024[players_2024["situation"] == "all"].copy()
else:
    players_2024_filtered = players_2024.copy()

# Identity columns plus the raw counting stats needed for the derived rates.
columns = ["playerId", "season", "name", "position", "team", "games_played", "I_F_xGoals", "I_F_primaryAssists",
           "I_F_secondaryAssists", "I_F_shotsOnGoal", "I_F_points", "I_F_goals", "icetime",
           "OnIce_F_highDangerShots", "OnIce_F_goals"]

# Derive per-game and per-60 rates in a single chain, then drop the raw
# `icetime` column, which is only an input to those rates.
# NOTE(review): `icetime / 60` presumably converts seconds to minutes -- confirm
# against the data dictionary.
# NOTE(review): players with zero shots on goal get NaN/inf in
# `shot_percentage`; handle downstream if that matters.
players_2024_filtered = (
    players_2024_filtered[columns]
    .assign(
        icetime_per_game=lambda d: (d["icetime"] / 60) / d["games_played"],
        shot_percentage=lambda d: d["I_F_goals"] / d["I_F_shotsOnGoal"],
        # Points per 60 units of (icetime / 60); computed straight from total
        # icetime instead of dividing by games played and multiplying back.
        points_per_60=lambda d: d["I_F_points"] / (d["icetime"] / 60) * 60,
        # Column name contains "-", so it cannot be passed as a plain keyword.
        **{"ixG-goals": lambda d: d["I_F_xGoals"] - d["I_F_goals"]},
        ppg=lambda d: d["I_F_points"] / d["games_played"],
        apg=lambda d: (d["I_F_primaryAssists"] + d["I_F_secondaryAssists"]) / d["games_played"],
        gpg=lambda d: d["I_F_goals"] / d["games_played"],
    )
    .drop(columns=["icetime"])
)

players_2024_filtered.head()

Unnamed: 0,playerId,season,name,position,team,games_played,I_F_xGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,...,I_F_goals,OnIce_F_highDangerShots,OnIce_F_goals,icetime_per_game,shot_percentage,points_per_60,ixG-goals,ppg,apg,gpg
1,8478047,2024,Michael Bunting,L,NSH,76,25.19,12.0,7.0,155.0,...,19.0,82.0,67.0,15.530482,0.122581,1.931685,6.19,0.5,0.25,0.25
6,8480950,2024,Ilya Lyubushkin,D,DAL,80,2.33,4.0,9.0,51.0,...,1.0,70.0,53.0,17.386875,0.019608,0.603904,1.33,0.175,0.1625,0.0125
11,8477369,2024,Carson Soucy,D,NYR,75,3.13,3.0,6.0,81.0,...,4.0,51.0,42.0,17.915111,0.049383,0.580516,-0.87,0.173333,0.12,0.053333
16,8481518,2024,Nolan Foote,L,NJD,7,0.51,0.0,1.0,5.0,...,0.0,4.0,3.0,9.82381,0.0,0.872516,0.51,0.142857,0.142857,0.0
21,8477964,2024,Ivan Barbashev,C,VGK,70,18.33,20.0,8.0,111.0,...,23.0,72.0,74.0,16.798333,0.207207,2.602299,-4.67,0.728571,0.4,0.328571


Getting Birthdate

In [48]:
import requests

def get_player_info(player_id, timeout=10):
    """Fetch one player's birthdate from the NHL web API.

    Sends a GET request to the player's ``landing`` endpoint and reads the
    ``birthDate`` field from the JSON body.

    Parameters
    ----------
    player_id : int or str
        NHL player identifier, interpolated into the request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10), so a
        stalled connection cannot hang a bulk download.

    Returns
    -------
    dict or None
        ``{"playerId": player_id, "birthdate": <value or None>}`` on success.
        ``None`` when the request, the HTTP status check, or the JSON handling
        fails; the failure is printed instead of raised so that one bad player
        does not abort the caller's loop.
    """
    url = f"https://api-web.nhle.com/v1/player/{player_id}/landing"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": "https://www.nhl.com/"
    }

    # Pre-bind so the error handler can distinguish "no response at all"
    # (connection error, timeout) from a response with a bad HTTP status.
    res = None
    try:
        res = requests.get(url, headers=headers, timeout=timeout)
        res.raise_for_status()

        player = res.json()

        return {
            "playerId": player_id,
            "birthdate": player.get("birthDate")
        }

    except Exception as e:
        # Deliberately broad: this is a best-effort fetch. Use an f-string so
        # integer ids and status codes format without a TypeError.
        status = res.status_code if res is not None else "no response"
        print(f"{player_id} {status}: {e}")
        return None

In [49]:
import time
from tqdm import tqdm

In [50]:
# Every distinct player id that appears in the filtered 2024 table.
player_ids = players_2024_filtered["playerId"].unique()

# Query the API once per player, keeping only the lookups that returned data.
players = []
for player_id in tqdm(player_ids):
    info = get_player_info(player_id)
    if info:
        players.append(info)
    # Short pause between consecutive requests.
    time.sleep(0.15)

100%|██████████| 920/920 [08:34<00:00,  1.79it/s]


In [52]:
# Build the id -> birthdate lookup table. The columns are named explicitly so
# that an empty `players` list (every request failed) still produces a frame
# with a `playerId` column for the merge on "playerId" instead of a
# column-less frame that would raise KeyError there.
player_birthdates = pd.DataFrame(players, columns=["playerId", "birthdate"])
player_birthdates.head()

Unnamed: 0,playerId,birthdate
0,8478047,1995-09-17
1,8480950,1994-04-06
2,8477369,1994-07-27
3,8481518,2000-11-29
4,8477964,1995-12-14


In [53]:
# Persist the fetched birthdates (without the row index) so they can be reused.
player_birthdates.to_csv(path_or_buf="../data/player_birthdates_test.csv", index=False)

In [61]:
# Attach each player's birthdate; a left join keeps players whose lookup failed.
players_2024_filtered2 = pd.merge(
    left=players_2024_filtered,
    right=player_birthdates,
    how="left",
    on="playerId",
)
players_2024_filtered2.head()

Unnamed: 0,playerId,season,name,position,team,games_played,I_F_xGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,...,OnIce_F_highDangerShots,OnIce_F_goals,icetime_per_game,shot_percentage,points_per_60,ixG-goals,ppg,apg,gpg,birthdate
0,8478047,2024,Michael Bunting,L,NSH,76,25.19,12.0,7.0,155.0,...,82.0,67.0,15.530482,0.122581,1.931685,6.19,0.5,0.25,0.25,1995-09-17
1,8480950,2024,Ilya Lyubushkin,D,DAL,80,2.33,4.0,9.0,51.0,...,70.0,53.0,17.386875,0.019608,0.603904,1.33,0.175,0.1625,0.0125,1994-04-06
2,8477369,2024,Carson Soucy,D,NYR,75,3.13,3.0,6.0,81.0,...,51.0,42.0,17.915111,0.049383,0.580516,-0.87,0.173333,0.12,0.053333,1994-07-27
3,8481518,2024,Nolan Foote,L,NJD,7,0.51,0.0,1.0,5.0,...,4.0,3.0,9.82381,0.0,0.872516,0.51,0.142857,0.142857,0.0,2000-11-29
4,8477964,2024,Ivan Barbashev,C,VGK,70,18.33,20.0,8.0,111.0,...,72.0,74.0,16.798333,0.207207,2.602299,-4.67,0.728571,0.4,0.328571,1995-12-14


In [62]:
from datetime import datetime

In [63]:
# Parse the birthdate strings and keep only the calendar year of birth.
players_2024_filtered2 = players_2024_filtered2.assign(
    birthYear=lambda frame: pd.to_datetime(frame["birthdate"]).dt.year
)
players_2024_filtered2.head()

Unnamed: 0,playerId,season,name,position,team,games_played,I_F_xGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,...,OnIce_F_goals,icetime_per_game,shot_percentage,points_per_60,ixG-goals,ppg,apg,gpg,birthdate,birthYear
0,8478047,2024,Michael Bunting,L,NSH,76,25.19,12.0,7.0,155.0,...,67.0,15.530482,0.122581,1.931685,6.19,0.5,0.25,0.25,1995-09-17,1995
1,8480950,2024,Ilya Lyubushkin,D,DAL,80,2.33,4.0,9.0,51.0,...,53.0,17.386875,0.019608,0.603904,1.33,0.175,0.1625,0.0125,1994-04-06,1994
2,8477369,2024,Carson Soucy,D,NYR,75,3.13,3.0,6.0,81.0,...,42.0,17.915111,0.049383,0.580516,-0.87,0.173333,0.12,0.053333,1994-07-27,1994
3,8481518,2024,Nolan Foote,L,NJD,7,0.51,0.0,1.0,5.0,...,3.0,9.82381,0.0,0.872516,0.51,0.142857,0.142857,0.0,2000-11-29,2000
4,8477964,2024,Ivan Barbashev,C,VGK,70,18.33,20.0,8.0,111.0,...,74.0,16.798333,0.207207,2.602299,-4.67,0.728571,0.4,0.328571,1995-12-14,1995


In [64]:
# Age is the season year minus the birth year (month and day are not considered).
players_2024_filtered2["age"] = players_2024_filtered2["season"].sub(players_2024_filtered2["birthYear"])

In [65]:
# The full birthdate is no longer needed once birthYear and age are derived.
players_2024_filtered2 = players_2024_filtered2.drop(columns=["birthdate"])

In [68]:
# Preview the final table with the derived birthYear and age columns.
players_2024_filtered2.head(n=5)

Unnamed: 0,playerId,season,name,position,team,games_played,I_F_xGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,...,OnIce_F_goals,icetime_per_game,shot_percentage,points_per_60,ixG-goals,ppg,apg,gpg,birthYear,age
0,8478047,2024,Michael Bunting,L,NSH,76,25.19,12.0,7.0,155.0,...,67.0,15.530482,0.122581,1.931685,6.19,0.5,0.25,0.25,1995,29
1,8480950,2024,Ilya Lyubushkin,D,DAL,80,2.33,4.0,9.0,51.0,...,53.0,17.386875,0.019608,0.603904,1.33,0.175,0.1625,0.0125,1994,30
2,8477369,2024,Carson Soucy,D,NYR,75,3.13,3.0,6.0,81.0,...,42.0,17.915111,0.049383,0.580516,-0.87,0.173333,0.12,0.053333,1994,30
3,8481518,2024,Nolan Foote,L,NJD,7,0.51,0.0,1.0,5.0,...,3.0,9.82381,0.0,0.872516,0.51,0.142857,0.142857,0.0,2000,24
4,8477964,2024,Ivan Barbashev,C,VGK,70,18.33,20.0,8.0,111.0,...,74.0,16.798333,0.207207,2.602299,-4.67,0.728571,0.4,0.328571,1995,29
