In [1]:
# Import the dependencies.
import pandas as pd
from copy import deepcopy
import requests

In [2]:
# Most recent season to pull, written as <start year><end year>
season1 = 20192020

# Going back one season lowers both years by one, i.e. subtracts 10001.
# The seasons are stored as strings because they are concatenated into request URLs.
season1, season2, season3 = [str(season1 - 10001 * years_back) for years_back in range(3)]

In [3]:
# Endpoint that lists teams; the per-team roster URLs are built on top of it
team_url = 'https://statsapi.web.nhl.com/api/v1/teams'

# Request the team listing and decode the JSON body
team_info = requests.get(team_url).json()

# Keep the ID (as text, since it is later concatenated into URLs) and the
# abbreviation of every team in the response
team_data = [
    {"ID": str(entry["id"]), "Team": entry["abbreviation"]}
    for entry in team_info["teams"]
]
team_data

[{'ID': '1', 'Team': 'NJD'},
 {'ID': '2', 'Team': 'NYI'},
 {'ID': '3', 'Team': 'NYR'},
 {'ID': '4', 'Team': 'PHI'},
 {'ID': '5', 'Team': 'PIT'},
 {'ID': '6', 'Team': 'BOS'},
 {'ID': '7', 'Team': 'BUF'},
 {'ID': '8', 'Team': 'MTL'},
 {'ID': '9', 'Team': 'OTT'},
 {'ID': '10', 'Team': 'TOR'},
 {'ID': '12', 'Team': 'CAR'},
 {'ID': '13', 'Team': 'FLA'},
 {'ID': '14', 'Team': 'TBL'},
 {'ID': '15', 'Team': 'WSH'},
 {'ID': '16', 'Team': 'CHI'},
 {'ID': '17', 'Team': 'DET'},
 {'ID': '18', 'Team': 'NSH'},
 {'ID': '19', 'Team': 'STL'},
 {'ID': '20', 'Team': 'CGY'},
 {'ID': '21', 'Team': 'COL'},
 {'ID': '22', 'Team': 'EDM'},
 {'ID': '23', 'Team': 'VAN'},
 {'ID': '24', 'Team': 'ANA'},
 {'ID': '25', 'Team': 'DAL'},
 {'ID': '26', 'Team': 'LAK'},
 {'ID': '28', 'Team': 'SJS'},
 {'ID': '29', 'Team': 'CBJ'},
 {'ID': '30', 'Team': 'MIN'},
 {'ID': '52', 'Team': 'WPG'},
 {'ID': '53', 'Team': 'ARI'},
 {'ID': '54', 'Team': 'VGK'}]

In [4]:
# Accumulates one record per rostered player across all teams
player_data = []

# One roster request per team; every roster entry is flattened into a player record
for team in team_data:

    # Roster endpoint for this team: <team_url>/<team ID>/roster
    roster_url = "/".join([team_url, team['ID'], "roster"])

    # Fetch the roster and decode the JSON body
    roster_info = requests.get(roster_url).json()

    # Keep the player ID (as text), full name, team abbreviation and position
    player_data.extend(
        {'player_id': str(entry['person']['id']),
         'Player Name': entry['person']['fullName'],
         'Team': team['Team'],
         'Position': entry['position']['abbreviation']}
        for entry in roster_info['roster']
    )

In [5]:
# Split the player records by position: 'G' marks a goalie, anything else is a skater.
# Both lists hold the same dict objects as player_data (no copies are made).
goalie_data = [record for record in player_data if record['Position'] == 'G']
skater_data = [record for record in player_data if record['Position'] != 'G']

In [6]:
# Looping through every goalie and adding stats from the most recent season (season1).
# Each record in goalie_data is updated in place.
for player in goalie_data:

    # Creating URL for API call for a specific player's stats in a specific season
    player_url = "https://statsapi.web.nhl.com/api/v1/people/" + player['player_id'] + "/stats?stats=statsSingleSeason&season=" + season1

    try:
        # API call for player stats in JSON format
        player_info = requests.get(player_url).json()
        player_stats = player_info['stats'][0]['splits'][0]['stat']

        # Selected stats, keyed by output column name
        goalie_stats = {
            "Games": player_stats['games'],
            "Games Started": player_stats['gamesStarted'],
            "W": player_stats['wins'],
            "L": player_stats['losses'],
            "OTL": player_stats['ot'],
            "GAA": player_stats['goalAgainstAverage'],
            "GA": player_stats['goalsAgainst'],
            "SA": player_stats['shotsAgainst'],
            "SV": player_stats['saves'],
            "SV%": player_stats['savePercentage'],
            "SO": player_stats['shutouts'],
            "MIN": player_stats['timeOnIce'],
        }

    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Add stats as 0 if player not found or if player did not play that season.
        # Only failures a lookup can produce are handled (request error, undecodable
        # JSON, missing split or stat); a bare `except:` would also swallow
        # KeyboardInterrupt and hide genuine programming errors.
        goalie_stats = {
            "Games": 0,
            "Games Started": 0,
            "W": 0,
            "L": 0,
            "OTL": 0,
            "GAA": 0,
            "GA": 0,
            "SA": 0,
            "SV": 0,
            "SV%": 0,
            "SO": 0,
            "MIN": "0",  # kept as text, like the API's time-on-ice value
        }

    # Attach the stats (or the zero placeholders) to the goalie's record
    player.update(goalie_stats)

In [7]:
# Building function to get skater stats for a specific season
def get_skater_stats(data, season):
    """Attach single-season skater stats to every player record in ``data``.

    For each record, the player's stats for ``season`` are requested from the
    NHL stats API and a fixed set of stat columns is written into the record.
    When the lookup fails or the response has no stats for that season, every
    stat column is set to 0 instead.

    Args:
        data: List of player dicts; each must contain a string ``'player_id'``.
            The dicts are modified in place.
        season: Season identifier as a string (e.g. ``"20192020"``); it is
            concatenated into the request URL.

    Returns:
        The same ``data`` list, with the stat columns added to each record.
    """
    # Stat columns written to every record, in output order
    stat_columns = ("Games", "G", "A", "Points", "+/-", "PIM", "SOG", "GWG",
                    "PPG", "PPA", "PPP", "BS", "Hits", "SHG", "SHA")

    # Looping through every skater and adding stats from season
    for player in data:

        # Creating URL for API call for a specific player's stats in a specific season
        player_url = "https://statsapi.web.nhl.com/api/v1/people/" + player['player_id'] + "/stats?stats=statsSingleSeason&season=" + season

        try:
            # API call for player stats in JSON format
            player_info = requests.get(player_url).json()
            player_stats = player_info['stats'][0]['splits'][0]['stat']

            # Selected stats, keyed by output column name
            season_stats = {
                "Games": player_stats['games'],
                "G": player_stats['goals'],
                "A": player_stats['assists'],
                "Points": player_stats['points'],
                "+/-": player_stats['plusMinus'],
                "PIM": int(player_stats['penaltyMinutes']),
                "SOG": player_stats['shots'],
                "GWG": player_stats['gameWinningGoals'],
                "PPG": player_stats['powerPlayGoals'],
                # Assists are not reported directly: points minus goals
                "PPA": player_stats['powerPlayPoints'] - player_stats['powerPlayGoals'],
                "PPP": player_stats['powerPlayPoints'],
                "BS": player_stats['blocked'],
                "Hits": player_stats['hits'],
                "SHG": player_stats['shortHandedGoals'],
                "SHA": player_stats['shortHandedPoints'] - player_stats['shortHandedGoals'],
            }

        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Add stats as 0 if player not found or if player did not play that season.
            # Only failures a lookup can produce are handled (request error, undecodable
            # JSON, missing split or stat); a bare `except:` would also swallow
            # KeyboardInterrupt and hide genuine programming errors.
            season_stats = dict.fromkeys(stat_columns, 0)

        # Attach the stats (or the zero placeholders) to the player's record
        player.update(season_stats)

    return data

In [8]:
# Skater stats for the most recent season; a deep copy keeps skater_data itself unmodified
skater_data_season1 = get_skater_stats(data=deepcopy(skater_data), season=season1)

In [9]:
# Skater stats for the second most recent season; a deep copy keeps skater_data itself unmodified
skater_data_season2 = get_skater_stats(data=deepcopy(skater_data), season=season2)

In [10]:
# Skater stats for the third most recent season; a deep copy keeps skater_data itself unmodified
skater_data_season3 = get_skater_stats(data=deepcopy(skater_data), season=season3)

In [11]:
# Convert the lists of dictionaries into DataFrames (one row per player record)
goalie_df = pd.DataFrame(goalie_data)
skater_season1_df, skater_season2_df, skater_season3_df = [
    pd.DataFrame(season_records)
    for season_records in (skater_data_season1, skater_data_season2, skater_data_season3)
]

In [12]:
# Cleaning goalie data: rank by wins (descending), then drop goalies with 0 games.
# The filter is evaluated on the already-sorted frame through a .loc callable so the
# boolean mask shares that frame's index. Masking the sorted frame with a Series built
# from the unsorted one makes pandas warn
# "Boolean Series key will be reindexed to match DataFrame index".
goalie_df = goalie_df.sort_values(['W'], ascending=False).loc[lambda frame: frame['Games'] != 0]
goalie_df

  


Unnamed: 0,player_id,Player Name,Team,Position,Games,Games Started,W,L,OTL,GAA,GA,SA,SV,SV%,SO,MIN
36,8476883,Andrei Vasilevskiy,TBL,G,52,52,35,14,3,2.5561,133,1605,1472,0.917,3,3121:54
83,8476945,Connor Hellebuyck,WPG,G,58,56,31,21,5,2.5699,140,1796,1656,0.922,6,3268:33
49,8476412,Jordan Binnington,STL,G,50,50,30,13,7,2.5647,126,1430,1304,0.912,3,2947:41
27,8475883,Frederik Andersen,TOR,G,52,52,29,13,7,2.8537,143,1577,1434,0.909,3,3006:40
21,8471679,Carey Price,MTL,G,58,58,27,25,6,2.7908,160,1755,1595,0.909,4,3439:49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,8479530,Adam Werner,COL,G,2,1,1,1,0,3.4156,5,58,53,0.914,0,87:50
3,8477480,Eric Comrie,NJD,G,3,2,0,2,0,4.2772,9,66,57,0.864,0,126:15
45,8475717,Calvin Pickard,DET,G,3,1,0,2,0,5.4584,15,74,59,0.797,0,164:53
46,8479288,Kasimir Kaskisuo,NSH,G,1,1,0,1,0,6.0000,6,38,32,0.842,0,60:00


In [13]:
# Cleaning skater data: rank each season by points (descending), then drop skaters
# with 0 games. The filter is evaluated on the already-sorted frame through a .loc
# callable so the boolean mask shares that frame's index. Masking the sorted frame with
# a Series built from the unsorted one makes pandas warn
# "Boolean Series key will be reindexed to match DataFrame index".
skater_season1_df = skater_season1_df.sort_values(['Points'], ascending=False).loc[lambda frame: frame['Games'] != 0]
skater_season2_df = skater_season2_df.sort_values(['Points'], ascending=False).loc[lambda frame: frame['Games'] != 0]
skater_season3_df = skater_season3_df.sort_values(['Points'], ascending=False).loc[lambda frame: frame['Games'] != 0]
skater_season3_df

  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


Unnamed: 0,player_id,Player Name,Team,Position,Games,G,A,Points,+/-,PIM,SOG,GWG,PPG,PPA,PPP,BS,Hits,SHG,SHA
496,8478402,Connor McDavid,EDM,C,82,41,67,108,20,26,274,7,5,15,20,46,28,1,3
72,8473512,Claude Giroux,PHI,C,82,34,68,102,28,20,193,1,9,27,36,23,30,0,0
99,8471215,Evgeni Malkin,PIT,C,78,42,56,98,16,87,239,7,14,24,38,32,48,0,0
466,8477492,Nathan MacKinnon,COL,C,74,39,58,97,11,55,284,12,12,20,32,22,38,0,1
151,8475791,Taylor Hall,BUF,LW,76,39,54,93,14,34,278,7,13,24,37,44,72,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,8480009,Eeli Tolvanen,NSH,RW,3,0,0,0,0,0,3,0,0,0,0,1,2,0,0
545,8478873,Troy Terry,ANA,RW,2,0,0,0,0,0,3,0,0,0,0,2,0,0,0
565,8477810,Joel Hanley,DAL,D,5,0,0,0,0,0,2,0,0,0,0,4,1,0,0
519,8478874,Adam Gaudette,VAN,C,5,0,0,0,2,0,9,0,0,0,0,3,2,0,0


In [14]:
# Write each cleaned DataFrame to its CSV file, leaving out the index column
for frame, csv_path in (
    (goalie_df, "data/goalie_stats.csv"),
    (skater_season1_df, "data/skater_stats_season1.csv"),
    (skater_season2_df, "data/skater_stats_season2.csv"),
    (skater_season3_df, "data/skater_stats_season3.csv"),
):
    frame.to_csv(csv_path, index=False)