<a href="https://colab.research.google.com/github/tylerviducic/NHL_Stats/blob/master/NHL_Draft_scrape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import numpy as np
import time

In [2]:
def get_draft_by_year(year):
  draft_dict = requests.get('https://statsapi.web.nhl.com/api/v1/draft/{}'.format(year)).json()
  return draft_dict['drafts'][0]['rounds']

In [5]:
def get_player_stats(prospect_link, draft_year):
  prospect_dict = requests.get('https://statsapi.web.nhl.com/{}'.format(prospect_link)).json()
  
  try:
    nhl_player_id = prospect_dict['prospects'][0]['nhlPlayerId']

    player_stats = {}
    for i in range(5):
      player_stats[str(year + i) + str(year+ i + 1)] = ()

    nhl_dict = requests.get('https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=yearByYear'.format(nhl_player_id)).json()
    season_stats = nhl_dict['stats'][0]['splits']
    for season in season_stats:
      if season['season'] in player_stats:
        if season['league']['name'] != 'National Hockey League' and (len(player_stats[season['season']]) == 0 or player_stats[season['season']][0] < 1):
          player_stats[season['season']] = (0, 0, 0, 0)
        elif season['league']['name'] == 'National Hockey League':
          games_played = season['stat']['games']
          toi_string = season['stat']['timeOnIce'].split(":")
          toi = float(toi_string[0]) + float(toi_string[1]) / 60
          if 'assists' in season['stat'].keys():
            assists = season['stat']['assists']
          else:
            assists = 0
          if 'goals' in season['stat'].keys():
            goals = season['stat']['goals']
          else: 
            goals = 0
          player_stats[season['season']] = (float(games_played), float(toi), float(assists), float(goals))
    for season in player_stats:
      if player_stats[season] == ():
        player_stats[season] = (0, 0, 0, 0)
    return player_stats
  except KeyError:
    return None
    
    

In [6]:
years = range(2008, 2017)

In [16]:
for year in years:
  draft = get_draft_by_year(year)

  list_of_player_arrays = []

  for round in draft:
    for pick in round['picks']:
      pick_num = pick['pickOverall']
      prospect = pick['prospect']
      player_name = prospect['fullName']
      player_link = prospect['link']

      player_stats = get_player_stats(player_link, year)
      print('Player - {} --- stats - {}'.format(player_name, player_stats))
      data_list = [pick_num]
      try:
        for season in player_stats:
          data_list.append(player_stats[season][0])
          data_list.append(player_stats[season][1])
          data_list.append(player_stats[season][2])
          data_list.append(player_stats[season][3])
      except TypeError:
        pass
      data_array = np.array(data_list)
      list_of_player_arrays.append(data_array)
      time.sleep(1)

  player_stat_array = np.array(list_of_player_arrays)
  with open('/content/drive/My Drive/draft_data/nhl_draft_{}.csv'.format(year), 'wb') as f:
    np.savetxt(f, player_stat_array)  

    

Player - Steven Stamkos --- stats - {'20082009': (79.0, 1179.85, 23.0, 23.0), '20092010': (82.0, 1685.2333333333333, 44.0, 51.0), '20102011': (82.0, 1655.9333333333334, 46.0, 45.0), '20112012': (82.0, 1805.6666666666667, 37.0, 60.0), '20122013': (48.0, 1057.1, 28.0, 29.0)}
Player - Drew Doughty --- stats - {'20082009': (81.0, 1930.1166666666666, 21.0, 6.0), '20092010': (82.0, 2047.95, 43.0, 16.0), '20102011': (76.0, 1948.8666666666666, 29.0, 11.0), '20112012': (77.0, 1917.25, 26.0, 10.0), '20122013': (48.0, 1266.9333333333334, 16.0, 6.0)}
Player - Zach Bogosian --- stats - {'20082009': (47.0, 851.0833333333334, 10.0, 9.0), '20092010': (81.0, 1734.2, 13.0, 10.0), '20102011': (71.0, 1590.6666666666667, 12.0, 5.0), '20112012': (65.0, 1515.55, 25.0, 5.0), '20122013': (33.0, 763.0833333333334, 9.0, 5.0)}
Player - Alex Pietrangelo --- stats - {'20082009': (8.0, 132.11666666666667, 1.0, 0.0), '20092010': (9.0, 149.16666666666666, 1.0, 1.0), '20102011': (79.0, 1738.0833333333333, 32.0, 11.0), 

TypeError: ignored

In [15]:
print(player_stat_array)

[[1.00000000e+00 8.20000000e+01 1.44575000e+03 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.00000000e+00 7.30000000e+01 1.30761667e+03 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [2.09000000e+02 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.10000000e+02 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.11000000e+02 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]
