In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy               as np
import pandas              as pd
import matplotlib.pyplot   as plt
import seaborn             as sns

import re
from bs4 import BeautifulSoup
import requests

%matplotlib inline

In [2]:
# EliteProspects profile pages to scrape, one URL per player.
player_urls = [
    "https://www.eliteprospects.com/player/90230/a.j.-white",
    "https://www.eliteprospects.com/player/232966/matt-jurusik",
    "https://www.eliteprospects.com/player/91186/mikko-rantanen",
    "https://www.eliteprospects.com/player/199655/cale-makar",
]

In [3]:
# Download the raw HTML of every player page, in the order of player_urls.
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops the run on an HTTP error (4xx/5xx) instead of
# storing an error page that the parsing cells below would treat as a player.
REQUEST_TIMEOUT_SECONDS = 30

data = []

for player in player_urls:
    response = requests.get(player, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    data.append(response.text)

In [4]:
# Parse each downloaded page with the built-in "html.parser" backend.
# soup[i] corresponds to data[i].
soup = [BeautifulSoup(page_html, "html.parser") for page_html in data]

## Player Info

In [5]:
# Column labels for the player-info table: a fixed 'Name' entry followed by
# the label <div>s read from the first player's page. The two class names are
# searched one after the other, so all matches of the first precede all
# matches of the second.
header_label_classes = ("col-xs-12 col-6 p-0", "col-xs-12 col-7 p-0")

player_data_header = ['Name']
for label_class in header_label_classes:
    player_data_header.extend(
        label_div.text.strip()
        for label_div in soup[0].find_all("div", class_=label_class)
    )

In [6]:
# Flat list of scraped values: for each player, the name <div>(s) first, then
# every value <div> of the two info-value classes, in that order.
value_div_classes = (
    "ep-entity-header__name",
    "col-xs-12 col-18 text-right p-0 ep-text-color--black",
    "col-xs-12 col-17 text-right p-0 ep-text-color--black",
)

player_data = [
    value_div.text.strip()
    for plyr in soup
    for value_class in value_div_classes
    for value_div in plyr.find_all("div", class_=value_class)
]

In [7]:
# Inspect the scraped labels; their order becomes the DataFrame column order.
player_data_header

['Name',
 'Position',
 'Age',
 'Height',
 'Weight',
 'Nation',
 'Shoots',
 'Youth Team',
 'Contract',
 'Date of Birth',
 'Place of Birth']

In [8]:
# Relabel the entry at index 6 (shown as 'Shoots' in the list output above)
# to 'Shoots/Catches'.
# NOTE(review): presumably so the column also reads correctly for goalies -- confirm.
# NOTE(review): the index is hard-coded, so this relies on the label order
# scraped from the first player's page staying the same.
player_data_header[6] = 'Shoots/Catches'

In [9]:
# Inspect the flat value list: entries for each player appear consecutively,
# starting with the name, in the same order as player_data_header.
player_data

['A.J. White',
 'LW',
 '28',
 '6\'2" / 188 cm',
 '201 lbs / 91 kg',
 'USA',
 'L',
 '-',
 '20/21',
 'Apr 19, 1992',
 'Dearborn, MI, USA',
 'Matt Jurusik',
 'G',
 '23',
 '6\'2" / 188 cm',
 '194 lbs / 88 kg',
 'USA',
 'L',
 '-',
 'Try-out',
 'May 01, 1997',
 'La Grange, IL, USA',
 'Mikko Rantanen',
 'RW',
 '24',
 '6\'4" / 193 cm',
 '216 lbs / 98 kg',
 'Finland',
 'L',
 'TPS',
 '24/25',
 'Oct 29, 1996',
 'Nousiainen, FIN',
 'Cale Makar',
 'D',
 '22',
 '5\'11" / 180 cm',
 '187 lbs / 85 kg',
 'Canada',
 'R',
 'Crowchild MHA',
 '20/21',
 'Oct 30, 1998',
 'Calgary, AB, CAN']

In [10]:
# Fold the flat value list into one row per player and one column per header
# label. The column count is taken from player_data_header rather than a
# hard-coded number, so it follows the header automatically; np.reshape still
# raises ValueError if the number of scraped values does not fill the grid
# exactly (e.g. a page is missing a field).
p_data = np.reshape(player_data, (len(player_urls), len(player_data_header)))

In [11]:
# Player-info table: one row per player, columns named by the scraped labels.
# Every value is still the text string taken from the page.
df = pd.DataFrame(p_data, columns=player_data_header)

In [12]:
# Preview the player-info table.
df.head()

Unnamed: 0,Name,Position,Age,Height,Weight,Nation,Shoots/Catches,Youth Team,Contract,Date of Birth,Place of Birth
0,A.J. White,LW,28,"6'2"" / 188 cm",201 lbs / 91 kg,USA,L,-,20/21,"Apr 19, 1992","Dearborn, MI, USA"
1,Matt Jurusik,G,23,"6'2"" / 188 cm",194 lbs / 88 kg,USA,L,-,Try-out,"May 01, 1997","La Grange, IL, USA"
2,Mikko Rantanen,RW,24,"6'4"" / 193 cm",216 lbs / 98 kg,Finland,L,TPS,24/25,"Oct 29, 1996","Nousiainen, FIN"
3,Cale Makar,D,22,"5'11"" / 180 cm",187 lbs / 85 kg,Canada,R,Crowchild MHA,20/21,"Oct 30, 1998","Calgary, AB, CAN"


## Player Statistics

In [13]:
# Take the first <div id="league-stats"> on the first player's page
# (raises IndexError if the page has none).
# NOTE: league_stats is reassigned for every player inside the df_stats loop
# further down; no visible cell reads this value before then.
league_stats = soup[0].find_all("div", id="league-stats")[0]

In [14]:
# Readable column names for a skater (non-goalie) stats table. The order must
# match the order in which the stats cells are collected for skaters.
non_g_cols = [
    # identification
    'Season',
    'Team',
    'League',
    # regular season
    'Regular Games Played',
    'Regular Goals',
    'Regular Assists',
    'Regular Total Points',
    'Regular PIM',
    'Regular +/-',
    # postseason
    'Postseason Status',
    'Postseason Games Played',
    'Postseason Goals',
    'Postseason Assists',
    'Postseason Total Points',
    'Postseason PIM',
    'Postseason +/-',
]

In [15]:
# Readable column names for a goalie stats table. The order must match the
# order in which the stats cells are collected for goalies.
# NOTE(review): the '... Goal Difference' names label the 'gd' cells; confirm
# what 'gd' means on the source site (it may be "games dressed").
# NOTE(review): the postseason marker column is called 'Postseason' here but
# 'Postseason Status' in non_g_cols -- confirm whether that is intentional.
g_cols = [
    # identification
    'Season',
    'Team',
    'League',
    # regular season
    'Regular Games Played',
    'Regular Goal Difference',
    'Regular Goal Against Average',
    'Regular Save %',
    'Regular Goals Against',
    'Regular Saves (Total)',
    'Regular Shutout',
    'Regular Season Record (W-L-T)',
    'Regular Time On Ice',
    # postseason
    'Postseason',
    'Postseason Games Played',
    'Postseason Goal Difference',
    'Postseason Goal Against Average',
    'Postseason Save %',
    'Postseason Goals Against',
    'Postseason Saves (Total)',
    'Postseason Shutout',
    'Postseason Season Record (W-L-T)',
    'Postseason Time On Ice',
]

In [16]:
# CSS classes of the stats-table cells to collect, listed in the same order as
# the readable column names in g_cols (goalies) and non_g_cols (skaters).
GOALIE_STAT_CLASSES = ['season sorted', 'team', 'league', 'regular gp',
                       'regular gd', 'regular gaa', 'regular svp', 'regular ga',
                       'regular svs', 'regular so', 'regular wlt', 'regular toi',
                       'postseason', 'postseason gp', 'postseason gd', 'postseason gaa',
                       'postseason svp', 'postseason ga', 'postseason svs',
                       'postseason so', 'postseason wlt', 'postseason toi']
SKATER_STAT_CLASSES = ['season sorted', 'team', 'league', 'regular gp',
                       'regular g', 'regular a', 'regular tp', 'regular pim',
                       'regular pm', 'postseason', 'postseason gp', 'postseason g',
                       'postseason a', 'postseason tp', 'postseason pim', 'postseason pm']

# Class of the player-info value <div>s; the first one on a page is the value
# compared against 'G' below to detect goalies.
POSITION_DIV_CLASS = 'col-xs-12 col-18 text-right p-0 ep-text-color--black'


def _stat_cells_text(stats_table, css_class):
    """Return the stripped text of every <td> of one class in the table body.

    Parameters
    ----------
    stats_table : bs4 element
        The ``<div id="league-stats">`` element of a player page; its
        ``tbody`` is searched.
    css_class : str
        Class attribute of the cells to collect, e.g. ``'regular gp'``.

    Returns
    -------
    list of str
        One entry per matching cell, in document order.
    """
    if css_class == 'postseason':
        # Require the class list to be exactly ['postseason']: a plain
        # class_='postseason' search also matches cells that merely include
        # that class, such as 'postseason gp'.
        cells = stats_table.tbody.find_all(
            lambda tag: tag.name == 'td' and tag.get('class') == [css_class])
    else:
        cells = stats_table.tbody.find_all('td', class_=css_class)
    return [cell.text.strip() for cell in cells]


# One stats DataFrame per player, in the same order as soup.
df_stats = []

for player in soup:
    league_stats = player.find_all("div", id="league-stats")[0]

    # Goalies and skaters have different stat cells and column names.
    if player.find('div', class_=POSITION_DIV_CLASS).text.strip() == 'G':
        stat_classes, column_names = GOALIE_STAT_CLASSES, g_cols
    else:
        stat_classes, column_names = SKATER_STAT_CLASSES, non_g_cols

    stats_dict = {css_class: _stat_cells_text(league_stats, css_class)
                  for css_class in stat_classes}

    # pd.DataFrame raises ValueError if the per-class cell lists differ in length.
    temp_df = pd.DataFrame(stats_dict)
    temp_df.columns = column_names

    df_stats.append(temp_df)

In [17]:
# Store each player's stats DataFrame in a 'Stats' column. The rows of df and
# the entries of df_stats were both produced by iterating soup in order, so
# entry i belongs to row i.
df['Stats'] = df_stats

In [18]:
# Preview the player table, now including the 'Stats' column.
df.head()

Unnamed: 0,Name,Position,Age,Height,Weight,Nation,Shoots/Catches,Youth Team,Contract,Date of Birth,Place of Birth,Stats
0,A.J. White,LW,28,"6'2"" / 188 cm",201 lbs / 91 kg,USA,L,-,20/21,"Apr 19, 1992","Dearborn, MI, USA",Season Team League...
1,Matt Jurusik,G,23,"6'2"" / 188 cm",194 lbs / 88 kg,USA,L,-,Try-out,"May 01, 1997","La Grange, IL, USA",Season Team Leag...
2,Mikko Rantanen,RW,24,"6'4"" / 193 cm",216 lbs / 98 kg,Finland,L,TPS,24/25,"Oct 29, 1996","Nousiainen, FIN",Season Team ...
3,Cale Makar,D,22,"5'11"" / 180 cm",187 lbs / 85 kg,Canada,R,Crowchild MHA,20/21,"Oct 30, 1998","Calgary, AB, CAN",Season Team League ...


In [19]:
# Find the index label of Cale Makar's row in df and display the stats
# DataFrame stored at that position in df_stats.
is_makar = df['Name'] == 'Cale Makar'
makar_row = df.index[is_makar][0]
df_stats[makar_row]

Unnamed: 0,Season,Team,League,Regular Games Played,Regular Goals,Regular Assists,Regular Total Points,Regular PIM,Regular +/-,Postseason Status,Postseason Games Played,Postseason Goals,Postseason Assists,Postseason Total Points,Postseason PIM,Postseason +/-
0,2011-12,NWCAA Bruins U15 AA,HCBAA,28,4,16,20,18,,Playoffs,5.0,0.0,3.0,3.0,0.0,
1,2012-13,Calgary Flames U15 AAA,AMBHL,33,3,19,22,22,,,,,,,,
2,2013-14,NWCAA Stampeders U16 AAA,AMMHL,36,9,19,28,35,,,,,,,,
3,,Calgary Flames U18 AAA,AMHL,6,0,1,1,4,,,,,,,,
4,2014-15,Calgary Flames U18 AAA,AMHL,34,7,16,23,14,,Playoffs,2.0,0.0,0.0,0.0,0.0,
5,,Brooks Bandits,AJHL,3,1,4,5,4,,Playoffs,20.0,1.0,6.0,7.0,4.0,
6,2015-16,Brooks Bandits,AJHL,54,10,45,55,28,,Playoffs,13.0,3.0,11.0,14.0,0.0,
7,2016-17,Brooks Bandits,AJHL,54,24,51,75,18,,Playoffs,13.0,5.0,11.0,16.0,4.0,
8,2017-18,UMass,NCAA,34,5,16,21,20,9.0,,,,,,,
9,,Canada U20,WJC-20,7,3,5,8,0,5.0,,,,,,,
