In [1]:
import pandas as pd
from random import randint
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem, ttest_ind
import statsmodels.api as sm



In [22]:
hockey_game_log = pd.read_csv("Hockey Game Logs.csv")
hockey_game_log.head()

Unnamed: 0,Team,Conference,Division,Game Number in The Season,Season,Date,Home or Away,Opponent,GF,GA,...,FA,FF%,FOW,FOL,FO%,oZS%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage
0,WSH,Eastern,Metropolitan,1,2018-19,10/3/18,Home,Boston Bruins,7,0,...,31,50.0,16,32,33.3,60.7,113.0,14,0.19,1.0
1,WSH,Eastern,Metropolitan,2,2018-19,10/4/18,Away,Pittsburgh Penguins,6,7,...,47,43.4,26,30,46.4,60.7,105.7,13,0.17,0.83
2,WSH,Eastern,Metropolitan,3,2018-19,10/10/18,Home,Vegas Golden Knights,5,2,...,41,45.3,26,30,46.4,40.5,105.4,19,0.17,0.94
3,WSH,Eastern,Metropolitan,4,2018-19,10/11/18,Away,New Jersey Devils,0,6,...,48,35.1,18,31,36.7,45.7,84.8,8,0.0,0.83
4,WSH,Eastern,Metropolitan,5,2018-19,10/13/18,Home,Toronto Maple Leafs,2,4,...,32,50.8,19,19,50.0,47.6,90.9,16,0.07,0.86


In [23]:
condensed_data = hockey_game_log.drop(columns={"Conference ","Division","Date","Opponent","oZS%","FOW","FOL","Game Number in The Season"})
condensed_data
dummies_winloss = pd.get_dummies(condensed_data["Win or Loss"])
dummies_winloss.head()
nhl_data = pd.concat([condensed_data,dummies_winloss], axis=1)
nhl_data ["Shooting Percentage"] = 100* nhl_data ["Shooting Percentage"]
nhl_data ["Save Percentage"] = 100* nhl_data ["Save Percentage"]
nhl_data ["Penalty Differential"] = nhl_data["PPO"]- nhl_data["PPO Against"]
nhl_data ["Goal Differential"] = nhl_data["GF"]- nhl_data["GA"]
nhl_data.head()

Unnamed: 0,Team,Season,Home or Away,GF,GA,Win or Loss,Overtime Status (OT or SO),Final Result,S,PIM,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
0,WSH,2018-19,Home,7,0,W,,W,37,14,...,50.0,33.3,113.0,14,19.0,100.0,0,1,4,7
1,WSH,2018-19,Away,6,7,L,OT,OTL,36,12,...,43.4,46.4,105.7,13,17.0,83.0,1,0,-4,-1
2,WSH,2018-19,Home,5,2,W,,W,29,6,...,45.3,46.4,105.4,19,17.0,94.0,0,1,1,3
3,WSH,2018-19,Away,0,6,L,,L,21,10,...,35.1,36.7,84.8,8,0.0,83.0,1,0,0,-6
4,WSH,2018-19,Home,2,4,L,,L,27,6,...,50.8,50.0,90.9,16,7.0,86.0,1,0,0,-2


In [56]:
X = nhl_data["S"]
y = nhl_data["GF"]

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,GF,R-squared (uncentered):,0.738
Model:,OLS,Adj. R-squared (uncentered):,0.738
Method:,Least Squares,F-statistic:,21280.0
Date:,"Wed, 30 Oct 2019",Prob (F-statistic):,0.0
Time:,19:38:13,Log-Likelihood:,-14826.0
No. Observations:,7544,AIC:,29650.0
Df Residuals:,7543,BIC:,29660.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
S,0.0908,0.001,145.874,0.000,0.090,0.092

0,1,2,3
Omnibus:,116.104,Durbin-Watson:,1.977
Prob(Omnibus):,0.0,Jarque-Bera (JB):,117.305
Skew:,0.289,Prob(JB):,3.37e-26
Kurtosis:,2.801,Cond. No.,1.0


In [24]:
# Create the bins in which Data will be held
# Bins are 0, 60, 70, 80, 90, 100.   
corsi_bins = [0,30, 40, 45, 50, 55, 60,70, 100]

# Create the names for the four bins
group_names = ["0-30%","30-40%", "40-45%", "45-50%", "50-55%","55-60%","60-70","Over 70%"]

In [25]:
nhl_data["Corsi Group"] = pd.cut(nhl_data["CF%"], corsi_bins, labels=group_names, right=False)
nhl_data.head()

Unnamed: 0,Team,Season,Home or Away,GF,GA,Win or Loss,Overtime Status (OT or SO),Final Result,S,PIM,...,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential,Corsi Group
0,WSH,2018-19,Home,7,0,W,,W,37,14,...,33.3,113.0,14,19.0,100.0,0,1,4,7,45-50%
1,WSH,2018-19,Away,6,7,L,OT,OTL,36,12,...,46.4,105.7,13,17.0,83.0,1,0,-4,-1,40-45%
2,WSH,2018-19,Home,5,2,W,,W,29,6,...,46.4,105.4,19,17.0,94.0,0,1,1,3,40-45%
3,WSH,2018-19,Away,0,6,L,,L,21,10,...,36.7,84.8,8,0.0,83.0,1,0,0,-6,40-45%
4,WSH,2018-19,Home,2,4,L,,L,27,6,...,50.0,90.9,16,7.0,86.0,1,0,0,-2,50-55%


In [26]:
fenwick_bins = [0,30, 40, 45, 50, 55, 60,70, 100]

# Create the names for the four bins
group_names = ["0-30%","30-40%", "40-45%", "45-50%", "50-55%","55-60%","60-70","Over 70%"]

In [27]:
nhl_data["Fenwick Group"] = pd.cut(nhl_data["FF%"], fenwick_bins, labels=group_names, right=False)
nhl_data.head()

Unnamed: 0,Team,Season,Home or Away,GF,GA,Win or Loss,Overtime Status (OT or SO),Final Result,S,PIM,...,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential,Corsi Group,Fenwick Group
0,WSH,2018-19,Home,7,0,W,,W,37,14,...,113.0,14,19.0,100.0,0,1,4,7,45-50%,50-55%
1,WSH,2018-19,Away,6,7,L,OT,OTL,36,12,...,105.7,13,17.0,83.0,1,0,-4,-1,40-45%,40-45%
2,WSH,2018-19,Home,5,2,W,,W,29,6,...,105.4,19,17.0,94.0,0,1,1,3,40-45%,45-50%
3,WSH,2018-19,Away,0,6,L,,L,21,10,...,84.8,8,0.0,83.0,1,0,0,-6,40-45%,30-40%
4,WSH,2018-19,Home,2,4,L,,L,27,6,...,90.9,16,7.0,86.0,1,0,0,-2,50-55%,50-55%


In [28]:
fenwick_groups = nhl_data.groupby("Fenwick Group").mean()
fenwick_groups
# Score effects are likely highly impactful on this data in a way that is hard to measure. 
# It seems like teams that dominate the puck are less likely to win, but that has to be because of score. 

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Fenwick Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0-30%,2.837838,2.810811,20.216216,9.054054,0.837838,3.405405,0.108108,42.405405,8.567568,0.594595,...,27.894595,46.127027,106.397297,14.864865,14.405405,93.135135,0.432432,0.567568,-0.378378,0.027027
30-40%,2.95641,2.832051,24.251282,8.948718,0.644872,2.85641,0.110256,38.051282,8.180769,0.520513,...,36.658846,48.223077,104.517179,14.187179,12.411538,92.494872,0.435897,0.564103,-0.296154,0.124359
40-45%,2.923645,2.896552,27.412972,8.890805,0.630542,2.860427,0.10509,35.043514,8.344828,0.545977,...,42.56757,48.668801,102.091297,13.317734,10.810345,91.641215,0.49179,0.50821,-0.171593,0.027094
45-50%,2.967419,2.926692,29.810777,8.594612,0.628446,2.954261,0.08208,32.760652,8.444862,0.550752,...,47.346429,49.510652,100.768108,12.546366,10.077694,90.989975,0.480576,0.519424,-0.062657,0.040727
50-55%,2.914147,2.946544,32.434125,8.510799,0.550216,3.00594,0.070194,29.975702,8.611771,0.617711,...,52.217333,50.411447,99.37797,11.776998,9.094492,90.076134,0.515119,0.484881,0.046976,-0.032397
55-60%,2.90404,2.925084,34.944444,8.284512,0.54798,3.026936,0.070707,27.474747,8.888047,0.627946,...,57.261027,51.236532,97.985354,11.470539,8.425926,89.243266,0.510101,0.489899,0.180976,-0.021044
60-70,2.822542,2.953237,37.931655,8.206235,0.520384,3.155875,0.07554,24.376499,8.948441,0.640288,...,63.102638,51.888489,95.502998,10.441247,7.56235,87.693046,0.561151,0.438849,0.292566,-0.130695
Over 70%,2.820513,2.871795,42.230769,9.333333,0.589744,3.820513,0.051282,20.307692,9.282051,0.923077,...,71.997436,53.310256,94.017949,9.769231,6.948718,85.538462,0.564103,0.435897,0.205128,-0.051282


In [29]:
corsi_data = nhl_data.groupby("Corsi Group").mean()
corsi_data
# Score effects are likely highly impactful on this data in a way that is hard to measure. 
# It seems like teams that dominate the puck are less likely to win, but that has to be because of score. 

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Corsi Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0-30%,2.52,2.32,20.52,9.8,0.76,3.52,0.08,41.92,9.6,0.36,...,28.744,46.188,105.632,18.2,12.76,94.32,0.4,0.6,-0.2,0.2
30-40%,3.223288,2.613699,24.684932,9.060274,0.69589,2.869863,0.117808,37.550685,8.321918,0.489041,...,37.471918,48.10137,105.791096,16.157534,13.313699,93.008219,0.352055,0.647945,-0.258904,0.609589
40-45%,3.082305,2.746502,27.381893,8.859259,0.640329,2.869136,0.111934,34.887243,8.277366,0.539095,...,42.844691,48.750617,103.166337,14.330041,11.354733,92.069959,0.419753,0.580247,-0.265844,0.335802
45-50%,2.979722,2.8708,30.128621,8.502897,0.600232,2.911935,0.085168,32.529548,8.458285,0.548088,...,47.623001,49.362688,100.987312,12.89803,9.983198,91.125145,0.471611,0.528389,-0.052144,0.108922
50-55%,2.874118,2.974498,32.328269,8.553988,0.571351,2.998372,0.067282,30.20076,8.59197,0.615844,...,52.136354,50.613022,99.121161,11.450353,8.984807,90.086815,0.526316,0.473684,0.048833,-0.10038
55-60%,2.747927,3.091211,34.879768,8.218905,0.536484,3.119403,0.063018,27.504146,8.819237,0.636816,...,56.980182,51.192371,96.866584,10.388889,7.970978,88.69403,0.583748,0.416252,0.278607,-0.343284
60-70,2.64026,3.218182,37.438961,8.327273,0.492208,3.132468,0.072727,24.757143,9.027273,0.702597,...,62.364286,51.838961,94.338961,8.618182,7.120779,86.767532,0.636364,0.363636,0.228571,-0.577922
Over 70%,2.448276,2.517241,41.965517,9.827586,0.413793,3.517241,0.034483,20.965517,9.655172,0.724138,...,70.817241,54.1,94.555172,6.793103,6.068966,87.517241,0.586207,0.413793,0.172414,-0.068966


In [30]:
shots_bins = [0,20, 25, 30, 35, 40, 45,50, 200]

# Create the names for the four bins
group_names = ["0-20","20-25", "25-30", "30-35", "35-40","40-45","45-50","Over 50"]
nhl_data["Shots Group"] = pd.cut(nhl_data["S"], shots_bins, labels=group_names, right=False)
nhl_data.head()

Unnamed: 0,Team,Season,Home or Away,GF,GA,Win or Loss,Overtime Status (OT or SO),Final Result,S,PIM,...,Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential,Corsi Group,Fenwick Group,Shots Group
0,WSH,2018-19,Home,7,0,W,,W,37,14,...,14,19.0,100.0,0,1,4,7,45-50%,50-55%,35-40
1,WSH,2018-19,Away,6,7,L,OT,OTL,36,12,...,13,17.0,83.0,1,0,-4,-1,40-45%,40-45%,35-40
2,WSH,2018-19,Home,5,2,W,,W,29,6,...,19,17.0,94.0,0,1,1,3,40-45%,45-50%,25-30
3,WSH,2018-19,Away,0,6,L,,L,21,10,...,8,0.0,83.0,1,0,0,-6,40-45%,30-40%,20-25
4,WSH,2018-19,Home,2,4,L,,L,27,6,...,16,7.0,86.0,1,0,0,-2,50-55%,50-55%,25-30


In [31]:
shots_data = nhl_data.groupby("Shots Group").mean()
shots_data
# Biggest observations from here is that you don't want to have less than 20 shots on net. Teams lose 60% of the time
# and draw less penalties. Hard to tell for wins/ losses but sweet spot might actually be between 25-30 shots. 
# Maybe it's because these teams are more selective?


Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Shots Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0-20,2.169154,2.940299,17.562189,9.651741,0.432836,2.626866,0.064677,32.810945,7.577114,0.671642,...,38.358209,47.696517,103.032338,13.273632,12.567164,90.930348,0.597015,0.402985,-0.845771,-0.771144
20-25,2.581162,2.681363,22.386774,8.670341,0.52004,2.592184,0.071142,32.552104,7.578156,0.54008,...,42.932265,49.017034,102.672645,13.107214,11.594188,91.512024,0.501002,0.498998,-0.511022,-0.1002
25-30,2.817904,2.833164,27.091556,9.027976,0.545778,2.803154,0.089013,31.832146,8.16175,0.56002,...,47.079247,49.583571,101.186724,12.696846,10.433367,90.872838,0.485249,0.514751,-0.292981,-0.015259
30-35,2.989564,2.97296,31.933112,8.63425,0.595351,3.00759,0.091556,31.149905,8.788425,0.612903,...,50.890465,50.053083,99.664801,12.051708,9.378558,90.175996,0.501423,0.498577,0.020398,0.016603
35-40,3.082749,3.042076,36.795231,8.074334,0.637447,3.260168,0.081346,30.348527,9.045582,0.600982,...,54.397546,50.550631,98.079734,11.654979,8.442496,89.651473,0.51122,0.48878,0.407433,0.040673
40-45,3.25,3.029412,41.665033,7.72549,0.694444,3.357843,0.076797,29.349673,9.372549,0.584967,...,57.45915,51.715196,97.544935,11.361111,7.908497,89.44281,0.485294,0.514706,0.678105,0.220588
45-50,3.31016,3.02139,46.379679,8.0,0.663102,3.347594,0.053476,29.957219,9.572193,0.625668,...,59.734759,51.21016,97.652941,11.818182,7.096257,89.684492,0.491979,0.508021,0.764706,0.28877
Over 50,3.5,3.434783,52.26087,8.521739,0.869565,4.369565,0.065217,28.826087,12.826087,0.717391,...,63.232609,52.021739,95.226087,10.630435,6.76087,87.804348,0.5,0.5,1.608696,0.065217


In [32]:
winloss_stats = nhl_data.groupby(["Win or Loss"])
winloss_sum = winloss_stats.sum()
winloss_sum


Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Win or Loss,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L,6988,15031,117611,32897,1615,11233,191,117884,31772,2817,...,190322.1,187908.6,352059.6,42869,23089.0,327554.0,3772.0,0.0,-31,-8043
W,15031,6988,117884,31776,2817,11264,437,117611,32901,1615,...,186884.2,189294.8,402344.4,49519,49807.0,354191.0,0.0,3772.0,31,8043


In [33]:
winloss_mean = winloss_stats.mean()
winloss_mean
# To me, it's pretty significant that there isn't really a difference in number of shots or penalty minutes teams receive. 

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Win or Loss,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L,1.852598,3.984889,31.180011,8.721368,0.428155,2.977996,0.050636,31.252386,8.423118,0.746819,...,50.456548,49.816702,93.334995,11.365058,6.121156,86.838282,1,0,-0.008218,-2.132291
W,3.984889,1.852598,31.252386,8.424178,0.746819,2.986214,0.115854,31.180011,8.722428,0.428155,...,49.545122,50.184199,106.666066,13.128049,13.204401,93.900053,0,1,0.008218,2.132291


In [34]:
nhl_data.groupby(["GA"]).mean()
# 2 or less is magic number for goals against. Teams that give up 2 or less win 71% of the time. 

Unnamed: 0_level_0,GF,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,PPO Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
GA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,3.074398,29.789934,8.258206,0.549234,2.95186,0.111597,29.85558,9.028446,0.0,2.851204,...,48.882932,49.971772,110.023632,13.667396,10.684902,100.0,0.0,1.0,0.100656,3.074398
1,3.159966,30.449749,8.135678,0.576214,2.859296,0.105528,30.226968,8.369347,0.250419,2.866834,...,49.553685,50.185678,106.898409,13.350921,10.753769,96.508375,0.062814,0.937186,-0.007538,2.159966
2,3.061652,31.1418,8.114673,0.593711,2.930949,0.080148,30.874229,8.268804,0.416769,2.929716,...,50.115475,50.071455,103.226634,12.787916,10.274353,93.211467,0.287916,0.712084,0.001233,1.061652
3,2.855296,31.60899,8.301108,0.567734,2.967365,0.077586,31.240148,8.258005,0.64101,2.953818,...,50.328202,50.056219,99.614101,11.9883,9.308498,89.924261,0.56835,0.43165,0.013547,-0.144704
4,2.635053,31.620637,8.714719,0.584977,3.044006,0.066768,31.397572,8.522762,0.752656,3.002276,...,50.562747,49.975114,95.265023,11.361153,8.544006,86.698786,0.802731,0.197269,0.04173,-1.364947
5,2.707751,31.660737,8.927573,0.595934,3.15756,0.064803,32.31385,9.0,0.96061,3.113088,...,49.889072,49.885769,92.827954,11.299873,8.738247,83.787802,0.916137,0.083863,0.044473,-2.292249
6,2.991453,31.592593,10.60114,0.672365,3.076923,0.111111,32.774929,9.814815,1.116809,3.273504,...,49.764387,49.707977,90.595726,11.606838,9.646724,81.039886,0.965812,0.034188,-0.196581,-3.008547
7,3.013793,31.834483,11.303448,0.655172,2.944828,0.096552,34.524138,9.331034,1.37931,3.496552,...,49.311724,49.112414,88.553793,10.662069,9.468966,78.951724,0.986207,0.013793,-0.551724,-3.986207
8,3.459459,30.540541,15.72973,0.945946,3.783784,0.054054,35.027027,15.486486,1.648649,3.513514,...,46.916216,48.964865,88.886486,10.837838,11.513514,76.243243,1.0,0.0,0.27027,-4.540541
9,3.2,30.6,6.0,0.2,2.0,0.2,35.0,4.4,1.8,2.8,...,50.56,51.3,84.88,9.4,11.0,73.8,1.0,0.0,-0.8,-5.8


In [35]:
nhl_data.groupby(["GF"]).mean()
# Score 3 goals and win 56% of games. 

Unnamed: 0_level_0,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,PPO Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
GF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,3.074398,29.85558,9.028446,0.0,2.851204,0.0,29.789934,8.258206,0.549234,2.95186,...,51.1186,50.028884,89.978775,10.993435,0.0,89.374179,1.0,0.0,-0.100656,-3.074398
1,3.159966,30.226968,8.369347,0.250419,2.866834,0.025126,30.449749,8.140704,0.576214,2.859296,...,50.447571,49.815578,93.102931,11.4933,3.512563,89.284757,0.937186,0.062814,0.007538,-2.159966
2,3.061652,30.874229,8.265105,0.416769,2.929716,0.050555,31.1418,8.108508,0.593711,2.930949,...,49.88656,49.929593,96.774229,12.049322,6.791615,89.75894,0.712084,0.287916,-0.001233,-1.061652
3,2.855296,31.240148,8.2617,0.64101,2.953818,0.083744,31.60899,8.299877,0.567734,2.967365,...,49.67383,49.944704,100.386946,12.480911,10.129926,90.714286,0.43165,0.56835,-0.013547,0.144704
4,2.635053,31.397572,8.521244,0.752656,3.002276,0.12519,31.620637,8.716237,0.584977,3.044006,...,49.43824,50.025569,104.73566,12.799697,13.374052,91.483308,0.197269,0.802731,-0.04173,1.364947
5,2.707751,32.31385,9.0,0.96061,3.113088,0.149936,31.660737,8.927573,0.595934,3.15756,...,50.112834,50.11474,107.172808,12.836086,16.23507,91.299873,0.083863,0.916137,-0.044473,2.292249
6,2.991453,32.774929,9.820513,1.116809,3.273504,0.148148,31.592593,10.606838,0.672365,3.076923,...,50.237607,50.292877,109.405983,12.954416,18.962963,90.376068,0.034188,0.965812,0.196581,3.008547
7,3.013793,34.524138,9.331034,1.37931,3.496552,0.206897,31.834483,11.317241,0.655172,2.944828,...,50.689655,50.888276,111.446897,12.172414,21.103448,90.544828,0.013793,0.986207,0.551724,3.986207
8,3.459459,35.027027,15.486486,1.648649,3.513514,0.378378,30.540541,15.72973,0.945946,3.783784,...,53.086486,51.035135,111.113514,11.648649,23.756757,88.513514,0.0,1.0,-0.27027,4.540541
9,3.2,35.0,4.4,1.8,2.8,0.0,30.6,6.0,0.2,2.0,...,49.44,48.72,115.12,12.4,26.2,89.0,0.0,1.0,0.8,5.8


In [36]:
nhl_data.groupby(["PPO"]).mean()

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,PPO Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
PPO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.778481,2.537975,29.208861,5.905063,0.0,0.063291,32.759494,2.056962,0.379747,1.962025,...,48.785443,49.116456,102.552532,14.360759,9.860759,92.044304,0.462025,0.537975,-1.962025,0.240506
1,2.797938,2.803093,29.351546,7.141237,0.212371,0.079381,32.079381,4.013402,0.490722,2.493814,...,49.220928,49.710928,101.032165,13.431959,9.789691,91.008247,0.494845,0.505155,-1.493814,-0.005155
2,2.847396,2.8125,30.326042,7.715104,0.383333,0.072917,31.805729,6.034896,0.525,2.784375,...,49.547552,50.071875,100.905729,12.951042,9.729688,90.915625,0.485417,0.514583,-0.784375,0.034896
3,2.872134,2.980642,31.457463,8.320428,0.593989,0.08813,31.15894,8.332145,0.590932,2.948039,...,50.249822,49.892461,99.416047,12.086602,9.447275,90.164544,0.517066,0.482934,0.051961,-0.108507
4,2.922644,3.021097,32.070323,9.472574,0.7609,0.087904,30.726442,10.966948,0.662447,3.286217,...,50.442616,50.149086,99.15007,11.496484,9.440928,89.850211,0.523207,0.476793,0.713783,-0.098453
5,3.141631,3.125894,32.751073,10.678112,1.008584,0.100143,30.207439,13.668097,0.728183,3.505007,...,50.682833,50.126037,98.888269,11.090129,9.866953,89.321888,0.506438,0.493562,1.494993,0.015737
6,3.315789,2.805921,33.832237,11.296053,1.180921,0.088816,29.605263,16.927632,0.648026,3.582237,...,50.557237,50.582237,99.909211,10.773026,10.115132,90.328947,0.434211,0.565789,2.417763,0.509868
7,3.695122,2.841463,32.939024,11.682927,1.609756,0.060976,29.609756,18.756098,0.719512,3.902439,...,49.947561,50.230488,100.621951,10.573171,11.439024,90.170732,0.390244,0.609756,3.097561,0.853659
8,3.684211,3.157895,36.684211,12.157895,1.789474,0.052632,28.0,23.315789,0.842105,4.052632,...,53.421053,51.778947,98.326316,9.789474,10.631579,88.894737,0.421053,0.578947,3.947368,0.526316
9,4.0,3.8,33.4,16.2,1.6,0.0,30.0,36.0,0.8,4.0,...,48.26,46.8,97.96,9.4,12.2,87.0,0.4,0.6,5.0,0.2


In [37]:
nhl_data.groupby(["Penalty Differential"]).mean()
# One thing that I think is interesting from data is the extent to which the games of hockey evens out to reward the trailing team. 
# For instance, teams that commit more penalties if the difference is between 1-3 win more than they lose. If that's significant, it means 
# refs favor the trailing team, or it's induced by score effects. i.e trailing team controls the puck and draws more penalties. 

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FA,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Goal Differential
Penalty Differential,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-7,0.666667,3.666667,27.0,38.666667,0.333333,2.666667,0.0,38.333333,15.0,1.333333,...,30.0,49.266667,56.233333,91.033333,8.666667,2.666667,90.666667,1.0,0.0,-3.0
-6,3.25,2.75,24.0,17.25,0.25,1.5,0.5,31.5,5.25,1.0,...,32.0,43.725,44.275,103.575,13.5,13.25,90.5,0.5,0.5,0.5
-5,2.851064,3.489362,28.042553,17.808511,0.255319,1.531915,0.234043,34.446809,6.276596,1.446809,...,34.319149,48.72766,49.048936,101.470213,11.276596,10.425532,89.425532,0.617021,0.382979,-0.638298
-4,2.863309,3.553957,27.582734,16.028777,0.323741,1.784173,0.194245,34.402878,7.374101,1.302158,...,34.352518,48.964029,48.648921,101.315827,11.633094,10.582734,89.330935,0.546763,0.453237,-0.690647
-3,2.957447,2.941489,29.007979,12.569149,0.388298,1.779255,0.170213,33.792553,6.058511,0.888298,...,35.715426,49.256915,50.284574,101.844149,12.382979,10.529255,91.058511,0.486702,0.513298,0.015957
-2,2.888771,2.939619,29.257415,10.794492,0.395127,2.070975,0.115466,33.242585,6.558263,0.78072,...,36.760593,48.785593,49.501907,101.592055,12.617585,10.183263,90.951271,0.486229,0.513771,-0.050847
-1,2.914166,2.838102,30.236567,9.366364,0.468248,2.339846,0.091417,32.157013,7.199581,0.665038,...,36.812282,49.191486,49.87104,101.172017,12.510816,9.953245,90.919749,0.494766,0.505234,0.076064
0,2.923123,2.923123,31.174939,7.817797,0.546005,2.77845,0.080508,31.174939,7.819007,0.546005,...,36.433414,50.001029,50.000242,100.000303,12.40678,9.67615,90.357143,0.5,0.5,0.0
1,2.838102,2.914166,32.157013,7.200977,0.665038,3.339846,0.056525,30.236567,9.369156,0.468248,...,35.655967,50.810328,50.130077,98.82903,12.223308,9.113747,90.076064,0.505234,0.494766,-0.076064
2,2.939619,2.888771,33.242585,6.554025,0.78072,4.070975,0.045551,29.257415,10.792373,0.395127,...,34.862288,51.21589,50.498941,98.408898,11.695975,9.072034,89.853814,0.513771,0.486229,0.050847


In [38]:
shg= nhl_data.groupby(["SHG"]).mean()
#Teams that score shortie will score more and win more 
shg

Unnamed: 0_level_0,GF,GA,S,PIM,PPG,PPO,Shots Against,Penalty Minutes by Opponent,PPG Against,PPO Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
SHG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.830023,2.932642,31.22582,8.381405,0.581462,2.975964,31.162061,8.489062,0.584053,2.915083,...,50.113587,49.995121,99.895711,12.253886,9.365573,90.312464,0.516695,0.483305,0.060881,-0.102619
1,3.934629,2.754417,31.075972,10.666078,0.662544,3.035336,31.84629,9.524735,0.625442,3.727915,...,48.776502,49.984452,101.307597,12.178445,13.070671,91.035336,0.305654,0.694346,-0.69258,1.180212
2,4.214286,2.964286,31.821429,13.25,0.571429,3.321429,31.642857,10.071429,0.714286,4.428571,...,47.346429,51.003571,98.967857,11.785714,13.857143,90.392857,0.321429,0.678571,-1.107143,1.25
3,5.5,0.5,29.0,15.5,0.5,4.5,35.0,9.0,0.0,4.5,...,41.95,59.0,108.7,12.5,19.0,98.5,0.0,1.0,0.0,5.0


In [39]:
ppg = nhl_data.groupby(["PPG"]).mean()
ppg
# Teams that score PPG score more and win more. 

Unnamed: 0_level_0,GF,GA,S,PIM,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,PPO Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
PPG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2.374637,2.866409,30.804937,8.129961,2.543078,0.077444,31.052275,7.374395,0.558083,2.870039,...,50.436399,50.130784,100.041505,12.128267,7.960794,90.452323,0.579864,0.420136,-0.32696,-0.491772
1,3.292578,2.983594,31.500391,8.792578,3.265625,0.089063,31.430078,9.311328,0.602734,3.042187,...,49.548672,49.833242,99.86332,12.435937,10.903906,90.259766,0.451562,0.548438,0.223438,0.308984
2,4.238976,3.01138,32.204836,9.688478,4.083926,0.086771,31.513514,11.334282,0.674253,3.334282,...,49.234851,49.738976,100.048791,12.251778,13.675676,90.172119,0.28734,0.71266,0.749644,1.227596
3,5.21374,2.801527,32.916031,11.603053,4.954198,0.122137,30.709924,15.221374,0.732824,3.427481,...,49.30916,50.90687,101.082443,12.21374,16.274809,90.931298,0.129771,0.870229,1.526718,2.412214
4,6.470588,2.882353,34.235294,13.764706,6.058824,0.176471,30.647059,23.0,0.705882,3.176471,...,49.270588,47.811765,101.135294,12.529412,19.235294,90.705882,0.058824,0.941176,2.882353,3.588235
5,5.0,4.0,34.0,6.0,6.0,0.0,28.0,12.0,1.0,3.0,...,49.3,41.8,87.0,12.0,15.0,86.0,0.0,1.0,3.0,1.0


In [45]:
home_ice = nhl_data.groupby(["Team","Home or Away"]).mean()
home_ice

Unnamed: 0_level_0,Unnamed: 1_level_0,GF,GA,S,PIM,PPG,PPO,SHG,Shots Against,Penalty Minutes by Opponent,PPG Against,...,FF%,FO%,PDO (Even Strength),Blocked Shots (Even Strength),Shooting Percentage,Save Percentage,L,W,Penalty Differential,Goal Differential
Team,Home or Away,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ANA,Away,2.422764,2.943089,27.796748,10.211382,0.504065,2.609756,0.073171,32.308943,8.260163,0.617886,...,46.494309,50.935772,99.185366,12.495935,9.268293,90.439024,0.585366,0.414634,-0.837398,-0.520325
ANA,Home,2.918699,2.479675,30.504065,9.934959,0.479675,2.894309,0.121951,31.674797,9.569106,0.552846,...,50.021951,53.573984,101.952033,12.073171,9.894309,91.959350,0.398374,0.601626,-0.373984,0.439024
ARI,Away,2.447154,3.276423,28.609756,7.691057,0.495935,2.780488,0.081301,32.821138,7.560976,0.560976,...,45.849593,46.226016,98.203252,12.723577,8.845528,89.658537,0.642276,0.357724,-0.097561,-0.829268
ARI,Home,2.577236,2.731707,30.357724,8.585366,0.487805,3.203252,0.130081,31.934959,8.943089,0.569106,...,47.995122,49.913008,100.604878,13.065041,8.967480,91.146341,0.560976,0.439024,0.308943,-0.154472
BOS,Away,2.894309,2.731707,32.414634,9.642276,0.569106,2.788618,0.097561,29.471545,8.430894,0.585366,...,52.677236,50.730081,99.682114,12.211382,9.341463,90.487805,0.487805,0.512195,-0.398374,0.162602
BOS,Home,3.308943,2.479675,33.471545,9.691057,0.886179,3.333333,0.113821,27.512195,10.926829,0.447154,...,55.561789,52.348780,99.826016,9.821138,10.130081,90.707317,0.349593,0.650407,0.390244,0.829268
BUF,Away,2.487805,3.325203,30.959350,8.089431,0.577236,2.902439,0.056911,33.585366,7.926829,0.691057,...,47.945528,46.933333,98.160163,12.195122,8.252033,89.837398,0.682927,0.317073,-0.056911,-0.837398
BUF,Home,2.601626,3.081301,32.008130,7.747967,0.658537,3.000000,0.065041,33.065041,8.406504,0.495935,...,48.538211,50.756911,98.213008,10.048780,8.373984,90.439024,0.577236,0.422764,0.373984,-0.479675
CAR,Away,2.764228,3.073171,32.121951,6.560976,0.544715,2.829268,0.089431,28.626016,7.276423,0.512195,...,51.991057,49.565854,98.101626,10.601626,9.048780,88.959350,0.577236,0.422764,0.162602,-0.308943
CAR,Home,2.829268,2.739837,33.691057,6.284553,0.504065,3.000000,0.081301,28.528455,7.569106,0.455285,...,53.817073,54.146341,99.020325,11.357724,8.658537,90.227642,0.463415,0.536585,0.528455,0.089431
