In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import sqlite3
from scipy.stats import linregress
from tabulate import tabulate

In [3]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [4]:
conn = sqlite3.connect("../db/game-data.sqlite")

In [70]:
fenwick_coeffs = pd.read_sql("SELECT * FROM fenwick_coeffs", conn)

In [None]:
# Home Lead vs Fenwick Adjustment Coefficient
fenwick_coeffs.head(10)

Unnamed: 0,HomeLeadBinned,HomeCoeff,AwayCoeff
0,-3,0.854558,1.145442
1,-2,0.881844,1.118156
2,-1,0.913341,1.086669
3,0,0.972246,1.027758
4,1,1.03138,0.968629
5,2,1.066231,0.933769
6,3,1.083466,0.916561


In [None]:
fenwicks = pd.read_sql("SELECT * FROM FenwickAndScore", conn)

In [45]:
fenwicks["season"] = fenwicks["gameid"].apply(lambda id: math.floor(id / 1_000_000))
fenwicks["fenwickPct"] = fenwicks.AdjustedFenwickFor / (fenwicks.AdjustedFenwickFor + fenwicks.AdjustedFenwickAgainst)
fenwicks["won"] = (fenwicks.ScoreFor > fenwicks.ScoreAgainst).astype(int)

In [46]:
fenwicks.head()

Unnamed: 0,gameid,TeamId,TeamName,AdjustedFenwickFor,RawFenwickFor,isHomeTeam,OpponentId,OpponentName,AdjustedFenwickAgainst,RawFenwickAgainst,ScoreFor,ScoreAgainst,season,fenwickPct,won
0,2010020001,8,Montréal Canadiens,28.99224,30,0,10,Toronto Maple Leafs,27.680428,27,2,3,2010,0.511574,0
1,2010020001,10,Toronto Maple Leafs,27.680428,27,1,8,Montréal Canadiens,28.99224,30,3,2,2010,0.488426,1
2,2010020002,4,Philadelphia Flyers,30.865375,29,0,5,Pittsburgh Penguins,34.908733,37,3,2,2010,0.469263,1
3,2010020002,5,Pittsburgh Penguins,34.908733,37,1,4,Philadelphia Flyers,30.865375,29,2,3,2010,0.530737,0
4,2010020003,12,Carolina Hurricanes,37.900863,36,0,30,Minnesota Wild,24.733733,26,4,3,2010,0.605111,1


In [51]:
def fenwick_correlations(predictor_for, predictor_against, to_predict_for, to_predict_against, n1, n2):

    scorrs = []

    for _, season_games in fenwicks.groupby(["season", "TeamId"]):
        #if season not in (2010, 2011, 2012, 2013, 2014): continue
        corrs = []
        for i in range(0, 100):
            season_sample = season_games.sample(n=40)
            season_1 = season_sample.iloc[0:n1]
            season_2 = season_sample.iloc[n1:n2]
            
            dataFenwick = season_1[predictor_for].mean() - (season_1[predictor_against].mean() if to_predict_against else 0)
            dataOut = season_2[to_predict_for].mean() - (season_2[to_predict_against].mean() if to_predict_against else 0)

            scorrs.append((dataFenwick, dataOut))

    corrs = pd.DataFrame(scorrs)
    _, _, r_value, p_value, _ = linregress(corrs[0], corrs[1])
    
    return r_value**2


In [39]:
# Auto Determination 20 games predicting 20 games
adj_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "AdjustedFenwickFor", "AdjustedFenwickAgainst", 20, 40)
raw_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "RawFenwickFor", "RawFenwickAgainst", 20, 40)

In [40]:
print(tabulate([["Raw Fenwick", raw_corr],["Score-adjusted Fenwick", adj_corr]], headers=['Possession Metric', 'Auto-determination (R^2)']))

Possession Metric         Auto-determination (R^2)
----------------------  --------------------------
Raw Fenwick                               0.490059
Score-adjusted Fenwick                    0.552636


In [65]:
# Determination of goal percentage, 20 games predicting 20 games
adj_gf_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "ScoreFor", "ScoreAgainst", 20, 40)
raw_gf_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "ScoreFor", "ScoreAgainst", 20, 40)

In [66]:
print(tabulate([["Raw Fenwick", raw_gf_corr],["Score-adjusted Fenwick", adj_gf_corr]], headers=['Possession Metric', 'Determination of Goal Differential (R^2)']))

Possession Metric         Determination of Goal Differential (R^2)
----------------------  ------------------------------------------
Raw Fenwick                                               0.176267
Score-adjusted Fenwick                                    0.214187


In [None]:
# Determination of winning percentage, 20 games predicting 20 games
adj_wpct_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "won", "", 20, 40)
raw_wpct_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "won", "", 20, 40)

In [55]:
print(tabulate([["Raw Fenwick", raw_wpct_corr],["Score-adjusted Fenwick", adj_wpct_corr]], headers=['Possession Metric', 'Determination of Winning Percentage (R^2)']))

Possession Metric         Determination of Winning Percentage (R^2)
----------------------  -------------------------------------------
Raw Fenwick                                               0.068343
Score-adjusted Fenwick                                    0.0838039


In [56]:
# Auto Determination 5 games predicting 35 games
adj_5_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "AdjustedFenwickFor", "AdjustedFenwickAgainst", 5, 35)
raw_5_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "RawFenwickFor", "RawFenwickAgainst", 5, 35)

In [57]:
print(tabulate([["Raw Fenwick", raw_5_corr],["Score-adjusted Fenwick", adj_5_corr]], headers=['Possession Metric', 'Auto-determination (R^2)']))

Possession Metric         Auto-determination (R^2)
----------------------  --------------------------
Raw Fenwick                               0.281344
Score-adjusted Fenwick                    0.336388


In [68]:
# Determination of goal percentage, 5 games predicting 35 games
adj_5gf_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "ScoreFor", "ScoreAgainst", 5, 35)
raw_5gf_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "ScoreFor", "ScoreAgainst", 5, 35)

In [69]:
print(tabulate([["Raw Fenwick", raw_5gf_corr],["Score-adjusted Fenwick", adj_5gf_corr]], headers=['Possession Metric', 'Determination of Goal Differential (R^2)']))

Possession Metric         Determination of Goal Differential (R^2)
----------------------  ------------------------------------------
Raw Fenwick                                               0.111019
Score-adjusted Fenwick                                    0.14558


In [61]:
# Determination of winning percentage, 5 games predicting 35 games
adj_5wpct_corr = fenwick_correlations("AdjustedFenwickFor", "AdjustedFenwickAgainst", "won", "", 20, 40)
raw_5wpct_corr = fenwick_correlations("RawFenwickFor", "RawFenwickAgainst", "won", "", 20, 40)

In [63]:
print(tabulate([["Raw Fenwick", raw_5wpct_corr],["Score-adjusted Fenwick", adj_5wpct_corr]], headers=['Possession Metric', 'Determination of Winning Percentage (R^2)']))

Possession Metric         Determination of Winning Percentage (R^2)
----------------------  -------------------------------------------
Raw Fenwick                                               0.0672801
Score-adjusted Fenwick                                    0.084764
