In [1]:
import pandas as pd
import numpy as np

# Load the per-drive table from a tab-separated file.
# index_col=False tells pandas not to use the first column as the index.
df = pd.read_csv(
    "Data/2018drives_augmented.tsv",
    sep="\t",
    index_col=False,
)
df

Unnamed: 0,offense,defense,game_id,plays,start_yardline,yards,end_yardline,drive_result,points,start_distance,dist_range
0,Alabama,Louisville,401012246,7,35,65,100,TD,7,65,"(60.0, 65.0]"
1,Louisville,Alabama,401012246,4,75,20,55,PUNT,0,75,"(70.0, 75.0]"
2,Alabama,Louisville,401012246,8,20,63,65,FUMBLE,0,80,"(75.0, 80.0]"
3,Louisville,Alabama,401012246,3,83,-1,84,PUNT,0,83,"(80.0, 85.0]"
4,Alabama,Louisville,401012246,6,45,55,100,TD,7,55,"(50.0, 55.0]"
5,Louisville,Alabama,401012246,8,75,49,26,INT,0,75,"(70.0, 75.0]"
6,Alabama,Louisville,401012246,6,25,75,100,TD,7,75,"(70.0, 75.0]"
7,Louisville,Alabama,401012246,6,86,31,55,PUNT,0,86,"(85.0, 90.0]"
8,Alabama,Louisville,401012246,5,22,19,41,PUNT,0,78,"(75.0, 80.0]"
9,Louisville,Alabama,401012246,4,80,20,60,PUNT,0,80,"(75.0, 80.0]"


In [2]:
# Load the table grouped by start_distance (tab-separated file).
# index_col=False tells pandas not to use the first column as the index.
grouped_df = pd.read_csv(
    "Data/2018drives_grouped.tsv",
    sep="\t",
    index_col=False,
)
grouped_df

Unnamed: 0,start_distance,count,mean
0,1,2,0.000000
1,2,4,6.000000
2,3,7,4.428571
3,4,4,6.000000
4,5,2,7.000000
5,6,5,5.600000
6,7,11,5.181818
7,8,4,5.000000
8,9,7,5.428571
9,10,6,6.333333


In [3]:
from sklearn.linear_model import LinearRegression

# Fit `mean` against `start_distance`, weighting each grouped row by its `count`.
# Both x and y are reshaped into single-column 2-D arrays.
x = grouped_df["start_distance"].to_numpy().reshape(-1, 1)
y = grouped_df["mean"].to_numpy().reshape(-1, 1)
weights = grouped_df["count"].to_numpy()

reg = LinearRegression()
reg.fit(x, y, sample_weight=weights)

# Weighted score, a blank line, then the fitted coefficient and intercept.
print(reg.score(x, y, sample_weight=weights), "", reg.coef_, reg.intercept_, sep="\n")

0.7514785927388814

[[-0.04650128]]
[5.42503064]


In [4]:
# Keep only the columns used from here on; .copy() gives an independent frame
# so the column assignment below does not target a slice of the loaded data.
df = df.loc[:, ["offense", "defense", "game_id", "start_distance", "points"]].copy()

# Predict with the fitted line, passing start_distance as an (n, 1) array
# to match the shape the model was fitted on.
df["expectedPoints"] = reg.predict(df["start_distance"].to_numpy().reshape(-1, 1))
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints
0,Alabama,Louisville,401012246,65,7,2.402448
1,Louisville,Alabama,401012246,75,0,1.937435
2,Alabama,Louisville,401012246,80,0,1.704929
3,Louisville,Alabama,401012246,83,0,1.565425
4,Alabama,Louisville,401012246,55,7,2.867460
5,Louisville,Alabama,401012246,75,0,1.937435
6,Alabama,Louisville,401012246,75,7,1.937435
7,Louisville,Alabama,401012246,86,0,1.425921
8,Alabama,Louisville,401012246,78,0,1.797931
9,Louisville,Alabama,401012246,80,0,1.704929


In [5]:
# PRE is the drive's actual points minus the regression's expected points.
df["PRE"] = df["points"].sub(df["expectedPoints"])
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.402448,4.597552
1,Louisville,Alabama,401012246,75,0,1.937435,-1.937435
2,Alabama,Louisville,401012246,80,0,1.704929,-1.704929
3,Louisville,Alabama,401012246,83,0,1.565425,-1.565425
4,Alabama,Louisville,401012246,55,7,2.867460,4.132540
5,Louisville,Alabama,401012246,75,0,1.937435,-1.937435
6,Alabama,Louisville,401012246,75,7,1.937435,5.062565
7,Louisville,Alabama,401012246,86,0,1.425921,-1.425921
8,Alabama,Louisville,401012246,78,0,1.797931,-1.797931
9,Louisville,Alabama,401012246,80,0,1.704929,-1.704929


In [6]:
# OPRE: each team's mean PRE over the drives where it is listed as the offense.
# "PRE" is selected before aggregating so only that column is averaged;
# aggregating the whole frame first would also try to average the string
# columns (e.g. "defense"), which newer pandas versions reject.
teams_df = df.groupby("offense")["PRE"].mean().to_frame(name="OPRE")
teams_df.index.name = "team"
teams_df

Unnamed: 0_level_0,OPRE
team,Unnamed: 1_level_1
Air Force,-0.067525
Akron,-0.853194
Alabama,1.988831
Appalachian State,1.074953
Arizona,-0.155945
Arizona State,0.819782
Arkansas,-0.837777
Arkansas State,-0.697086
Army,1.130853
Auburn,-0.664957


In [7]:
# DPRE: each team's mean PRE over the drives where it is listed as the defense.
# "PRE" is selected before aggregating so only that column is averaged;
# aggregating the whole frame first would also try to average the string
# columns (e.g. "offense"), which newer pandas versions reject.
defenses = df.groupby("defense")["PRE"].mean().to_frame(name="DPRE")
defenses.index.name = "team"
defenses

Unnamed: 0_level_0,DPRE
team,Unnamed: 1_level_1
Air Force,-0.184962
Akron,-0.545927
Alabama,-1.167503
Appalachian State,-0.813175
Arizona,-0.203520
Arizona State,0.097239
Arkansas,0.021910
Arkansas State,0.262869
Army,0.464808
Auburn,-0.874629


In [8]:
# Combine offensive and defensive ratings on the team index; the inner join
# keeps only teams that appear in both tables.
teams_df = teams_df.merge(defenses, how="inner", left_index=True, right_index=True)

# Print every team followed by its row of ratings.
for team, ratings in teams_df.iterrows():
    print(team)
    print(ratings)

Air Force
OPRE   -0.067525
DPRE   -0.184962
Name: Air Force, dtype: float64
Akron
OPRE   -0.853194
DPRE   -0.545927
Name: Akron, dtype: float64
Alabama
OPRE    1.988831
DPRE   -1.167503
Name: Alabama, dtype: float64
Appalachian State
OPRE    1.074953
DPRE   -0.813175
Name: Appalachian State, dtype: float64
Arizona
OPRE   -0.155945
DPRE   -0.203520
Name: Arizona, dtype: float64
Arizona State
OPRE    0.819782
DPRE    0.097239
Name: Arizona State, dtype: float64
Arkansas
OPRE   -0.837777
DPRE    0.021910
Name: Arkansas, dtype: float64
Arkansas State
OPRE   -0.697086
DPRE    0.262869
Name: Arkansas State, dtype: float64
Army
OPRE    1.130853
DPRE    0.464808
Name: Army, dtype: float64
Auburn
OPRE   -0.664957
DPRE   -0.874629
Name: Auburn, dtype: float64
BYU
OPRE   -0.467371
DPRE    0.479384
Name: BYU, dtype: float64
Ball State
OPRE   -0.410634
DPRE    0.186749
Name: Ball State, dtype: float64
Baylor
OPRE    0.421282
DPRE    0.508085
Name: Baylor, dtype: float64
Boise State
OPRE    0.576625

In [9]:
# Ten highest OPRE values, largest first.
teams_df.sort_values("OPRE", ascending=False).iloc[:10]

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
UCF,2.391957,-0.407269
Alabama,1.988831,-1.167503
Oklahoma,1.931086,0.416868
Georgia,1.589102,-1.143748
Ohio State,1.322721,-0.681266
Georgia Tech,1.310679,0.406079
Washington State,1.255696,-0.303354
Memphis,1.163016,0.019778
Penn State,1.135793,-0.779402
Army,1.130853,0.464808


In [10]:
# Ten lowest DPRE values (ascending sort, so the most negative comes first).
teams_df.sort_values("DPRE").iloc[:10]

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,1.988831,-1.167503
Fresno State,0.496263,-1.151825
Georgia,1.589102,-1.143748
Miami,0.015732,-1.075427
Michigan,0.908482,-1.070441
Temple,-0.200218,-1.064917
Clemson,0.978515,-1.052957
Florida,-0.05489,-1.030466
Kentucky,-0.324408,-0.990587
TCU,-0.78484,-0.935539


In [11]:
# Show the per-drive frame (including expectedPoints and PRE) again.
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.402448,4.597552
1,Louisville,Alabama,401012246,75,0,1.937435,-1.937435
2,Alabama,Louisville,401012246,80,0,1.704929,-1.704929
3,Louisville,Alabama,401012246,83,0,1.565425,-1.565425
4,Alabama,Louisville,401012246,55,7,2.867460,4.132540
5,Louisville,Alabama,401012246,75,0,1.937435,-1.937435
6,Alabama,Louisville,401012246,75,7,1.937435,5.062565
7,Louisville,Alabama,401012246,86,0,1.425921,-1.425921
8,Alabama,Louisville,401012246,78,0,1.797931,-1.797931
9,Louisville,Alabama,401012246,80,0,1.704929,-1.704929


In [12]:
# First opponent adjustment, computed for whole columns at once instead of
# looping over rows.
#   off_oppAdj = drive PRE minus the DPRE of the team listed as defense
#   def_oppAdj = drive PRE minus the OPRE of the team listed as offense
# .loc with an array of labels raises KeyError if a team is missing from
# teams_df, the same failure a per-row scalar lookup would produce.
# .to_numpy() drops the team index so the subtraction lines up row by row.
df["off_oppAdj"] = df["PRE"] - teams_df.loc[df["defense"].to_numpy(), "DPRE"].to_numpy()
df["def_oppAdj"] = df["PRE"] - teams_df.loc[df["offense"].to_numpy(), "OPRE"].to_numpy()
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE,off_oppAdj,def_oppAdj
0,Alabama,Louisville,401012246,65,7,2.402448,4.597552,3.510340,2.608721
1,Louisville,Alabama,401012246,75,0,1.937435,-1.937435,-0.769932,-1.062888
2,Alabama,Louisville,401012246,80,0,1.704929,-1.704929,-2.792141,-3.693759
3,Louisville,Alabama,401012246,83,0,1.565425,-1.565425,-0.397922,-0.690878
4,Alabama,Louisville,401012246,55,7,2.867460,4.132540,3.045327,2.143709
5,Louisville,Alabama,401012246,75,0,1.937435,-1.937435,-0.769932,-1.062888
6,Alabama,Louisville,401012246,75,7,1.937435,5.062565,3.975352,3.073734
7,Louisville,Alabama,401012246,86,0,1.425921,-1.425921,-0.258418,-0.551374
8,Alabama,Louisville,401012246,78,0,1.797931,-1.797931,-2.885144,-3.786762
9,Louisville,Alabama,401012246,80,0,1.704929,-1.704929,-0.537426,-0.830381


In [13]:
# Adjusted ratings: per-team means of the opponent-adjusted drive values.
# The column is selected before aggregating so only it is averaged; the
# string columns in df cannot be averaged and newer pandas versions raise on them.
# Assignment aligns on the team index of teams_df.
teams_df["aOPRE"] = df.groupby("offense")["off_oppAdj"].mean()
teams_df["aDPRE"] = df.groupby("defense")["def_oppAdj"].mean()
teams_df

Unnamed: 0_level_0,OPRE,DPRE,aOPRE,aDPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Air Force,-0.067525,-0.184962,-0.248559,-0.223260
Akron,-0.853194,-0.545927,-0.729542,-0.197030
Alabama,1.988831,-1.167503,1.458466,-0.900741
Appalachian State,1.074953,-0.813175,0.850898,-0.767903
Arizona,-0.155945,-0.203520,-0.476675,-0.028264
Arizona State,0.819782,0.097239,0.723287,0.132990
Arkansas,-0.837777,0.021910,-0.267935,-0.118334
Arkansas State,-0.697086,0.262869,-0.569361,-0.051698
Army,1.130853,0.464808,1.121729,-0.250611
Auburn,-0.664957,-0.874629,-0.151448,-0.829305


In [14]:
def printLeaders(n=10):
    """Print the leading teams by adjusted offense and adjusted defense.

    Reads the module-level ``teams_df`` and prints two tables:
    the ``n`` rows with the highest ``aOPRE``, then the ``n`` rows with the
    lowest ``aDPRE``.

    Parameters
    ----------
    n : int, default 10
        Number of rows to show in each table. The default keeps the
        original behaviour of printing ten teams.
    """
    print(teams_df.sort_values(by="aOPRE", ascending=False).head(n))
    print(teams_df.sort_values(by="aDPRE").head(n))

# Print the leader tables using the current aOPRE / aDPRE values in teams_df.
printLeaders()

                      OPRE      DPRE     aOPRE     aDPRE
team                                                    
Oklahoma          1.931086  0.416868  1.515096  0.274303
Alabama           1.988831 -1.167503  1.458466 -0.900741
Georgia           1.589102 -1.143748  1.389087 -0.993204
UCF               2.391957 -0.407269  1.217625  0.145319
Ohio State        1.322721 -0.681266  1.216907 -0.587070
Wisconsin         1.019615 -0.268821  1.138972  0.080118
Army              1.130853  0.464808  1.121729 -0.250611
Penn State        1.135793 -0.779402  1.059041 -1.154505
Georgia Tech      1.310679  0.406079  1.014145  0.452808
Washington State  1.255696 -0.303354  0.968970  0.236481
                OPRE      DPRE     aOPRE     aDPRE
team                                              
Clemson     0.978515 -1.052957  0.839291 -1.269414
Penn State  1.135793 -0.779402  1.059041 -1.154505
Georgia     1.589102 -1.143748  1.389087 -0.993204
TCU        -0.784840 -0.935539 -0.719851 -0.977194
Miami     

In [15]:
def adjust():
    """Run one pass of the opponent adjustment, updating ``df`` and ``teams_df`` in place.

    For every drive in the module-level ``df``:

    * ``off_oppAdj`` = ``PRE`` minus the current ``aDPRE`` of the team listed as defense
    * ``def_oppAdj`` = ``PRE`` minus the current ``aOPRE`` of the team listed as offense

    ``teams_df["aOPRE"]`` and ``teams_df["aDPRE"]`` are then replaced by the
    per-team means of those columns.

    Raises
    ------
    KeyError
        If a team named in ``df`` is not in the index of ``teams_df``.
    """
    # Look up both opponent ratings first, so each adjustment uses the values
    # from the previous pass; teams_df is only overwritten afterwards.
    opp_defense = teams_df.loc[df["defense"].to_numpy(), "aDPRE"].to_numpy()
    opp_offense = teams_df.loc[df["offense"].to_numpy(), "aOPRE"].to_numpy()

    # Whole-column arithmetic replaces the per-row loop; the arrays are in
    # drive order, so the subtraction lines up row by row.
    df["off_oppAdj"] = df["PRE"] - opp_defense
    df["def_oppAdj"] = df["PRE"] - opp_offense

    # Select the column before aggregating so the string columns in df are
    # never averaged; the assignment aligns on the team index.
    teams_df["aOPRE"] = df.groupby("offense")["off_oppAdj"].mean()
    teams_df["aDPRE"] = df.groupby("defense")["def_oppAdj"].mean()

In [16]:
# Apply the adjustment pass 40 times, printing Oklahoma's adjusted offensive
# rating after each pass so the change from pass to pass can be watched.
for i in range(40):
    adjust()
    print(teams_df.loc["Oklahoma", "aOPRE"])

# Leader tables after the final pass.
printLeaders()

1.9585247314136869
1.7426610234716366
1.9379596660369427
1.8152616780594752
1.9163453316281067
1.8466516652745757
1.9027995027343803
1.862715202487499
1.8954038987610342
1.8719139053789786
1.8915758472426276
1.877564026796676
1.889650913059416
1.8811710722462578
1.888706345053535
1.883517349442213
1.8882565907526658
1.8850552553135522
1.8880520688032705
1.8860652890610143
1.887966352945317
1.8867281548734516
1.8879362706202778
1.8871623681197967
1.8879307966994432
1.887446167867009
1.887935078228608
1.8876312478195352
1.8879422916529758
1.887751702721812
1.8879495236113217
1.8878299578772426
1.8879557077395508
1.887880719349172
1.8879606063462955
1.8879136038175142
1.8879643191784632
1.887934883949865
1.887967055137525
1.887948642224995
                      OPRE      DPRE     aOPRE     aDPRE
team                                                    
Georgia           1.589102 -1.143748  1.943530 -1.642900
Oklahoma          1.931086  0.416868  1.887949  0.112701
Alabama           1.98883

In [17]:
# Net rating: adjusted offense minus adjusted defense.
teams_df["net_aPRE"] = teams_df["aOPRE"].sub(teams_df["aDPRE"])

In [21]:
# Top 25 teams by net_aPRE, with the team name moved from the index into a column.
teams_df.sort_values("net_aPRE", ascending=False).iloc[:25].reset_index()

Unnamed: 0,team,OPRE,DPRE,aOPRE,aDPRE,net_aPRE
0,Georgia,1.589102,-1.143748,1.94353,-1.6429,3.58643
1,Alabama,1.988831,-1.167503,1.806401,-1.157251,2.963652
2,Clemson,0.978515,-1.052957,0.873173,-1.54196,2.415133
3,Penn State,1.135793,-0.779402,1.121248,-1.102114,2.223362
4,Michigan,0.908482,-1.070441,1.045275,-1.098984,2.144259
5,Ohio State,1.322721,-0.681266,1.447893,-0.662797,2.110689
6,West Virginia,1.02471,-0.608787,1.241499,-0.840275,2.081774
7,Kentucky,-0.324408,-0.990587,0.543301,-1.491277,2.034579
8,Mississippi State,0.441157,-0.903865,0.970384,-1.059696,2.03008
9,Florida,-0.05489,-1.030466,0.422817,-1.606943,2.02976
