In [1]:
import pandas as pd
import numpy as np

# Load the per-drive table (tab-separated); index_col=False keeps pandas from
# using the first column as the index.
drives_path = "Data/2018drives_augmented.tsv"
df = pd.read_csv(drives_path, sep="\t", index_col=False)
df

Unnamed: 0,offense,defense,game_id,id,plays,start_yardline,yards,end_yardline,drive_result,points,start_distance,dist_range
0,Alabama,Louisville,401012246,4010122461,7,35,65,100,TD,7,65,"(60.0, 65.0]"
1,Louisville,Alabama,401012246,4010122462,4,75,20,55,PUNT,0,75,"(70.0, 75.0]"
2,Alabama,Louisville,401012246,4010122463,8,20,63,65,FUMBLE,0,80,"(75.0, 80.0]"
3,Louisville,Alabama,401012246,4010122464,3,83,-1,84,PUNT,0,83,"(80.0, 85.0]"
4,Alabama,Louisville,401012246,4010122465,6,45,55,100,TD,7,55,"(50.0, 55.0]"
5,Louisville,Alabama,401012246,4010122466,8,75,49,26,INT,0,75,"(70.0, 75.0]"
6,Alabama,Louisville,401012246,4010122467,6,25,75,100,TD,7,75,"(70.0, 75.0]"
7,Louisville,Alabama,401012246,4010122468,6,86,31,55,PUNT,0,86,"(85.0, 90.0]"
8,Alabama,Louisville,401012246,4010122469,5,22,19,41,PUNT,0,78,"(75.0, 80.0]"
9,Louisville,Alabama,401012246,40101224610,4,80,20,60,PUNT,0,80,"(75.0, 80.0]"


In [2]:
# Load the table of drives grouped by starting distance: one row per
# start_distance with its drive count and mean points per drive.
grouped_path = "Data/2018drives_grouped.tsv"
grouped_df = pd.read_csv(grouped_path, sep="\t", index_col=False)
grouped_df

Unnamed: 0,start_distance,driveCount,meanPPD
0,3,16,5.875000
1,7,14,5.285714
2,9,14,5.071429
3,10,11,5.000000
4,11,15,5.000000
5,12,11,6.000000
6,13,16,4.687500
7,14,16,4.375000
8,15,13,5.230769
9,17,10,5.200000


In [3]:
from sklearn.linear_model import LinearRegression

# Fit mean points-per-drive as a linear function of starting distance,
# weighting each distance bucket by the number of drives it contains.
# Both x and y are kept as (n, 1) column vectors, so coef_ is 2-D and
# intercept_ is a 1-element array.
x = np.array(grouped_df["start_distance"]).reshape(-1, 1)
y = np.array(grouped_df["meanPPD"]).reshape(-1, 1)
weights = np.array(grouped_df["driveCount"])

reg = LinearRegression()
reg.fit(x, y, sample_weight=weights)

# Weighted R^2, a blank line, then the slope and intercept.
r_squared = reg.score(x, y, sample_weight=weights)
print(r_squared, "", reg.coef_, reg.intercept_, sep="\n")

0.8671357752559365

[[-0.04543188]]
[5.35878273]


In [4]:
# Keep only the columns needed for the ratings, then attach the regression's
# predicted points for each drive's starting distance.
keep_cols = ["offense", "defense", "game_id", "start_distance", "points"]
df = df[keep_cols].copy()
start_dist_col = np.array(df["start_distance"]).reshape(-1, 1)
df["expectedPoints"] = reg.predict(start_dist_col)
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints
0,Alabama,Louisville,401012246,65,7,2.405711
1,Louisville,Alabama,401012246,75,0,1.951392
2,Alabama,Louisville,401012246,80,0,1.724232
3,Louisville,Alabama,401012246,83,0,1.587937
4,Alabama,Louisville,401012246,55,7,2.860029
5,Louisville,Alabama,401012246,75,0,1.951392
6,Alabama,Louisville,401012246,75,7,1.951392
7,Louisville,Alabama,401012246,86,0,1.451641
8,Alabama,Louisville,401012246,78,0,1.815096
9,Louisville,Alabama,401012246,80,0,1.724232


In [5]:
# PRE = actual points scored on the drive minus the points the regression
# expected from that starting distance.
actual_points = df["points"]
predicted_points = df["expectedPoints"]
df["PRE"] = actual_points - predicted_points
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.405711,4.594289
1,Louisville,Alabama,401012246,75,0,1.951392,-1.951392
2,Alabama,Louisville,401012246,80,0,1.724232,-1.724232
3,Louisville,Alabama,401012246,83,0,1.587937,-1.587937
4,Alabama,Louisville,401012246,55,7,2.860029,4.139971
5,Louisville,Alabama,401012246,75,0,1.951392,-1.951392
6,Alabama,Louisville,401012246,75,7,1.951392,5.048608
7,Louisville,Alabama,401012246,86,0,1.451641,-1.451641
8,Alabama,Louisville,401012246,78,0,1.815096,-1.815096
9,Louisville,Alabama,401012246,80,0,1.724232,-1.724232


In [6]:
# Raw offensive rating (OPRE): each team's mean PRE over the drives where it
# was on offense. The "PRE" column is selected *before* aggregating so only
# that column is averaged; aggregating the whole frame also tries to average
# the string "defense" column, which raises TypeError on pandas >= 2.0.
teams_df = df.groupby("offense")[["PRE"]].mean()
teams_df.columns = ["OPRE"]
teams_df.index.name = "team"
teams_df

Unnamed: 0_level_0,OPRE
team,Unnamed: 1_level_1
Air Force,0.158356
Akron,-0.858427
Alabama,1.683823
Appalachian State,0.085688
Arizona,0.042118
Arizona State,0.597573
Arkansas,-0.414750
Arkansas State,-0.034893
Army,1.167464
Auburn,-0.366121


In [7]:
# Raw defensive rating (DPRE): each team's mean PRE over the drives where it
# was on defense (lower means opponents scored less than expected). The "PRE"
# column is selected *before* aggregating so only that column is averaged;
# aggregating the whole frame also tries to average the string "offense"
# column, which raises TypeError on pandas >= 2.0.
defenses = df.groupby("defense")[["PRE"]].mean()
defenses.columns = ["DPRE"]
defenses.index.name = "team"
defenses

Unnamed: 0_level_0,DPRE
team,Unnamed: 1_level_1
Air Force,0.252103
Akron,-0.543090
Alabama,-1.237866
Appalachian State,-0.854823
Arizona,0.003207
Arizona State,-0.100543
Arkansas,0.185174
Arkansas State,0.291684
Army,-0.102212
Auburn,-0.677154


In [8]:
# Combine the offensive and defensive ratings on the team index; the inner
# join keeps only teams that appear in both tables.
teams_df = pd.merge(
    left=teams_df,
    right=defenses,
    how="inner",
    left_index=True,
    right_index=True,
)

# Print every team's name followed by its row of ratings.
for team, ratings in teams_df.iterrows():
    print(team, ratings, sep="\n")

Air Force
OPRE    0.158356
DPRE    0.252103
Name: Air Force, dtype: float64
Akron
OPRE   -0.858427
DPRE   -0.543090
Name: Akron, dtype: float64
Alabama
OPRE    1.683823
DPRE   -1.237866
Name: Alabama, dtype: float64
Appalachian State
OPRE    0.085688
DPRE   -0.854823
Name: Appalachian State, dtype: float64
Arizona
OPRE    0.042118
DPRE    0.003207
Name: Arizona, dtype: float64
Arizona State
OPRE    0.597573
DPRE   -0.100543
Name: Arizona State, dtype: float64
Arkansas
OPRE   -0.414750
DPRE    0.185174
Name: Arkansas, dtype: float64
Arkansas State
OPRE   -0.034893
DPRE    0.291684
Name: Arkansas State, dtype: float64
Army
OPRE    1.167464
DPRE   -0.102212
Name: Army, dtype: float64
Auburn
OPRE   -0.366121
DPRE   -0.677154
Name: Auburn, dtype: float64
BYU
OPRE   -0.298704
DPRE    0.050326
Name: BYU, dtype: float64
Ball State
OPRE   -0.494670
DPRE    0.539633
Name: Ball State, dtype: float64
Baylor
OPRE    0.160828
DPRE    0.517793
Name: Baylor, dtype: float64
Boise State
OPRE    0.672977

In [9]:
# Ten teams with the highest raw offensive rating (OPRE), best first.
teams_df.sort_values(by="OPRE", ascending=False).head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Oklahoma,2.029743,0.313991
Alabama,1.683823,-1.237866
UCF,1.67822,-0.286835
Army,1.167464,-0.102212
Washington State,1.16471,-0.128919
Ohio,1.146098,0.17352
Clemson,1.128651,-1.166056
Georgia,1.027604,-0.709416
Georgia Tech,1.013219,0.343894
Ohio State,0.945848,-0.247491


In [10]:
# Ten teams with the lowest DPRE, i.e. the defenses whose opponents scored the
# fewest points relative to the regression's expectation.
teams_df.sort_values(by="DPRE").head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Michigan,0.657689,-1.324103
Alabama,1.683823,-1.237866
Clemson,1.128651,-1.166056
UAB,0.413851,-1.122131
Mississippi State,0.43302,-1.058755
Miami,-0.205851,-1.006859
Fresno State,0.598783,-0.964845
Utah State,0.724551,-0.932884
Cincinnati,0.274503,-0.91081
Kentucky,-0.468945,-0.897209


In [11]:
# Re-display the per-drive frame (now with expectedPoints and PRE) before the
# opponent adjustment columns are added.
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.405711,4.594289
1,Louisville,Alabama,401012246,75,0,1.951392,-1.951392
2,Alabama,Louisville,401012246,80,0,1.724232,-1.724232
3,Louisville,Alabama,401012246,83,0,1.587937,-1.587937
4,Alabama,Louisville,401012246,55,7,2.860029,4.139971
5,Louisville,Alabama,401012246,75,0,1.951392,-1.951392
6,Alabama,Louisville,401012246,75,7,1.951392,5.048608
7,Louisville,Alabama,401012246,86,0,1.451641,-1.451641
8,Alabama,Louisville,401012246,78,0,1.815096,-1.815096
9,Louisville,Alabama,401012246,80,0,1.724232,-1.724232


In [12]:
# First opponent adjustment, computed for all drives at once instead of a
# per-row iterrows()/.at loop:
#   off_oppAdj = drive PRE minus the opposing defense's raw DPRE
#   def_oppAdj = drive PRE minus the opposing offense's raw OPRE
# .loc with an array of labels raises KeyError if any team is missing from
# teams_df, so an unknown team still fails loudly rather than yielding NaN.
opp_dpre = teams_df.loc[df["defense"].to_numpy(), "DPRE"].to_numpy()
opp_opre = teams_df.loc[df["offense"].to_numpy(), "OPRE"].to_numpy()

df["off_oppAdj"] = df["PRE"] - opp_dpre
df["def_oppAdj"] = df["PRE"] - opp_opre
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE,off_oppAdj,def_oppAdj
0,Alabama,Louisville,401012246,65,7,2.405711,4.594289,3.219086,2.910466
1,Louisville,Alabama,401012246,75,0,1.951392,-1.951392,-0.713526,-1.081523
2,Alabama,Louisville,401012246,80,0,1.724232,-1.724232,-3.099436,-3.408056
3,Louisville,Alabama,401012246,83,0,1.587937,-1.587937,-0.350071,-0.718068
4,Alabama,Louisville,401012246,55,7,2.860029,4.139971,2.764767,2.456147
5,Louisville,Alabama,401012246,75,0,1.951392,-1.951392,-0.713526,-1.081523
6,Alabama,Louisville,401012246,75,7,1.951392,5.048608,3.673405,3.364785
7,Louisville,Alabama,401012246,86,0,1.451641,-1.451641,-0.213775,-0.581773
8,Alabama,Louisville,401012246,78,0,1.815096,-1.815096,-3.190300,-3.498920
9,Louisville,Alabama,401012246,80,0,1.724232,-1.724232,-0.486367,-0.854364


In [13]:
# Opponent-adjusted ratings: per-team mean of the adjusted drive values.
# Each column is selected before taking the mean so only that column is
# aggregated; averaging the whole grouped frame also hits the string team
# columns, which raises TypeError on pandas >= 2.0. The resulting Series are
# aligned to teams_df by team name on assignment.
teams_df["off_aPPD"] = df.groupby("offense")["off_oppAdj"].mean()
teams_df["def_aPPD"] = df.groupby("defense")["def_oppAdj"].mean()
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Air Force,0.158356,0.252103,0.178647,0.243642
Akron,-0.858427,-0.543090,-0.709758,-0.180349
Alabama,1.683823,-1.237866,1.323039,-1.166224
Appalachian State,0.085688,-0.854823,-0.285435,-0.824919
Arizona,0.042118,0.003207,0.008009,0.042335
Arizona State,0.597573,-0.100543,0.639382,0.121347
Arkansas,-0.414750,0.185174,-0.135619,0.061097
Arkansas State,-0.034893,0.291684,-0.268930,0.149748
Army,1.167464,-0.102212,0.973859,-0.303364
Auburn,-0.366121,-0.677154,-0.140089,-0.582087


In [14]:
def printLeaders():
    """Print the current rating leaders from the global ``teams_df``.

    Prints the 25 rows with the highest ``off_aPPD`` (descending), then the
    25 rows with the lowest ``def_aPPD`` (ascending), then a blank line.
    """
    # (sort column, ascending?) for each leaderboard, in print order.
    leaderboards = (("off_aPPD", False), ("def_aPPD", True))
    for column, ascending in leaderboards:
        print(teams_df.sort_values(by=column, ascending=ascending).head(25))
    print()

# Show the leaders after the first opponent-adjustment pass.
printLeaders()

                       OPRE      DPRE  off_aPPD  def_aPPD
team                                                     
Oklahoma           2.029743  0.313991  1.896206  0.245774
Alabama            1.683823 -1.237866  1.323039 -1.166224
UCF                1.678220 -0.286835  1.278901 -0.107398
Georgia            1.027604 -0.709416  1.245102 -0.614176
Washington State   1.164710 -0.128919  1.154964  0.202663
NC State           0.791134 -0.004901  1.033998 -0.121142
Army               1.167464 -0.102212  0.973859 -0.303364
Clemson            1.128651 -1.166056  0.938182 -1.189260
Ohio               1.146098  0.173520  0.919954  0.311515
Michigan           0.657689 -1.324103  0.901091 -1.279253
Louisiana          0.556335  1.355499  0.839574  1.245695
Georgia Tech       1.013219  0.343894  0.785039  0.418259
West Virginia      0.904522 -0.150737  0.769455 -0.151479
Ohio State         0.945848 -0.247491  0.737120 -0.051662
Ole Miss           0.638130  0.634304  0.711244  0.458318
Purdue        

In [15]:
def adjust():
    """Run one opponent-adjustment pass, updating ``df`` and ``teams_df`` in place.

    For every drive in the global ``df``:
      * ``off_oppAdj`` = PRE minus the opposing defense's current ``def_aPPD``
      * ``def_oppAdj`` = PRE minus the opposing offense's current ``off_aPPD``

    Then each team's ``off_aPPD`` / ``def_aPPD`` in the global ``teams_df`` is
    replaced by the mean of those adjusted values over its offensive /
    defensive drives. Both per-drive columns are computed from the ratings as
    they were *before* this pass; ``teams_df`` is only updated at the end.

    Raises:
        KeyError: if a team named in ``df`` is not in ``teams_df``'s index.
    """
    # Vectorized lookup of each drive's opponent rating (replaces a per-row
    # iterrows()/.at loop). .loc with an array of labels raises KeyError on
    # any unknown team, so missing teams still fail loudly.
    opp_def = teams_df.loc[df["defense"].to_numpy(), "def_aPPD"].to_numpy()
    opp_off = teams_df.loc[df["offense"].to_numpy(), "off_aPPD"].to_numpy()

    df["off_oppAdj"] = df["PRE"] - opp_def
    df["def_oppAdj"] = df["PRE"] - opp_off

    # Select the column before averaging: aggregating the whole grouped frame
    # also hits the string team columns, which raises on pandas >= 2.0.
    teams_df["off_aPPD"] = df.groupby("offense")["off_oppAdj"].mean()
    teams_df["def_aPPD"] = df.groupby("defense")["def_oppAdj"].mean()

In [16]:
# Repeat the opponent adjustment a fixed number of passes, printing one team's
# offensive rating after each pass as a convergence check.
# NOTE(review): in the recorded output the value alternates between roughly
# 2.2868 and 2.2873 on the last passes rather than settling on one number.
n_passes = 40
for pass_no in range(n_passes):
    adjust()
    print(teams_df.at["Oklahoma", "off_aPPD"])
printLeaders()

2.2164646354139075
2.1437254513116653
2.245410591316909
2.2157453555520528
2.2598763559784087
2.2473769279834714
2.2697225370224055
2.2640137168691656
2.2763137014892134
2.27341718823815
2.280547753893188
2.278891892346591
2.2831927342520255
2.282119257490191
2.284817327553737
2.284031567235551
2.285804941471893
2.285166738102756
2.2864013809993904
2.2858407979109763
2.2867599570557893
2.2862408870701683
2.286974801555243
2.2864781755734875
2.287103168306165
2.286618768092671
2.2871796718877504
2.286701970899605
2.2872251535689845
2.286751144051253
2.287252123358348
2.286780160534845
2.287268071688357
2.2867972521127986
2.2872774735552337
2.286807298512884
2.287282996767328
2.2868131892650863
2.2872862283240756
2.2868166333023936
                       OPRE      DPRE  off_aPPD  def_aPPD
team                                                     
Oklahoma           2.029743  0.313991  2.286817  0.127634
Georgia            1.027604 -0.709416  1.806085 -1.239632
Alabama            1.683823 

In [17]:
# Net rating: adjusted offense minus adjusted defense (lower def_aPPD is
# better for a defense, so subtracting it rewards good defenses).
offense_rating = teams_df["off_aPPD"]
defense_rating = teams_df["def_aPPD"]
teams_df["net_aPPD"] = offense_rating - defense_rating

In [18]:
# Top 25 teams by net rating, with "team" moved from the index into a column.
teams_df.sort_values(by="net_aPPD", ascending=False).head(25).reset_index()

Unnamed: 0,team,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD
0,Alabama,1.683823,-1.237866,1.701056,-1.647095,3.348151
1,Georgia,1.027604,-0.709416,1.806085,-1.239632,3.045717
2,Michigan,0.657689,-1.324103,1.053421,-1.466366,2.519787
3,Mississippi State,0.43302,-1.058755,1.00302,-1.446577,2.449597
4,Clemson,1.128651,-1.166056,0.955757,-1.41367,2.369427
5,Oklahoma,2.029743,0.313991,2.286817,0.127634,2.159183
6,Kentucky,-0.468945,-0.897209,0.253222,-1.671289,1.924511
7,Missouri,0.16483,-0.173523,0.976808,-0.941887,1.918696
8,Florida,-0.027555,-0.504674,0.757152,-1.160336,1.917488
9,LSU,-0.235804,-0.484206,0.683888,-1.207618,1.891506


In [19]:
# Add an "AP" column holding the placeholder string "NR" for every team.
# NOTE(review): presumably an AP-poll rank with "NR" meaning "not ranked",
# to be filled in elsewhere — confirm.
teams_df["AP"] = "NR"
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD,AP
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Air Force,0.158356,0.252103,0.067507,0.428763,-0.361256,NR
Akron,-0.858427,-0.543090,-0.738194,-0.254636,-0.483558,NR
Alabama,1.683823,-1.237866,1.701056,-1.647095,3.348151,NR
Appalachian State,0.085688,-0.854823,-0.509282,-0.724476,0.215194,NR
Arizona,0.042118,0.003207,0.038807,0.079280,-0.040473,NR
Arizona State,0.597573,-0.100543,0.721161,0.035741,0.685420,NR
Arkansas,-0.414750,0.185174,-0.038732,-0.180036,0.141304,NR
Arkansas State,-0.034893,0.291684,-0.468295,0.315167,-0.783462,NR
Army,1.167464,-0.102212,0.837814,-0.184243,1.022057,NR
Auburn,-0.366121,-0.677154,0.137912,-1.081153,1.219065,NR


In [20]:
# Write the team ratings (including the "team" index) to a tab-separated file.
teams_df.to_csv("Data/teams_aPPD.tsv", sep="\t")

In [21]:
# Summary statistics for the numeric rating columns.
teams_df.describe()

Unnamed: 0,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD
count,130.0,130.0,130.0,130.0,130.0
mean,0.011513,0.005589,0.013005,0.004865,0.008139
std,0.605873,0.61898,0.706899,0.73296,1.198907
min,-1.0981,-1.324103,-1.676344,-1.671289,-2.835054
25%,-0.438865,-0.449834,-0.476017,-0.489064,-0.817126
50%,-0.029574,-0.005272,-0.011844,0.038233,-0.021149
75%,0.380285,0.328326,0.526404,0.431761,0.914512
max,2.029743,2.020228,2.286817,2.018945,3.348151
