In [1]:
import pandas as pd
import numpy as np

# Load the per-drive table (tab-separated). index_col=False tells pandas not to
# promote the first column of the file to the index.
df = pd.read_csv(
    "Data/2018drives_augmented.tsv",
    sep="\t",
    index_col=False,
)
df

Unnamed: 0,offense,defense,game_id,plays,start_yardline,yards,end_yardline,drive_result,points,start_distance,dist_range
0,Alabama,Louisville,401012246,7,35,65,100,TD,7,65,"(60.0, 65.0]"
1,Louisville,Alabama,401012246,4,75,20,55,PUNT,0,75,"(70.0, 75.0]"
2,Alabama,Louisville,401012246,8,20,63,65,FUMBLE,0,80,"(75.0, 80.0]"
3,Louisville,Alabama,401012246,3,83,-1,84,PUNT,0,83,"(80.0, 85.0]"
4,Alabama,Louisville,401012246,6,45,55,100,TD,7,55,"(50.0, 55.0]"
5,Louisville,Alabama,401012246,8,75,49,26,INT,0,75,"(70.0, 75.0]"
6,Alabama,Louisville,401012246,6,25,75,100,TD,7,75,"(70.0, 75.0]"
7,Louisville,Alabama,401012246,6,86,31,55,PUNT,0,86,"(85.0, 90.0]"
8,Alabama,Louisville,401012246,5,22,19,41,PUNT,0,78,"(75.0, 80.0]"
9,Louisville,Alabama,401012246,4,80,20,60,PUNT,0,80,"(75.0, 80.0]"


In [2]:
# Load the pre-aggregated table: one row per start_distance with the number of
# drives ("count") and their mean points ("mean"). Tab-separated, no index column.
grouped_df = pd.read_csv(
    "Data/2018drives_grouped.tsv",
    sep="\t",
    index_col=False,
)
grouped_df

Unnamed: 0,start_distance,count,mean
0,1,3,2.333333
1,2,5,4.800000
2,3,9,5.000000
3,4,4,6.000000
4,5,1,7.000000
5,6,6,5.833333
6,7,11,5.181818
7,8,4,5.000000
8,9,7,5.428571
9,10,8,5.625000


In [3]:
from sklearn.linear_model import LinearRegression

# Weighted least-squares line: mean points per drive as a function of starting
# distance, with each distance weighted by how many drives it represents.
# Both x and y are kept as (n, 1) column arrays, so coef_ is 2-D and predict()
# returns an (n, 1) array.
x = grouped_df[["start_distance"]].to_numpy()
y = grouped_df[["mean"]].to_numpy()
weights = grouped_df["count"].to_numpy()

reg = LinearRegression()
reg.fit(x, y, sample_weight=weights)

# Weighted R^2 of the fit, a blank line, then slope and intercept.
print(reg.score(x, y, sample_weight=weights), end="\n\n")
print(reg.coef_)
print(reg.intercept_)

0.7717097929680559

[[-0.04519027]]
[5.326292]


In [4]:
# Keep only the columns the rating needs, as an independent copy so the column
# assignments below do not write into a slice of the loaded frame.
df = df.loc[:, ["offense", "defense", "game_id", "start_distance", "points"]].copy()

# Expected points for each drive from the fitted line. predict() wants a 2-D
# (n, 1) feature array; its (n, 1) result is flattened to one value per row.
start_distances = df[["start_distance"]].to_numpy()
df["expectedPoints"] = reg.predict(start_distances).ravel()
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints
0,Alabama,Louisville,401012246,65,7,2.388924
1,Louisville,Alabama,401012246,75,0,1.937022
2,Alabama,Louisville,401012246,80,0,1.711070
3,Louisville,Alabama,401012246,83,0,1.575499
4,Alabama,Louisville,401012246,55,7,2.840827
5,Louisville,Alabama,401012246,75,0,1.937022
6,Alabama,Louisville,401012246,75,7,1.937022
7,Louisville,Alabama,401012246,86,0,1.439929
8,Alabama,Louisville,401012246,78,0,1.801451
9,Louisville,Alabama,401012246,80,0,1.711070


In [5]:
# PRE is the per-drive residual: actual points scored minus the regression's
# expected points for that starting distance. Positive means the drive scored
# more than the model expected; negative means it scored less.
df["PRE"] = df["points"] - df["expectedPoints"]
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.388924,4.611076
1,Louisville,Alabama,401012246,75,0,1.937022,-1.937022
2,Alabama,Louisville,401012246,80,0,1.711070,-1.711070
3,Louisville,Alabama,401012246,83,0,1.575499,-1.575499
4,Alabama,Louisville,401012246,55,7,2.840827,4.159173
5,Louisville,Alabama,401012246,75,0,1.937022,-1.937022
6,Alabama,Louisville,401012246,75,7,1.937022,5.062978
7,Louisville,Alabama,401012246,86,0,1.439929,-1.439929
8,Alabama,Louisville,401012246,78,0,1.801451,-1.801451
9,Louisville,Alabama,401012246,80,0,1.711070,-1.711070


In [6]:
# OPRE: each team's mean PRE over the drives where it was on offense.
# Select "PRE" *before* aggregating: averaging every column and then picking one
# is wasted work and raises TypeError on pandas >= 2.0, because the frame also
# holds string columns (e.g. "defense") that cannot be averaged.
teams_df = (
    df.groupby("offense")["PRE"]
    .mean()
    .to_frame("OPRE")
    .rename_axis("team")
)
teams_df

Unnamed: 0_level_0,OPRE
team,Unnamed: 1_level_1
Air Force,-0.420602
Akron,-0.980367
Alabama,1.800222
Appalachian State,0.769018
Arizona,-0.304328
Arizona State,0.823218
Arkansas,-0.559415
Arkansas State,-0.823830
Army,1.231761
Auburn,-0.603229


In [7]:
# DPRE: each team's mean PRE over the drives where it was on defense, i.e. the
# average amount opposing offenses scored relative to expectation against it.
# Select "PRE" *before* aggregating: averaging every column and then picking one
# is wasted work and raises TypeError on pandas >= 2.0, because the frame also
# holds string columns (e.g. "offense") that cannot be averaged.
defenses = (
    df.groupby("defense")["PRE"]
    .mean()
    .to_frame("DPRE")
    .rename_axis("team")
)
defenses

Unnamed: 0_level_0,DPRE
team,Unnamed: 1_level_1
Air Force,-0.305076
Akron,-0.402717
Alabama,-1.195342
Appalachian State,-0.933282
Arizona,0.102072
Arizona State,0.095289
Arkansas,0.295526
Arkansas State,0.215809
Army,-0.248144
Auburn,-0.783265


In [8]:
# Combine offensive and defensive ratings into one row per team (inner join on
# the team index, so only teams present on both sides are kept).
# Only the OPRE / DPRE columns are merged so the cell is safe to re-run: merging
# the whole of teams_df back into itself would produce DPRE_x / DPRE_y on a
# second run and break every later lookup of the "DPRE" column.
teams_df = pd.merge(
    teams_df[["OPRE"]],
    defenses[["DPRE"]],
    how="inner",
    left_index=True,
    right_index=True,
)

# Print every team followed by its OPRE / DPRE row.
for index, row in teams_df.iterrows():
    print(index)
    print(row)

Air Force
OPRE   -0.420602
DPRE   -0.305076
Name: Air Force, dtype: float64
Akron
OPRE   -0.980367
DPRE   -0.402717
Name: Akron, dtype: float64
Alabama
OPRE    1.800222
DPRE   -1.195342
Name: Alabama, dtype: float64
Appalachian State
OPRE    0.769018
DPRE   -0.933282
Name: Appalachian State, dtype: float64
Arizona
OPRE   -0.304328
DPRE    0.102072
Name: Arizona, dtype: float64
Arizona State
OPRE    0.823218
DPRE    0.095289
Name: Arizona State, dtype: float64
Arkansas
OPRE   -0.559415
DPRE    0.295526
Name: Arkansas, dtype: float64
Arkansas State
OPRE   -0.823830
DPRE    0.215809
Name: Arkansas State, dtype: float64
Army
OPRE    1.231761
DPRE   -0.248144
Name: Army, dtype: float64
Auburn
OPRE   -0.603229
DPRE   -0.783265
Name: Auburn, dtype: float64
BYU
OPRE   -0.040477
DPRE    0.413653
Name: BYU, dtype: float64
Ball State
OPRE   -0.250014
DPRE    0.228664
Name: Ball State, dtype: float64
Baylor
OPRE    0.301815
DPRE    0.411900
Name: Baylor, dtype: float64
Boise State
OPRE    0.559060

Name: UAB, dtype: float64
UCF
OPRE    1.865559
DPRE   -0.258922
Name: UCF, dtype: float64
UCLA
OPRE   -0.260703
DPRE    0.563322
Name: UCLA, dtype: float64
UMass
OPRE   -0.024346
DPRE    1.375971
Name: UMass, dtype: float64
UNLV
OPRE   -0.217401
DPRE    0.645301
Name: UNLV, dtype: float64
USC
OPRE   -0.092083
DPRE   -0.267540
Name: USC, dtype: float64
UT San Antonio
OPRE   -0.930764
DPRE    0.122707
Name: UT San Antonio, dtype: float64
UTEP
OPRE   -0.533357
DPRE    0.637931
Name: UTEP, dtype: float64
Utah
OPRE    0.012587
DPRE   -0.708888
Name: Utah, dtype: float64
Utah State
OPRE    1.052193
DPRE   -0.351494
Name: Utah State, dtype: float64
Vanderbilt
OPRE   -0.021905
DPRE    0.200379
Name: Vanderbilt, dtype: float64
Virginia
OPRE    0.422776
DPRE   -0.080677
Name: Virginia, dtype: float64
Virginia Tech
OPRE   -0.235016
DPRE   -0.185778
Name: Virginia Tech, dtype: float64
Wake Forest
OPRE   -0.388571
DPRE    0.441060
Name: Wake Forest, dtype: float64
Washington
OPRE    0.570042
DPRE  

In [9]:
# Ten teams with the highest OPRE (mean PRE on their offensive drives).
teams_df.sort_values(by="OPRE", ascending=False).head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Oklahoma,1.940676,0.419186
UCF,1.865559,-0.258922
Alabama,1.800222,-1.195342
Ohio State,1.345341,-0.583016
Washington State,1.259223,-0.296087
Army,1.231761,-0.248144
Georgia,1.171342,-0.81995
Utah State,1.052193,-0.351494
Memphis,1.020416,0.042754
Clemson,0.982599,-1.045869


In [10]:
# Ten teams with the lowest DPRE. The sort is ascending, so the defenses that
# held opponents furthest below expected points come first.
teams_df.sort_values(by="DPRE").head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,1.800222,-1.195342
Michigan,0.891426,-1.102152
Miami,0.024514,-1.066648
Clemson,0.982599,-1.045869
Kentucky,-0.315098,-0.989682
Appalachian State,0.769018,-0.933282
Temple,-0.13394,-0.932056
UAB,0.145471,-0.9018
Mississippi State,0.439918,-0.893121
North Texas,0.361254,-0.882311


In [11]:
# Re-display the per-drive frame as it stands before the opponent-adjustment
# columns are added.
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.388924,4.611076
1,Louisville,Alabama,401012246,75,0,1.937022,-1.937022
2,Alabama,Louisville,401012246,80,0,1.711070,-1.711070
3,Louisville,Alabama,401012246,83,0,1.575499,-1.575499
4,Alabama,Louisville,401012246,55,7,2.840827,4.159173
5,Louisville,Alabama,401012246,75,0,1.937022,-1.937022
6,Alabama,Louisville,401012246,75,7,1.937022,5.062978
7,Louisville,Alabama,401012246,86,0,1.439929,-1.439929
8,Alabama,Louisville,401012246,78,0,1.801451,-1.801451
9,Louisville,Alabama,401012246,80,0,1.711070,-1.711070


In [12]:
# First opponent adjustment, computed for all drives at once instead of looping
# over rows with iterrows()/.at (same values, far less Python-level work).
#   off_oppAdj: the drive's PRE minus the DPRE of the defense it faced
#   def_oppAdj: the drive's PRE minus the OPRE of the offense it faced
# .loc with a list of labels raises KeyError if a team is missing from teams_df,
# matching the fail-fast behaviour of a per-row .at lookup. .to_numpy() drops the
# team labels so the subtraction lines up with df row by row.
opponent_dpre = teams_df.loc[df["defense"], "DPRE"].to_numpy()
opponent_opre = teams_df.loc[df["offense"], "OPRE"].to_numpy()

df["off_oppAdj"] = df["PRE"] - opponent_dpre
df["def_oppAdj"] = df["PRE"] - opponent_opre
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE,off_oppAdj,def_oppAdj
0,Alabama,Louisville,401012246,65,7,2.388924,4.611076,3.683647,2.810854
1,Louisville,Alabama,401012246,75,0,1.937022,-1.937022,-0.741680,-1.028206
2,Alabama,Louisville,401012246,80,0,1.711070,-1.711070,-2.638498,-3.511292
3,Louisville,Alabama,401012246,83,0,1.575499,-1.575499,-0.380158,-0.666684
4,Alabama,Louisville,401012246,55,7,2.840827,4.159173,3.231745,2.358951
5,Louisville,Alabama,401012246,75,0,1.937022,-1.937022,-0.741680,-1.028206
6,Alabama,Louisville,401012246,75,7,1.937022,5.062978,4.135550,3.262757
7,Louisville,Alabama,401012246,86,0,1.439929,-1.439929,-0.244587,-0.531113
8,Alabama,Louisville,401012246,78,0,1.801451,-1.801451,-2.728879,-3.601672
9,Louisville,Alabama,401012246,80,0,1.711070,-1.711070,-0.515729,-0.802255


In [13]:
# Opponent-adjusted ratings after one pass: the mean adjusted residual per team
# on offense (aOPPD) and on defense (aDPPD). The assignment aligns on the team
# index of teams_df.
# Select the value column *before* aggregating: averaging every column and then
# picking one is wasted work and raises TypeError on pandas >= 2.0, because the
# frame also holds string columns that cannot be averaged.
teams_df["aOPPD"] = df.groupby("offense")["off_oppAdj"].mean()
teams_df["aDPPD"] = df.groupby("defense")["def_oppAdj"].mean()
teams_df

Unnamed: 0_level_0,OPRE,DPRE,aOPPD,aDPPD
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Air Force,-0.420602,-0.305076,-0.479428,-0.296528
Akron,-0.980367,-0.402717,-0.722812,-0.345373
Alabama,1.800222,-1.195342,1.286512,-1.008313
Appalachian State,0.769018,-0.933282,0.598055,-0.668327
Arizona,-0.304328,0.102072,-0.371984,0.229442
Arizona State,0.823218,0.095289,0.749625,0.273929
Arkansas,-0.559415,0.295526,-0.096414,0.025015
Arkansas State,-0.823830,0.215809,-0.489003,-0.093592
Army,1.231761,-0.248144,1.141348,-0.440988
Auburn,-0.603229,-0.783265,-0.332089,-0.704804


In [14]:
def printLeaders(n=10):
    """Print the leading teams by adjusted offense, then by adjusted defense.

    Reads the module-level ``teams_df`` and prints two tables:

    1. the ``n`` rows with the highest ``aOPPD`` (sorted descending);
    2. the ``n`` rows with the lowest ``aDPPD`` (sorted ascending).

    Parameters
    ----------
    n : int, default 10
        Number of rows to show in each table. The default reproduces the
        original fixed top-10 listing.
    """
    print(teams_df.sort_values(by="aOPPD", ascending=False).head(n))
    print(teams_df.sort_values(by="aDPPD").head(n))

# Show the leaders using the aOPPD / aDPPD values currently stored in teams_df.
printLeaders()

                      OPRE      DPRE     aOPPD     aDPPD
team                                                    
Oklahoma          1.940676  0.419186  1.721007  0.236264
Alabama           1.800222 -1.195342  1.286512 -1.008313
Ohio State        1.345341 -0.583016  1.146295 -0.335161
Army              1.231761 -0.248144  1.141348 -0.440988
Georgia           1.171342 -0.819950  1.067549 -0.716002
UCF               1.865559 -0.258922  0.972633 -0.030518
Utah State        1.052193 -0.351494  0.971266 -0.128620
Clemson           0.982599 -1.045869  0.909163 -1.134435
Washington State  1.259223 -0.296087  0.899794  0.238662
Georgia Tech      0.956538  0.296779  0.872517  0.375293
                     OPRE      DPRE     aOPPD     aDPPD
team                                                   
Clemson          0.982599 -1.045869  0.909163 -1.134435
Michigan         0.891426 -1.102152  0.840471 -1.029490
TCU             -0.799882 -0.843964 -0.823383 -1.009921
Alabama          1.800222 -1.195342 

In [15]:
def adjust():
    """Run one pass of the iterative opponent adjustment, in place.

    Operates on the module-level ``df`` (one row per drive) and ``teams_df``
    (one row per team):

    1. For every drive, subtract the *current* adjusted rating of the opponent
       from the drive's ``PRE``: the defense's ``aDPPD`` for ``off_oppAdj`` and
       the offense's ``aOPPD`` for ``def_oppAdj``.
    2. Replace ``teams_df["aOPPD"]`` / ``teams_df["aDPPD"]`` with the per-team
       means of those freshly adjusted values.

    Both opponent lookups are taken before ``teams_df`` is updated, so every
    drive in a pass is adjusted against the same ratings.

    Raises
    ------
    KeyError
        If a team named in ``df`` has no row in ``teams_df``.
    """
    # Look up every drive's opponent rating in one shot; .to_numpy() drops the
    # team labels so the subtraction lines up with df row by row.
    opponent_defense = teams_df.loc[df["defense"], "aDPPD"].to_numpy()
    opponent_offense = teams_df.loc[df["offense"], "aOPPD"].to_numpy()

    df["off_oppAdj"] = df["PRE"] - opponent_defense
    df["def_oppAdj"] = df["PRE"] - opponent_offense

    # Aggregate only the column that is needed: averaging every column first
    # raises TypeError on pandas >= 2.0 because df also holds string columns.
    teams_df["aOPPD"] = df.groupby("offense")["off_oppAdj"].mean()
    teams_df["aDPPD"] = df.groupby("defense")["def_oppAdj"].mean()

In [16]:
# Apply the opponent adjustment 40 times in a row, printing Oklahoma's adjusted
# offensive rating after each pass as a convergence trace, then show the leaders
# from the final ratings.
for pass_number in range(40):
    adjust()
    print(teams_df.loc["Oklahoma", "aOPPD"])
printLeaders()

2.002793843398037
1.885629828565578
1.9773873589079938
1.9248013978404728
1.9598926369518894
1.9367001874063203
1.9513663711662856
1.9409895116906077
1.947524699949005
1.9427980951517576
1.945845631210338
1.943662425145366
1.9451253334519918
1.94410763984178
1.9448206859802153
1.9443437133226649
1.9446927827645257
1.9444685069902041
1.9446385778614361
1.9445328190065139
1.9446145545777842
1.9445644518782756
1.9446027876330465
1.9445788333193947
1.9445961010419635
1.9445844494695883
1.9445917013843639
1.9445858664389213
1.944588514546622
1.944585461993369
1.9445861089725478
1.9445844181452947
1.944584280573756
1.9445832821477973
1.9445829033884203
1.9445822769334327
1.944581881203346
1.9445814684614615
1.9445811342782193
1.944580853045055
                       OPRE      DPRE     aOPPD     aDPPD
team                                                     
Oklahoma           1.940676  0.419186  1.944581  0.023562
Alabama            1.800222 -1.195342  1.646513 -1.443500
Georgia            1

In [17]:
# Net rating per team: adjusted offense minus adjusted defense.
teams_df["net_aPPD"] = teams_df["aOPPD"] - teams_df["aDPPD"]

In [18]:
# Top 25 teams by net_aPPD. reset_index() moves the team name out of the index
# into a regular column, so the new 0-based index reads as rank minus one.
teams_df.sort_values(by="net_aPPD", ascending=False).head(25).reset_index()

Unnamed: 0,team,OPRE,DPRE,aOPPD,aDPPD,net_aPPD
0,Alabama,1.800222,-1.195342,1.646513,-1.4435,3.090012
1,Georgia,1.171342,-0.81995,1.520106,-1.323688,2.843794
2,Clemson,0.982599,-1.045869,0.838915,-1.348395,2.18731
3,Michigan,0.891426,-1.102152,0.917572,-1.267173,2.184745
4,Mississippi State,0.439918,-0.893121,0.992387,-1.188666,2.181053
5,Florida,0.145238,-0.857694,0.715899,-1.400041,2.11594
6,Kentucky,-0.315098,-0.989682,0.418296,-1.5878,2.006096
7,LSU,0.203574,-0.518024,1.048307,-0.920012,1.968319
8,Oklahoma,1.940676,0.419186,1.944581,0.023562,1.921019
9,Ohio State,1.345341,-0.583016,1.357191,-0.490981,1.848172


In [21]:
# Write the final team ratings (team index plus every column in teams_df) to a
# tab-separated file.
# NOTE(review): this cell's execution count (21) does not follow the previous
# cell's (18) — re-run the notebook top to bottom before relying on the export.
teams_df.to_csv("Data/teams_aPPD.tsv", sep="\t")