In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("Data/2018drives_augmented.tsv", sep="\t", index_col=False)
df

Unnamed: 0,offense,defense,game_id,id,plays,start_yardline,yards,end_yardline,drive_result,points,start_distance,dist_range
0,Alabama,Louisville,401012246,4010122461,7,35,65,100,TD,7,65,"(60.0, 65.0]"
1,Louisville,Alabama,401012246,4010122462,4,75,20,55,PUNT,0,75,"(70.0, 75.0]"
2,Alabama,Louisville,401012246,4010122463,8,20,63,65,FUMBLE,0,80,"(75.0, 80.0]"
3,Louisville,Alabama,401012246,4010122464,3,83,-1,84,PUNT,0,83,"(80.0, 85.0]"
4,Alabama,Louisville,401012246,4010122465,6,45,55,100,TD,7,55,"(50.0, 55.0]"
5,Louisville,Alabama,401012246,4010122466,8,75,49,26,INT,0,75,"(70.0, 75.0]"
6,Alabama,Louisville,401012246,4010122467,6,25,75,100,TD,7,75,"(70.0, 75.0]"
7,Louisville,Alabama,401012246,4010122468,6,86,31,55,PUNT,0,86,"(85.0, 90.0]"
8,Alabama,Louisville,401012246,4010122469,5,22,19,41,PUNT,0,78,"(75.0, 80.0]"
9,Louisville,Alabama,401012246,40101224610,4,80,20,60,PUNT,0,80,"(75.0, 80.0]"


In [2]:
grouped_df = pd.read_csv("Data/2018drives_grouped.tsv", sep="\t", index_col=False)
grouped_df

Unnamed: 0,start_distance,driveCount,meanPPD
0,3,15,5.800000
1,7,12,5.333333
2,9,11,5.272727
3,10,11,5.000000
4,11,13,4.692308
5,13,15,4.533333
6,14,15,4.666667
7,15,12,5.666667
8,18,14,3.857143
9,19,10,3.000000


In [3]:
from sklearn.linear_model import LinearRegression

x = np.array(grouped_df["start_distance"]).reshape(-1, 1)
y = np.array(grouped_df["meanPPD"]).reshape(-1, 1)
weights = np.array(grouped_df["driveCount"])

reg = LinearRegression().fit(x, y, sample_weight=weights)
print(reg.score(x, y, sample_weight=weights))
print()
print(reg.coef_)
print(reg.intercept_)

0.8277599759793306

[[-0.04520032]]
[5.32487387]


In [4]:
df = df[["offense", "defense", "game_id", "start_distance", "points"]].copy()
df["expectedPoints"] = reg.predict(np.array(df["start_distance"]).reshape(-1, 1))
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints
0,Alabama,Louisville,401012246,65,7,2.386853
1,Louisville,Alabama,401012246,75,0,1.934850
2,Alabama,Louisville,401012246,80,0,1.708848
3,Louisville,Alabama,401012246,83,0,1.573247
4,Alabama,Louisville,401012246,55,7,2.838856
5,Louisville,Alabama,401012246,75,0,1.934850
6,Alabama,Louisville,401012246,75,7,1.934850
7,Louisville,Alabama,401012246,86,0,1.437646
8,Alabama,Louisville,401012246,78,0,1.799249
9,Louisville,Alabama,401012246,80,0,1.708848


In [5]:
df["PRE"] = df["points"] - df["expectedPoints"]
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.386853,4.613147
1,Louisville,Alabama,401012246,75,0,1.934850,-1.934850
2,Alabama,Louisville,401012246,80,0,1.708848,-1.708848
3,Louisville,Alabama,401012246,83,0,1.573247,-1.573247
4,Alabama,Louisville,401012246,55,7,2.838856,4.161144
5,Louisville,Alabama,401012246,75,0,1.934850,-1.934850
6,Alabama,Louisville,401012246,75,7,1.934850,5.065150
7,Louisville,Alabama,401012246,86,0,1.437646,-1.437646
8,Alabama,Louisville,401012246,78,0,1.799249,-1.799249
9,Louisville,Alabama,401012246,80,0,1.708848,-1.708848


In [6]:
teams_df = df.groupby(by=["offense"]).agg(["mean"])["PRE"]
teams_df.columns = ["OPRE"]
teams_df.index.name = "team"
teams_df

Unnamed: 0_level_0,OPRE
team,Unnamed: 1_level_1
Air Force,0.175804
Akron,-0.884153
Alabama,1.749527
Appalachian State,0.363252
Arizona,-0.152115
Arizona State,0.491906
Arkansas,-0.398073
Arkansas State,-0.125106
Army,1.185737
Auburn,-0.447203


In [7]:
defenses = df.groupby(by=["defense"]).agg(["mean"])["PRE"]
defenses.columns = ["DPRE"]
defenses.index.name = "team"
defenses

Unnamed: 0_level_0,DPRE
team,Unnamed: 1_level_1
Air Force,0.269519
Akron,-0.646530
Alabama,-1.147062
Appalachian State,-0.722994
Arizona,-0.020046
Arizona State,-0.054289
Arkansas,0.203776
Arkansas State,0.449896
Army,-0.084765
Auburn,-0.741319


In [8]:
teams_df = pd.merge(teams_df, defenses, how="inner", left_index=True, right_index=True)
for index, row in teams_df.iterrows():
    print(index)
    print(row)

Air Force
OPRE    0.175804
DPRE    0.269519
Name: Air Force, dtype: float64
Akron
OPRE   -0.884153
DPRE   -0.646530
Name: Akron, dtype: float64
Alabama
OPRE    1.749527
DPRE   -1.147062
Name: Alabama, dtype: float64
Appalachian State
OPRE    0.363252
DPRE   -0.722994
Name: Appalachian State, dtype: float64
Arizona
OPRE   -0.152115
DPRE   -0.020046
Name: Arizona, dtype: float64
Arizona State
OPRE    0.491906
DPRE   -0.054289
Name: Arizona State, dtype: float64
Arkansas
OPRE   -0.398073
DPRE    0.203776
Name: Arkansas, dtype: float64
Arkansas State
OPRE   -0.125106
DPRE    0.449896
Name: Arkansas State, dtype: float64
Army
OPRE    1.185737
DPRE   -0.084765
Name: Army, dtype: float64
Auburn
OPRE   -0.447203
DPRE   -0.741319
Name: Auburn, dtype: float64
BYU
OPRE   -0.249133
DPRE    0.112706
Name: BYU, dtype: float64
Ball State
OPRE   -0.295357
DPRE    0.483944
Name: Ball State, dtype: float64
Baylor
OPRE    0.090502
DPRE    0.582117
Name: Baylor, dtype: float64
Boise State
OPRE    0.810194

OPRE    0.582598
DPRE   -0.966418
Name: Utah State, dtype: float64
Vanderbilt
OPRE    0.014290
DPRE    0.190855
Name: Vanderbilt, dtype: float64
Virginia
OPRE    0.287440
DPRE   -0.161011
Name: Virginia, dtype: float64
Virginia Tech
OPRE   -0.069059
DPRE    0.161491
Name: Virginia Tech, dtype: float64
Wake Forest
OPRE   -0.175087
DPRE    0.364232
Name: Wake Forest, dtype: float64
Washington
OPRE    0.247433
DPRE   -0.579220
Name: Washington, dtype: float64
Washington State
OPRE    1.377987
DPRE   -0.025625
Name: Washington State, dtype: float64
West Virginia
OPRE    0.733390
DPRE   -0.434022
Name: West Virginia, dtype: float64
Western Kentucky
OPRE   -0.477796
DPRE    0.305492
Name: Western Kentucky, dtype: float64
Western Michigan
OPRE    0.545603
DPRE    0.422740
Name: Western Michigan, dtype: float64
Wisconsin
OPRE    0.431753
DPRE   -0.245530
Name: Wisconsin, dtype: float64
Wyoming
OPRE   -0.684266
DPRE   -0.094897
Name: Wyoming, dtype: float64


In [9]:
teams_df.sort_values(by="OPRE", ascending=False).head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Oklahoma,2.048562,0.198811
Alabama,1.749527,-1.147062
UCF,1.662833,-0.52953
Washington State,1.377987,-0.025625
Ohio,1.193939,0.440416
Army,1.185737,-0.084765
Georgia Tech,1.151463,0.397154
Ohio State,1.04274,-0.28892
Georgia,1.021392,-0.75624
Houston,0.99221,-0.0431


In [10]:
teams_df.sort_values(by="DPRE").head(10)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Michigan,0.643111,-1.225941
Alabama,1.749527,-1.147062
Kentucky,-0.488756,-1.146182
Clemson,0.911748,-1.085638
Iowa,0.104358,-1.005314
UAB,0.120434,-1.001174
Temple,-0.266015,-0.968498
Utah State,0.582598,-0.966418
Mississippi State,0.163058,-0.962886
Miami,-0.095364,-0.913338


In [11]:
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.386853,4.613147
1,Louisville,Alabama,401012246,75,0,1.934850,-1.934850
2,Alabama,Louisville,401012246,80,0,1.708848,-1.708848
3,Louisville,Alabama,401012246,83,0,1.573247,-1.573247
4,Alabama,Louisville,401012246,55,7,2.838856,4.161144
5,Louisville,Alabama,401012246,75,0,1.934850,-1.934850
6,Alabama,Louisville,401012246,75,7,1.934850,5.065150
7,Louisville,Alabama,401012246,86,0,1.437646,-1.437646
8,Alabama,Louisville,401012246,78,0,1.799249,-1.799249
9,Louisville,Alabama,401012246,80,0,1.708848,-1.708848


In [12]:
df["off_oppAdj"] = 0.0
df["def_oppAdj"] = 0.0

for index, row in df.iterrows():
    df.at[index, "off_oppAdj"] = df.at[index, "PRE"] - teams_df.at[row["defense"], "DPRE"]
    df.at[index, "def_oppAdj"] = df.at[index, "PRE"] - teams_df.at[row["offense"], "OPRE"]
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE,off_oppAdj,def_oppAdj
0,Alabama,Louisville,401012246,65,7,2.386853,4.613147,3.472079,2.863621
1,Louisville,Alabama,401012246,75,0,1.934850,-1.934850,-0.787787,-1.194837
2,Alabama,Louisville,401012246,80,0,1.708848,-1.708848,-2.849917,-3.458374
3,Louisville,Alabama,401012246,83,0,1.573247,-1.573247,-0.426185,-0.833234
4,Alabama,Louisville,401012246,55,7,2.838856,4.161144,3.020075,2.411617
5,Louisville,Alabama,401012246,75,0,1.934850,-1.934850,-0.787787,-1.194837
6,Alabama,Louisville,401012246,75,7,1.934850,5.065150,3.924082,3.315624
7,Louisville,Alabama,401012246,86,0,1.437646,-1.437646,-0.290584,-0.697633
8,Alabama,Louisville,401012246,78,0,1.799249,-1.799249,-2.940317,-3.548775
9,Louisville,Alabama,401012246,80,0,1.708848,-1.708848,-0.561786,-0.968835


In [13]:
teams_df[["off_aPPD"]] = df.groupby(by="offense").agg(["mean"])["off_oppAdj"]
teams_df[["def_aPPD"]] = df.groupby(by="defense").agg(["mean"])["def_oppAdj"]
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Air Force,0.175804,0.269519,0.184724,0.249647
Akron,-0.884153,-0.646530,-0.810760,-0.376899
Alabama,1.749527,-1.147062,1.246940,-1.125457
Appalachian State,0.363252,-0.722994,0.008285,-0.773269
Arizona,-0.152115,-0.020046,-0.172719,0.018913
Arizona State,0.491906,-0.054289,0.506502,0.221777
Arkansas,-0.398073,0.203776,-0.120544,0.116482
Arkansas State,-0.125106,0.449896,-0.319147,0.101674
Army,1.185737,-0.084765,1.011684,-0.342334
Auburn,-0.447203,-0.741319,-0.244271,-0.667879


In [14]:
def printLeaders():
    print(teams_df.sort_values(by="off_aPPD", ascending=False).head(25))
    print(teams_df.sort_values(by="def_aPPD").head(25))
    print()

printLeaders()

                           OPRE      DPRE  off_aPPD  def_aPPD
team                                                         
Oklahoma               2.048562  0.198811  1.893789  0.231952
Washington State       1.377987 -0.025625  1.253587  0.246325
Alabama                1.749527 -1.147062  1.246940 -1.125457
Georgia                1.021392 -0.756240  1.139373 -0.732537
UCF                    1.662833 -0.529530  1.053178 -0.152885
Army                   1.185737 -0.084765  1.011684 -0.342334
Ohio                   1.193939  0.440416  0.977439  0.659396
Louisiana              0.772519  1.517505  0.940743  1.414110
Clemson                0.911748 -1.085638  0.927179 -1.244652
Ohio State             1.042740 -0.288920  0.925448 -0.122726
Georgia Tech           1.151463  0.397154  0.875886  0.352617
NC State               0.689262 -0.077991  0.870870 -0.341762
Michigan               0.643111 -1.225941  0.825979 -1.168066
Ole Miss               0.589262  0.551427  0.722728  0.404908
Houston 

In [15]:
def adjust():
    for index, row in df.iterrows():
        df.at[index, "off_oppAdj"] = df.at[index, "PRE"] - teams_df.at[row["defense"], "def_aPPD"]
        df.at[index, "def_oppAdj"] = df.at[index, "PRE"] - teams_df.at[row["offense"], "off_aPPD"]

    teams_df[["off_aPPD"]] = df.groupby(by="offense").agg(["mean"])["off_oppAdj"]
    teams_df[["def_aPPD"]] = df.groupby(by="defense").agg(["mean"])["def_oppAdj"]

In [16]:
for i in range(40):
    adjust()
    print(teams_df.at["Oklahoma", "off_aPPD"])
printLeaders()

2.1948746047721643
2.1357797351771453
2.2137073395253406
2.196835473581357
2.2238599994669097
2.2195811887578336
2.2313140353822685
2.2303677829017605
2.236528576325862
2.236160252105838
2.239970304667768
2.239451671881669
2.2421557877034655
2.24136607572634
2.2435096759593436
2.242488721075879
2.244334791599548
2.2431480350476627
2.2448319455373147
2.2435346719029337
2.2451289742037925
2.2437607799653208
2.2453052612570588
2.2438925679456005
2.2454093093863357
2.2439690998300494
2.2454704207789344
2.2440133690153092
2.245506150835304
2.244038867843614
2.2455269487706335
2.2440534869627724
2.2455390005902296
2.2440618249842386
2.245545951278707
2.2440665523458714
2.245549939386191
2.2440692139805942
2.2455522145093485
2.2440707001112505
                       OPRE      DPRE  off_aPPD  def_aPPD
team                                                     
Oklahoma           2.048562  0.198811  2.244071  0.062791
Georgia            1.021392 -0.756240  1.659613 -1.409251
Alabama            1.

In [17]:
teams_df["net_aPPD"] = teams_df["off_aPPD"] - teams_df["def_aPPD"]

In [18]:
teams_df.sort_values(by="net_aPPD", ascending=False).head(25).reset_index()

Unnamed: 0,team,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD
0,Alabama,1.749527,-1.147062,1.592204,-1.575666,3.16787
1,Georgia,1.021392,-0.75624,1.659613,-1.409251,3.068864
2,Mississippi State,0.163058,-0.962886,0.871163,-1.531208,2.402371
3,Clemson,0.911748,-1.085638,0.896397,-1.469108,2.365506
4,Michigan,0.643111,-1.225941,0.917396,-1.344194,2.261591
5,Florida,0.072816,-0.703283,0.821561,-1.422982,2.244543
6,Oklahoma,2.048562,0.198811,2.244071,0.062791,2.18128
7,LSU,-0.0024,-0.66587,0.921685,-1.220438,2.142124
8,Kentucky,-0.488756,-1.146182,0.20245,-1.787169,1.989619
9,West Virginia,0.73339,-0.434022,0.980608,-0.724905,1.705512


In [19]:
teams_df["AP"] = "NR"
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD,AP
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Air Force,0.175804,0.269519,0.063325,0.437035,-0.373711,NR
Akron,-0.884153,-0.646530,-0.842483,-0.472132,-0.370351,NR
Alabama,1.749527,-1.147062,1.592204,-1.575666,3.167870,NR
Appalachian State,0.363252,-0.722994,-0.195101,-0.728090,0.532989,NR
Arizona,-0.152115,-0.020046,-0.208303,-0.020466,-0.187837,NR
Arizona State,0.491906,-0.054289,0.579675,0.133612,0.446063,NR
Arkansas,-0.398073,0.203776,-0.043115,-0.147138,0.104022,NR
Arkansas State,-0.125106,0.449896,-0.427947,0.276360,-0.704307,NR
Army,1.185737,-0.084765,0.911104,-0.199282,1.110386,NR
Auburn,-0.447203,-0.741319,-0.010528,-1.120642,1.110113,NR


In [20]:
teams_df.to_csv("Data/teams_aPPD.tsv", sep="\t")

In [21]:
teams_df.describe()

Unnamed: 0,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD
count,130.0,130.0,130.0,130.0,130.0
mean,0.017499,0.005714,0.017375,0.00296,0.014415
std,0.620578,0.638628,0.718106,0.752023,1.207844
min,-1.136283,-1.225941,-1.866896,-1.787169,-2.939698
25%,-0.397094,-0.495718,-0.453123,-0.560107,-0.889594
50%,-0.012005,-0.016189,0.02994,0.006153,-0.047052
75%,0.393129,0.388924,0.561637,0.466277,0.893777
max,2.048562,2.080375,2.244071,2.032427,3.16787
