In [1]:
import pandas as pd
import numpy as np

# Load the drive-level table (tab-separated) and display it.
df = pd.read_csv(
    "Data/2018drives_augmented.tsv",
    sep="\t",
    index_col=False,
)
df

Unnamed: 0,offense,defense,game_id,plays,start_yardline,yards,end_yardline,drive_result,points,start_distance,dist_range
0,Alabama,Louisville,401012246,7,35,65,100,TD,7,65,"(60.0, 65.0]"
1,Louisville,Alabama,401012246,4,75,20,55,PUNT,0,75,"(70.0, 75.0]"
2,Alabama,Louisville,401012246,8,20,63,65,FUMBLE,0,80,"(75.0, 80.0]"
3,Louisville,Alabama,401012246,3,83,-1,84,PUNT,0,83,"(80.0, 85.0]"
4,Alabama,Louisville,401012246,6,45,55,100,TD,7,55,"(50.0, 55.0]"
5,Louisville,Alabama,401012246,8,75,49,26,INT,0,75,"(70.0, 75.0]"
6,Alabama,Louisville,401012246,6,25,75,100,TD,7,75,"(70.0, 75.0]"
7,Louisville,Alabama,401012246,6,86,31,55,PUNT,0,86,"(85.0, 90.0]"
8,Alabama,Louisville,401012246,5,22,19,41,PUNT,0,78,"(75.0, 80.0]"
9,Louisville,Alabama,401012246,4,80,20,60,PUNT,0,80,"(75.0, 80.0]"


In [2]:
# Load the pre-aggregated table (one row per start_distance, with a `count`
# and a `mean` column) and display it.
grouped_df = pd.read_csv(
    "Data/2018drives_grouped.tsv",
    sep="\t",
    index_col=False,
)
grouped_df

Unnamed: 0,start_distance,count,mean
0,1,3,2.333333
1,2,4,6.000000
2,3,13,5.615385
3,4,6,5.000000
4,5,2,7.000000
5,6,6,5.833333
6,7,12,5.333333
7,8,7,4.285714
8,9,11,5.272727
9,10,9,5.000000


In [3]:
from sklearn.linear_model import LinearRegression

# Weighted linear fit of the grouped `mean` column on `start_distance`;
# every grouped row is weighted by its `count`.
x = grouped_df["start_distance"].to_numpy().reshape(-1, 1)
y = grouped_df["mean"].to_numpy().reshape(-1, 1)
weights = grouped_df["count"].to_numpy()

reg = LinearRegression()
reg.fit(x, y, sample_weight=weights)

# Weighted R^2, a blank line, then the fitted slope and intercept.
print(
    reg.score(x, y, sample_weight=weights),
    "",
    reg.coef_,
    reg.intercept_,
    sep="\n",
)

0.8056954578118931

[[-0.04590342]]
[5.35287406]


In [4]:
# Keep only the columns used from here on, then attach the regression's
# prediction for each drive's starting distance.
df = df.loc[:, ["offense", "defense", "game_id", "start_distance", "points"]].copy()
df["expectedPoints"] = reg.predict(
    df["start_distance"].to_numpy().reshape(-1, 1)
)
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints
0,Alabama,Louisville,401012246,65,7,2.369152
1,Louisville,Alabama,401012246,75,0,1.910118
2,Alabama,Louisville,401012246,80,0,1.680601
3,Louisville,Alabama,401012246,83,0,1.542891
4,Alabama,Louisville,401012246,55,7,2.828186
5,Louisville,Alabama,401012246,75,0,1.910118
6,Alabama,Louisville,401012246,75,7,1.910118
7,Louisville,Alabama,401012246,86,0,1.405180
8,Alabama,Louisville,401012246,78,0,1.772408
9,Louisville,Alabama,401012246,80,0,1.680601


In [5]:
# PRE: actual points on the drive minus the regression's expected points.
df["PRE"] = df["points"].sub(df["expectedPoints"])
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.369152,4.630848
1,Louisville,Alabama,401012246,75,0,1.910118,-1.910118
2,Alabama,Louisville,401012246,80,0,1.680601,-1.680601
3,Louisville,Alabama,401012246,83,0,1.542891,-1.542891
4,Alabama,Louisville,401012246,55,7,2.828186,4.171814
5,Louisville,Alabama,401012246,75,0,1.910118,-1.910118
6,Alabama,Louisville,401012246,75,7,1.910118,5.089882
7,Louisville,Alabama,401012246,86,0,1.405180,-1.405180
8,Alabama,Louisville,401012246,78,0,1.772408,-1.772408
9,Louisville,Alabama,401012246,80,0,1.680601,-1.680601


In [6]:
# Mean PRE of each team's offensive drives, as a one-column frame ("OPRE")
# indexed by team.
# Select "PRE" *before* aggregating: averaging the whole frame also tries to
# average the string columns (e.g. "defense"), which raises TypeError on
# pandas >= 2.0 and is wasted work on older versions.
teams_df = (
    df.groupby("offense")["PRE"]
    .mean()
    .rename("OPRE")
    .rename_axis("team")
    .to_frame()
)
teams_df

Unnamed: 0_level_0,OPRE
team,Unnamed: 1_level_1
Air Force,-0.037386
Akron,-0.805996
Alabama,1.768521
Appalachian State,0.655309
Arizona,-0.242635
Arizona State,0.609223
Arkansas,-0.521397
Arkansas State,-0.360100
Army,1.101578
Auburn,-0.428165


In [7]:
# Mean PRE of the drives each team faced on defense, as a one-column frame
# ("DPRE") indexed by team.
# Select "PRE" *before* aggregating: averaging the whole frame also tries to
# average the string columns (e.g. "offense"), which raises TypeError on
# pandas >= 2.0 and is wasted work on older versions.
defenses = (
    df.groupby("defense")["PRE"]
    .mean()
    .rename("DPRE")
    .rename_axis("team")
    .to_frame()
)
defenses

Unnamed: 0_level_0,DPRE
team,Unnamed: 1_level_1
Air Force,-0.034339
Akron,-0.377049
Alabama,-1.122933
Appalachian State,-0.888214
Arizona,0.172190
Arizona State,0.046989
Arkansas,0.022634
Arkansas State,0.235630
Army,-0.097059
Auburn,-0.718319


In [8]:
# Put OPRE and DPRE side by side, keeping only teams present in both tables.
# NOTE: re-running this cell on its own merges DPRE a second time (pandas then
# suffixes the duplicate columns), so re-run the OPRE cell first.
teams_df = teams_df.merge(
    defenses,
    how="inner",
    left_index=True,
    right_index=True,
)

# Dump every team's row: the team name, then its ratings Series.
for team, ratings in teams_df.iterrows():
    print(team, ratings, sep="\n")

Air Force
OPRE   -0.037386
DPRE   -0.034339
Name: Air Force, dtype: float64
Akron
OPRE   -0.805996
DPRE   -0.377049
Name: Akron, dtype: float64
Alabama
OPRE    1.768521
DPRE   -1.122933
Name: Alabama, dtype: float64
Appalachian State
OPRE    0.655309
DPRE   -0.888214
Name: Appalachian State, dtype: float64
Arizona
OPRE   -0.242635
DPRE    0.172190
Name: Arizona, dtype: float64
Arizona State
OPRE    0.609223
DPRE    0.046989
Name: Arizona State, dtype: float64
Arkansas
OPRE   -0.521397
DPRE    0.022634
Name: Arkansas, dtype: float64
Arkansas State
OPRE   -0.36010
DPRE    0.23563
Name: Arkansas State, dtype: float64
Army
OPRE    1.101578
DPRE   -0.097059
Name: Army, dtype: float64
Auburn
OPRE   -0.428165
DPRE   -0.718319
Name: Auburn, dtype: float64
BYU
OPRE   -0.016709
DPRE    0.437945
Name: BYU, dtype: float64
Ball State
OPRE   -0.227757
DPRE    0.326588
Name: Ball State, dtype: float64
Baylor
OPRE    0.327302
DPRE    0.435848
Name: Baylor, dtype: float64
Boise State
OPRE    0.582520
D

In [9]:
# Ten highest OPRE values (largest first).
(
    teams_df
    .sort_values("OPRE", ascending=False)
    .head(10)
)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Oklahoma,1.9791,0.279567
Alabama,1.768521,-1.122933
UCF,1.685718,-0.506938
Washington State,1.35303,-0.221503
Georgia,1.196647,-0.795909
Ohio,1.127608,0.653165
Army,1.101578,-0.097059
Ohio State,1.065134,-0.264397
Georgia Tech,0.980063,0.319482
Clemson,0.930941,-1.057624


In [10]:
# Ten lowest DPRE values (smallest first).
(
    teams_df
    .sort_values("DPRE", ascending=True)
    .head(10)
)

Unnamed: 0_level_0,OPRE,DPRE
team,Unnamed: 1_level_1,Unnamed: 2_level_1
Michigan,0.662725,-1.201848
Alabama,1.768521,-1.122933
Clemson,0.930941,-1.057624
Kentucky,-0.323839,-1.05585
Miami,0.046865,-1.044295
Iowa,0.430235,-1.012383
Temple,-0.245374,-0.947183
Mississippi State,0.063292,-0.945777
Appalachian State,0.655309,-0.888214
Fresno State,0.367543,-0.849435


In [11]:
# Re-display the drive table before the opponent-adjustment columns are added.
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE
0,Alabama,Louisville,401012246,65,7,2.369152,4.630848
1,Louisville,Alabama,401012246,75,0,1.910118,-1.910118
2,Alabama,Louisville,401012246,80,0,1.680601,-1.680601
3,Louisville,Alabama,401012246,83,0,1.542891,-1.542891
4,Alabama,Louisville,401012246,55,7,2.828186,4.171814
5,Louisville,Alabama,401012246,75,0,1.910118,-1.910118
6,Alabama,Louisville,401012246,75,7,1.910118,5.089882
7,Louisville,Alabama,401012246,86,0,1.405180,-1.405180
8,Alabama,Louisville,401012246,78,0,1.772408,-1.772408
9,Louisville,Alabama,401012246,80,0,1.680601,-1.680601


In [12]:
# First opponent adjustment, done column-wise instead of row by row:
#   off_oppAdj = drive PRE - DPRE of the defense that was faced
#   def_oppAdj = drive PRE - OPRE of the offense that was faced
# `Series.map` looks each team name up in the ratings table in one pass. It
# would silently yield NaN for a team missing from `teams_df`, so fail loudly
# up front instead.
assert df["offense"].isin(teams_df.index).all(), "offense team missing from teams_df"
assert df["defense"].isin(teams_df.index).all(), "defense team missing from teams_df"

df["off_oppAdj"] = df["PRE"] - df["defense"].map(teams_df["DPRE"])
df["def_oppAdj"] = df["PRE"] - df["offense"].map(teams_df["OPRE"])
df

Unnamed: 0,offense,defense,game_id,start_distance,points,expectedPoints,PRE,off_oppAdj,def_oppAdj
0,Alabama,Louisville,401012246,65,7,2.369152,4.630848,3.680628,2.862327
1,Louisville,Alabama,401012246,75,0,1.910118,-1.910118,-0.787185,-1.024651
2,Alabama,Louisville,401012246,80,0,1.680601,-1.680601,-2.630821,-3.449122
3,Louisville,Alabama,401012246,83,0,1.542891,-1.542891,-0.419958,-0.657423
4,Alabama,Louisville,401012246,55,7,2.828186,4.171814,3.221594,2.403293
5,Louisville,Alabama,401012246,75,0,1.910118,-1.910118,-0.787185,-1.024651
6,Alabama,Louisville,401012246,75,7,1.910118,5.089882,4.139662,3.321361
7,Louisville,Alabama,401012246,86,0,1.405180,-1.405180,-0.282247,-0.519713
8,Alabama,Louisville,401012246,78,0,1.772408,-1.772408,-2.722628,-3.540929
9,Louisville,Alabama,401012246,80,0,1.680601,-1.680601,-0.557668,-0.795133


In [13]:
# Team ratings after the first adjustment: the mean opponent-adjusted value of
# each team's offensive drives and of the drives it defended.
# Select the value column *before* aggregating: averaging the whole frame also
# tries to average the string columns, which raises TypeError on pandas >= 2.0.
# Assigning the resulting Series aligns it to `teams_df` by team name.
teams_df["off_aPPD"] = df.groupby("offense")["off_oppAdj"].mean()
teams_df["def_aPPD"] = df.groupby("defense")["def_oppAdj"].mean()
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Air Force,-0.037386,-0.034339,-0.157724,0.032333
Akron,-0.805996,-0.377049,-0.789907,-0.226025
Alabama,1.768521,-1.122933,1.320888,-0.990042
Appalachian State,0.655309,-0.888214,0.183936,-0.743969
Arizona,-0.242635,0.172190,-0.382682,0.224289
Arizona State,0.609223,0.046989,0.550705,0.313070
Arkansas,-0.521397,0.022634,-0.177633,-0.076735
Arkansas State,-0.360100,0.235630,-0.301418,-0.014576
Army,1.101578,-0.097059,1.049902,-0.291712
Auburn,-0.428165,-0.718319,-0.211535,-0.668981


In [14]:
def printLeaders(n=10, teams=None):
    """Print the top offenses and top defenses by adjusted rating.

    Two tables are printed: the `n` rows with the highest ``off_aPPD``
    (descending), then the `n` rows with the lowest ``def_aPPD`` (ascending).

    Parameters
    ----------
    n : int, default 10
        Number of rows shown in each table.
    teams : pandas.DataFrame, optional
        Ratings table with ``off_aPPD`` and ``def_aPPD`` columns. When omitted,
        the notebook-level ``teams_df`` is used, so ``printLeaders()`` behaves
        as it always has.
    """
    table = teams_df if teams is None else teams
    print(table.sort_values(by="off_aPPD", ascending=False).head(n))
    print(table.sort_values(by="def_aPPD").head(n))

# Show the current leaders (before the adjustment is iterated further).
printLeaders()

                      OPRE      DPRE  off_aPPD  def_aPPD
team                                                    
Oklahoma          1.979100  0.279567  1.886705  0.263849
Alabama           1.768521 -1.122933  1.320888 -0.990042
Georgia           1.196647 -0.795909  1.137842 -0.703175
Washington State  1.353030 -0.221503  1.131198  0.192712
Army              1.101578 -0.097059  1.049902 -0.291712
UCF               1.685718 -0.506938  1.004209 -0.109741
Clemson           0.930941 -1.057624  0.949869 -1.177773
Ohio              1.127608  0.653165  0.827896  0.821471
Ohio State        1.065134 -0.264397  0.824045 -0.113429
Michigan          0.662725 -1.201848  0.823207 -1.127393
                       OPRE      DPRE  off_aPPD  def_aPPD
team                                                     
Clemson            0.930941 -1.057624  0.949869 -1.177773
Michigan           0.662725 -1.201848  0.823207 -1.127393
Alabama            1.768521 -1.122933  1.320888 -0.990042
Kentucky          -0.32383

In [15]:
def adjust(drives=None, teams=None):
    """Run one pass of the opponent adjustment, updating both tables in place.

    For every drive, the opposing unit's current rating is subtracted from the
    drive's ``PRE``:

    * ``off_oppAdj = PRE - def_aPPD`` of the defense that was faced
    * ``def_oppAdj = PRE - off_aPPD`` of the offense that was faced

    Both columns are computed from the ratings of the previous pass; only then
    are ``off_aPPD`` / ``def_aPPD`` replaced by the per-team means of the new
    drive-level values.

    Parameters
    ----------
    drives : pandas.DataFrame, optional
        Drive table with ``offense``, ``defense`` and ``PRE`` columns.
        Defaults to the notebook-level ``df``.
    teams : pandas.DataFrame, optional
        Ratings table indexed by team name with ``off_aPPD`` and ``def_aPPD``
        columns. Defaults to the notebook-level ``teams_df``.

    Raises
    ------
    KeyError
        If a drive references a team that is not in the ratings index.
    """
    drives = df if drives is None else drives
    teams = teams_df if teams is None else teams

    # A vectorized lookup would silently produce NaN for an unknown team; keep
    # the loud failure the label-based lookup used to give, and check before
    # anything is modified.
    for side in ("offense", "defense"):
        unrated = drives.loc[~drives[side].isin(teams.index), side].unique()
        if len(unrated) > 0:
            raise KeyError(f"{side} teams missing from ratings table: {list(unrated)}")

    # Column-wise lookups replace the per-row Python loop.
    drives["off_oppAdj"] = drives["PRE"] - drives["defense"].map(teams["def_aPPD"])
    drives["def_oppAdj"] = drives["PRE"] - drives["offense"].map(teams["off_aPPD"])

    # Aggregate only the column that is needed; averaging the whole frame also
    # touches the string columns, which raises TypeError on pandas >= 2.0.
    teams["off_aPPD"] = drives.groupby("offense")["off_oppAdj"].mean()
    teams["def_aPPD"] = drives.groupby("defense")["def_oppAdj"].mean()

In [16]:
# Repeat the adjustment 40 times, printing Oklahoma's offensive rating after
# each pass so the convergence can be followed, then show the leaders.
for i in range(40):
    adjust()
    oklahoma_off = teams_df.loc["Oklahoma", "off_aPPD"]
    print(oklahoma_off)

printLeaders()

2.1487901896891577
2.081819039144995
2.1530775298698357
2.1261442187582764
2.149571513353421
2.138755138774137
2.1471215739592386
2.1426734788428603
2.145866839508306
2.1439964288225815
2.145299801765836
2.1444952028929274
2.145065030777531
2.144708603984638
2.144974868295111
2.1448099781359296
2.14494196368319
2.1448607005473446
2.1449293297541696
2.1448858523146432
2.1449228329756695
2.144897400448686
2.1449178274966174
2.1449017282002916
2.144913196223405
2.1449024075451986
2.1449089107131565
2.144901432051308
2.144905140929348
2.1448998728568074
2.144901993988009
2.144898265530176
2.144899479556138
2.1448968450693893
2.144897539461651
2.14489568687931
2.1448960833630393
2.1448947885596237
2.1448950143401717
2.144894115167353
                      OPRE      DPRE  off_aPPD  def_aPPD
team                                                    
Oklahoma          1.979100  0.279567  2.144894  0.118958
Alabama           1.768521 -1.122933  1.703191 -1.504534
Georgia           1.196647 -0.795

In [17]:
# Net rating: offensive rating minus defensive rating.
teams_df["net_aPPD"] = teams_df["off_aPPD"].sub(teams_df["def_aPPD"])

In [18]:
# Top 25 teams by net rating, with the team name moved from the index into an
# ordinary column.
(
    teams_df
    .sort_values("net_aPPD", ascending=False)
    .head(25)
    .reset_index()
)

Unnamed: 0,team,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD
0,Alabama,1.768521,-1.122933,1.703191,-1.504534,3.207725
1,Georgia,1.196647,-0.795909,1.654651,-1.358859,3.01351
2,Clemson,0.930941,-1.057624,0.905015,-1.396383,2.301398
3,Michigan,0.662725,-1.201848,0.921399,-1.292969,2.214368
4,Mississippi State,0.063292,-0.945777,0.780289,-1.416435,2.196724
5,Florida,0.168022,-0.831031,0.852683,-1.324941,2.177624
6,LSU,0.016889,-0.640888,0.923786,-1.196815,2.120602
7,Oklahoma,1.9791,0.279567,2.144894,0.118958,2.025937
8,Kentucky,-0.323839,-1.05585,0.35931,-1.626834,1.986144
9,Texas A&M,0.150321,-0.191324,1.02399,-0.725486,1.749476


In [21]:
# Add an "AP" column with the same placeholder value, "NR", for every team.
teams_df.loc[:, "AP"] = "NR"
teams_df

Unnamed: 0_level_0,OPRE,DPRE,off_aPPD,def_aPPD,net_aPPD,AP
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Air Force,-0.037386,-0.034339,-0.182085,0.313110,-0.495196,NR
Akron,-0.805996,-0.377049,-0.790290,-0.238068,-0.552222,NR
Alabama,1.768521,-1.122933,1.703191,-1.504534,3.207725,NR
Appalachian State,0.655309,-0.888214,0.057539,-0.746065,0.803604,NR
Arizona,-0.242635,0.172190,-0.401344,0.255255,-0.656598,NR
Arizona State,0.609223,0.046989,0.614312,0.195918,0.418394,NR
Arkansas,-0.521397,0.022634,-0.043401,-0.294367,0.250966,NR
Arkansas State,-0.360100,0.235630,-0.348716,0.211131,-0.559847,NR
Army,1.101578,-0.097059,1.024943,-0.104538,1.129481,NR
Auburn,-0.428165,-0.718319,0.066904,-1.140377,1.207282,NR


In [22]:
# Save the team ratings table as a tab-separated file (index included).
teams_df.to_csv(
    "Data/teams_aPPD.tsv",
    sep="\t",
)