In [1]:
# Goal: predict each team's total points and add them together, so build dataframes that pair each team's offensive stats with its opponent's defensive stats.

# Retrieve the data, clean it, and store it in a folder to train the model on.

In [2]:
# Imports

from io import StringIO

import nflreadpy as nfl
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

## Retrieve data

In [3]:
# Load the 2025 schedule and the 2025 team stats from nflreadpy, converting
# each result to pandas with .to_pandas() right away.
schedule = nfl.load_schedules(seasons=2025).to_pandas()
team_stats = nfl.load_team_stats(seasons=2025).to_pandas()

In [4]:
# Display the freshly loaded team stats to inspect the available columns.
team_stats

Unnamed: 0,season,week,team,season_type,opponent_team,completions,attempts,passing_yards,passing_tds,passing_interceptions,...,pat_made,pat_att,pat_missed,pat_blocked,pat_pct,gwfg_made,gwfg_att,gwfg_missed,gwfg_blocked,gwfg_distance
0,2025,1,ARI,REG,NO,21,29,163,2,0,...,2,2,0,0,1.0,0,0,0,0,0
1,2025,1,ATL,REG,TB,27,42,298,1,0,...,2,2,0,0,1.0,0,0,0,0,0
2,2025,1,BAL,REG,BUF,14,19,209,2,0,...,4,5,1,0,0.8,0,0,0,0,0
3,2025,1,BUF,REG,BAL,33,46,394,2,0,...,2,2,0,0,1.0,1,1,0,0,32
4,2025,1,CAR,REG,JAX,18,35,154,1,2,...,1,1,0,0,1.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,2025,7,SEA,REG,HOU,17,32,213,1,2,...,3,3,0,0,1.0,0,0,0,0,0
212,2025,7,SF,REG,ATL,17,26,152,0,1,...,2,2,0,0,1.0,0,0,0,0,0
213,2025,7,TB,REG,DET,28,50,228,1,1,...,0,0,0,0,,0,0,0,0,0
214,2025,7,TEN,REG,NE,25,34,255,1,1,...,1,1,0,0,1.0,0,0,0,0,0


In [5]:
# Filter out data I don't want.
#
# The drop below is written to be safe to re-run: team_stats is reassigned in
# place, so on a second execution of this cell the columns are already gone
# and a strict drop would raise KeyError.

cols_to_drop = [
    # season identifiers
    "season",
    "season_type",
    # passing extras
    "sack_fumbles",
    "passing_air_yards",
    "passing_yards_after_catch",
    "passing_first_downs",
    "passing_2pt_conversions",
    # rushing extras
    "rushing_2pt_conversions",
    # receiving columns
    "receptions",
    "targets",
    "receiving_yards",
    "receiving_tds",
    "receiving_fumbles",
    "receiving_air_yards",
    "receiving_yards_after_catch",
    "receiving_first_downs",
    "receiving_2pt_conversions",
    # special teams / miscellaneous
    "special_teams_tds",
    "misc_yards",
    "fumble_recovery_own",
    "fumble_recovery_yards_own",
    "fumble_recovery_opp",
    "fumble_recovery_yards_opp",
    "fumble_recovery_tds",
    "timeouts",
    # field-goal detail columns (fg_att and fg_pct are kept)
    "fg_made",
    "fg_missed",
    "fg_blocked",
    "fg_long",
    "fg_made_0_19",
    "fg_made_20_29",
    "fg_made_30_39",
    "fg_made_40_49",
    "fg_made_50_59",
    "fg_made_60_",
    "fg_missed_0_19",
    "fg_missed_20_29",
    "fg_missed_30_39",
    "fg_missed_40_49",
    "fg_missed_50_59",
    "fg_missed_60_",
    "fg_made_list",
    "fg_missed_list",
    "fg_blocked_list",
    "fg_made_distance",
    "fg_missed_distance",
    "fg_blocked_distance",
    # extra-point detail columns (pat_pct is kept)
    "pat_made",
    "pat_att",
    "pat_missed",
    "pat_blocked",
    # gwfg_* columns
    "gwfg_made",
    "gwfg_att",
    "gwfg_missed",
    "gwfg_blocked",
    "gwfg_distance",
]

# Report anything that is not there (already dropped, or renamed upstream)
# instead of failing, so the skip is visible rather than silent.
missing_cols = [col for col in cols_to_drop if col not in team_stats.columns]
if missing_cols:
    print(f"Skipping columns not present in team_stats: {missing_cols}")

team_stats = team_stats.drop(columns=cols_to_drop, errors="ignore")

In [6]:
# Order the rows by team and then by week, renumbering the index from 0, so
# that each team's games sit together in week order for the per-team
# running aggregates computed next.
team_stats = team_stats.sort_values(by=["team", "week"], ignore_index=True)

In [7]:
# Per-team running (expanding) averages of the stats listed below.

avg_cols = [
    # EPA / CPOE metrics and kicking percentages
    "passing_epa", "passing_cpoe", "rushing_epa", "receiving_epa",
    "fg_pct", "pat_pct",
    # passing
    "completions", "attempts", "passing_yards", "passing_tds",
    "passing_interceptions",
    "sacks_suffered", "sack_yards_lost", "sack_fumbles_lost",
    # rushing
    "carries", "rushing_yards", "rushing_tds", "rushing_fumbles_lost",
    "rushing_first_downs",
    # receiving
    "receiving_fumbles_lost",
    # defense
    "def_tackles_solo", "def_tackles_with_assist", "def_tackle_assists",
    "def_tackles_for_loss", "def_tackles_for_loss_yards",
    "def_fumbles_forced",
    "def_sacks", "def_sack_yards", "def_qb_hits",
    "def_interceptions", "def_interception_yards", "def_pass_defended",
    "def_tds", "def_fumbles", "def_safeties",
    # penalties
    "penalties", "penalty_yards",
    # returns
    "punt_returns", "punt_return_yards",
    "kickoff_returns", "kickoff_return_yards",
    # field-goal attempts
    "fg_att",
]

# Within each team, average every stat over all of that team's rows up to and
# including the current row (in the frame's current row order). Dropping the
# "team" group level leaves the original row index, so the assignment below
# aligns row-for-row with team_stats.
# NOTE(review): the average on a week-x row includes week x itself; when
# these are used as features for a game, presumably the row from the previous
# week should be joined - confirm in the downstream merge.
running_means = (
    team_stats.groupby("team")[avg_cols]
    .expanding()
    .mean()
    .droplevel(0)
    .add_prefix("avg_")
)

team_stats[running_means.columns.tolist()] = running_means


In [8]:
# Display team_stats again to check the new avg_* columns.
team_stats

Unnamed: 0,week,team,opponent_team,completions,attempts,passing_yards,passing_tds,passing_interceptions,sacks_suffered,sack_yards_lost,...,avg_def_tds,avg_def_fumbles,avg_def_safeties,avg_penalties,avg_penalty_yards,avg_punt_returns,avg_punt_return_yards,avg_kickoff_returns,avg_kickoff_return_yards,avg_fg_att
0,1,ARI,NO,21,29,163,2,0,5,-33,...,0.0,0.000000,0.000000,9.000000,54.000000,3.000000,30.000000,3.000000,73.000000,3.000000
1,2,ARI,CAR,17,25,220,1,1,1,-9,...,0.0,0.000000,0.000000,10.500000,75.000000,2.000000,29.500000,3.500000,80.000000,2.500000
2,3,ARI,SF,22,35,159,1,0,1,-5,...,0.0,0.000000,0.333333,8.666667,60.000000,1.666667,22.333333,3.666667,91.666667,2.666667
3,4,ARI,SEA,27,41,200,2,2,6,-36,...,0.0,0.000000,0.250000,8.250000,56.500000,1.500000,20.500000,4.000000,100.250000,2.500000
4,5,ARI,TEN,23,32,220,0,0,3,-28,...,0.0,0.200000,0.200000,8.200000,54.600000,1.600000,19.600000,4.000000,98.200000,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,3,WAS,LV,15,21,207,1,0,1,-8,...,0.0,0.000000,0.000000,8.000000,57.666667,3.333333,59.000000,3.000000,102.333333,2.000000
212,4,WAS,ATL,16,27,156,2,1,2,-9,...,0.0,0.500000,0.000000,7.500000,55.750000,3.000000,44.250000,3.750000,126.500000,2.500000
213,5,WAS,LAC,15,26,231,1,0,1,-5,...,0.0,0.400000,0.000000,7.400000,54.200000,2.400000,35.400000,3.600000,119.600000,2.400000
214,6,WAS,CHI,19,26,211,3,1,3,-6,...,0.0,0.333333,0.000000,7.000000,51.833333,2.166667,31.000000,3.666667,120.833333,2.333333


## Retrieve Time of Possession Stats

In [9]:
# Page to scrape:
# https://www.teamrankings.com/nfl/stat/average-time-of-possession-net-of-ot

# Start a Chrome browser session; the value returned by
# ChromeDriverManager().install() is handed to the Selenium Service.
chromedriver_path = ChromeDriverManager().install()
chrome_service = Service(chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

In [10]:
# Point the browser at the TeamRankings average time-of-possession page.
possession_url = "https://www.teamrankings.com/nfl/stat/average-time-of-possession-net-of-ot"
driver.get(possession_url)

In [11]:
# Dismiss the cookie-consent popup if it becomes clickable within 10 seconds.
# This step is best-effort: the scrape can proceed without it, so Selenium
# failures (timeout waiting for the button, a failed click, ...) are reported
# and ignored. Only WebDriverException is caught, so KeyboardInterrupt and
# genuine programming errors (e.g. an undefined `driver`) still surface
# instead of being silently swallowed by a bare `except:`.
try:
    popup_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="html"]/body/div[4]/div/div/div[2]/div/button[2]')
        )
    )
    popup_button.click()
    print("Cookies accepted.")
except WebDriverException:
    print("No cookie popup found (or different wording).")

No cookie popup found (or different wording).


In [12]:
# Grab the stats table (element id "DataTables_Table_0") and parse it into a
# DataFrame.

# Copy the table's HTML out of the browser first, and always close the
# browser afterwards - even if the element lookup fails - so no Chrome
# process is left running.
try:
    table = driver.find_element(By.XPATH, '//*[@id="DataTables_Table_0"]')
    table_html = table.get_attribute("outerHTML")
finally:
    driver.quit()

# pandas deprecated passing a literal HTML string to read_html; wrapping the
# markup in StringIO is the supported form and avoids the FutureWarning.
df = pd.read_html(StringIO(table_html))[0]

  df = pd.read_html(table.get_attribute("outerHTML"))[0]


In [13]:
# Display the scraped time-of-possession table.
df

Unnamed: 0,Rank,Team,2025,Last 3,Last 1,Home,Away,2024
0,1,Buffalo,33:11,31:24,28:21,33:07,33:21,30:34
1,2,Carolina,32:32,32:47,35:05,31:44,33:08,27:13
2,3,Kansas City,32:17,34:54,42:08,33:44,30:21,30:06
3,4,Atlanta,32:15,31:14,27:50,33:42,30:48,29:54
4,5,LA Chargers,32:08,34:21,34:27,35:02,29:58,30:23
5,6,Detroit,31:48,31:47,31:39,30:05,33:06,31:46
6,7,San Francisco,31:44,33:33,32:10,28:20,34:18,30:29
7,8,Arizona,31:40,32:58,34:10,30:17,33:30,29:32
8,9,Chicago,31:15,32:38,36:19,32:04,30:25,30:01
9,10,Jacksonville,31:05,29:52,28:14,30:55,31:18,26:59


## Add stats for previous weeks in schedule

In [14]:
# For each game in week x of the schedule we need one team's offensive stats
# and the other team's defensive stats, both taken from week x - 1.
schedule

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,wind,away_qb_id,home_qb_id,away_qb_name,home_qb_name,away_coach,home_coach,referee,stadium_id,stadium
0,2025_01_DAL_PHI,2025,REG,1,2025-09-04,Thursday,20:20,DAL,20.0,PHI,...,11.0,00-0033077,00-0036389,Dak Prescott,Jalen Hurts,Brian Schottenheimer,Nick Sirianni,Shawn Smith,PHI00,Lincoln Financial Field
1,2025_01_KC_LAC,2025,REG,1,2025-09-05,Friday,20:00,KC,21.0,LAC,...,,00-0033873,00-0036355,Patrick Mahomes,Justin Herbert,Andy Reid,Jim Harbaugh,Carl Cheffers,LAX01,SoFi Stadium
2,2025_01_TB_ATL,2025,REG,1,2025-09-07,Sunday,13:00,TB,23.0,ATL,...,,00-0034855,00-0039917,Baker Mayfield,Michael Penix,Todd Bowles,Raheem Morris,Land Clark,ATL97,Mercedes-Benz Stadium
3,2025_01_CIN_CLE,2025,REG,1,2025-09-07,Sunday,13:00,CIN,17.0,CLE,...,10.0,00-0036442,00-0026158,Joe Burrow,Joe Flacco,Zac Taylor,Kevin Stefanski,Adrian Hill,CLE00,FirstEnergy Stadium
4,2025_01_MIA_IND,2025,REG,1,2025-09-07,Sunday,13:00,MIA,8.0,IND,...,,00-0036212,00-0035710,Tua Tagovailoa,Daniel Jones,Mike McDaniel,Shane Steichen,Brad Allen,IND00,Lucas Oil Stadium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,2025_18_DAL_NYG,2025,REG,18,2026-01-04,Sunday,13:00,DAL,,NYG,...,,,,,,Brian Schottenheimer,Brian Daboll,,NYC01,MetLife Stadium
268,2025_18_WAS_PHI,2025,REG,18,2026-01-04,Sunday,13:00,WAS,,PHI,...,,,,,,Dan Quinn,Nick Sirianni,,PHI00,Lincoln Financial Field
269,2025_18_BAL_PIT,2025,REG,18,2026-01-04,Sunday,13:00,BAL,,PIT,...,,,,,,John Harbaugh,Mike Tomlin,,PIT00,Acrisure Stadium
270,2025_18_SEA_SF,2025,REG,18,2026-01-04,Sunday,13:00,SEA,,SF,...,,,,,,Mike Macdonald,Kyle Shanahan,,SFO01,Levi's Stadium


In [15]:
# Show only the games after week 1 (rows whose "week" value is greater than 1).
after_week_one = schedule["week"].gt(1)
schedule.loc[after_week_one]

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,wind,away_qb_id,home_qb_id,away_qb_name,home_qb_name,away_coach,home_coach,referee,stadium_id,stadium
16,2025_02_WAS_GB,2025,REG,2,2025-09-11,Thursday,20:15,WAS,18.0,GB,...,7.0,00-0039910,00-0036264,Jayden Daniels,Jordan Love,Dan Quinn,Matt LaFleur,Brad Rogers,GNB00,Lambeau Field
17,2025_02_CLE_BAL,2025,REG,2,2025-09-14,Sunday,13:00,CLE,17.0,BAL,...,3.0,00-0026158,00-0034796,Joe Flacco,Lamar Jackson,Kevin Stefanski,John Harbaugh,Brad Allen,BAL00,M&T Bank Stadium
18,2025_02_JAX_CIN,2025,REG,2,2025-09-14,Sunday,13:00,JAX,27.0,CIN,...,3.0,00-0036971,00-0036442,Trevor Lawrence,Joe Burrow,Liam Coen,Zac Taylor,Alan Eck,CIN00,Paycor Stadium
19,2025_02_NYG_DAL,2025,REG,2,2025-09-14,Sunday,13:00,NYG,37.0,DAL,...,,00-0029263,00-0033077,Russell Wilson,Dak Prescott,Brian Daboll,Brian Schottenheimer,Bill Vinovich,DAL00,AT&T Stadium
20,2025_02_CHI_DET,2025,REG,2,2025-09-14,Sunday,13:00,CHI,21.0,DET,...,,00-0039918,00-0033106,Caleb Williams,Jared Goff,Ben Johnson,Dan Campbell,Land Clark,DET00,Ford Field
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,2025_18_DAL_NYG,2025,REG,18,2026-01-04,Sunday,13:00,DAL,,NYG,...,,,,,,Brian Schottenheimer,Brian Daboll,,NYC01,MetLife Stadium
268,2025_18_WAS_PHI,2025,REG,18,2026-01-04,Sunday,13:00,WAS,,PHI,...,,,,,,Dan Quinn,Nick Sirianni,,PHI00,Lincoln Financial Field
269,2025_18_BAL_PIT,2025,REG,18,2026-01-04,Sunday,13:00,BAL,,PIT,...,,,,,,John Harbaugh,Mike Tomlin,,PIT00,Acrisure Stadium
270,2025_18_SEA_SF,2025,REG,18,2026-01-04,Sunday,13:00,SEA,,SF,...,,,,,,Mike Macdonald,Kyle Shanahan,,SFO01,Levi's Stadium
