<a href="https://colab.research.google.com/github/zzzsheep/NFL-Predictor-/blob/main/NFL_predictor_V1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install Packages
!pip install nfl_data_py
!pip install pandas
!pip install os
!pip install matplotlib


In [None]:
import nfl_data_py as nfl
import pandas as pd
import os
import urllib.request
import matplotlib.pyplot as plt

**Imports play-by-play data**

In [None]:
# Load play-by-play rows for the 2021 through 2024 seasons.
pbp_data = nfl.import_pbp_data(
    [2021, 2022, 2023, 2024],
    downcast=True,
)

# Flatten the four team columns into one array and keep the distinct values.
teams = pd.unique(
    pbp_data.loc[:, ['home_team', 'away_team', 'posteam', 'defteam']].to_numpy().ravel()
)
# Print the list of unique teams
print("All unique teams:", teams)

#df_passing = pd.read_csv('nfl_team_stats_2024.csv')
#df_rushing = pd.read_csv('nfl_team_rushing_stats_2024.csv')

**ALL functions used**

In [None]:
# Builds the abbreviated "F.Lastname" form of a full player name
def parse_name(player_name):
  """Return the first initial, a dot, and the remaining name tokens.

  The name is split on whitespace. The first token contributes only its
  first character; all remaining tokens are joined with single spaces and
  used as the last name.

  Raises:
    ValueError: if the name contains fewer than two tokens.
  """
  tokens = player_name.split()
  if len(tokens) <= 1:
    raise ValueError("No valid first or last name")
  given_name, *surname_tokens = tokens
  surname = ' '.join(surname_tokens)
  return f"{given_name[0]}.{surname}"

# Not complete: helper for inspecting the first matched row of player data
def check_name(player, player_data):
  """Print and return the name stored on the first row of ``player_data``.

  The original version took a scalar with ``.iloc[0]`` and then called
  DataFrame-only operations (``.empty``, ``['player_name'].iloc[0]``) on
  it, which always failed. Emptiness is now checked on the frame itself
  and both fields are read from its first row.

  Args:
    player: name of the player being checked (currently unused).
    player_data: DataFrame that must contain ``receiver_id`` and
      ``player_name`` columns.

  Returns:
    The ``player_name`` value of the first row.

  Raises:
    ValueError: if ``player_data`` has no rows.
  """
  if player_data.empty:
    raise ValueError("player_id not found")

  first_row = player_data.iloc[0]
  print(first_row['receiver_id'])

  name = first_row['player_name']
  print(name)
  return name


# Determine the correct column to filter based on the player's position
def player_pos(position):
  """Map a position code to the play-by-play column that names the player.

  Args:
    position: one of ``'qb'``, ``'rb'``, ``'wr'`` or ``'te'``. Surrounding
      whitespace and letter case are ignored.

  Returns:
    ``'passer'`` for qb, ``'rusher'`` for rb, ``'receiver'`` for wr and te.

  Raises:
    ValueError: if the position is not one of the supported codes.
  """
  column_by_position = {
      'qb': 'passer',
      'rb': 'rusher',
      'wr': 'receiver',
      # The user prompt offers "te"; tight ends are filtered through the
      # receiver column just like wide receivers.
      'te': 'receiver',
  }
  key = position.strip().lower() if isinstance(position, str) else position
  try:
    return column_by_position[key]
  except (KeyError, TypeError):
    # The message lists the accepted inputs, not the returned column names.
    raise ValueError("Invalid position entered! Choose from qb, rb, wr, or te.") from None

# Calculates the average total yards per game
def average_calc(player_data, season=None, data_type=None):
  """Return average yards per game, rounded to two decimals.

  Args:
    player_data: DataFrame of stats for one player.
    season: ``'current'`` to take the game count from the summed ``games``
      column; any other value counts distinct ``game_id`` values instead.
    data_type: ``'passing'``, ``'rushing'`` or ``'receiving'``; selects the
      ``<data_type>_yards`` column that is summed.

  Returns:
    ``total_yards / total_games`` rounded to 2 decimals, or ``0`` when the
    game count is not positive.

  Raises:
    ValueError: if games were played but ``data_type`` is not supported
      (previously this surfaced as an UnboundLocalError).
  """
  if season == 'current':
    total_games = player_data['games'].sum()
  else:
    total_games = player_data['game_id'].nunique()

  # Handle the case where no games are played
  if not total_games > 0:
    return 0

  yards_columns = {
      'passing': 'passing_yards',
      'rushing': 'rushing_yards',
      'receiving': 'receiving_yards',
  }
  if data_type not in yards_columns:
    raise ValueError(
        f"Unsupported data_type {data_type!r}; choose from passing, rushing, or receiving.")

  total_yards = player_data[yards_columns[data_type]].sum()
  return round(total_yards / total_games, 2)

# Displays data from player, has parameter for which season
def player_stats_summary(player_season, values, groupby, pos, data_type=None, player=None):
  """Print a per-season stat table and return the average yards per game.

  The selected columns are grouped by ``groupby`` and summed (numeric
  columns only). An average-yards-per-game column is added, the columns
  are put in a fixed display order, and the table is printed.

  Args:
    player_season: DataFrame of rows for a single player.
    values: column names to keep before grouping; duplicates are ignored.
    groupby: column name(s) to group by.
    pos: ``'passer'``, ``'rusher'`` or ``'receiver'``.
    data_type: ``'current'`` for the seasonal layout (uses the ``games``
      column and labels rows with ``player``); anything else for the
      play-by-play layout (counts distinct ``game_id`` values).
    player: display name written into the ``player`` column of the
      current-season layout.

  Returns:
    The average yards per game computed by ``average_calc``. This is ``0``
    when there are no games, so an empty selection no longer raises
    IndexError.

  Raises:
    ValueError: if ``pos`` is not a supported column name.
  """
  # pos -> (yard kind, play-by-play stat columns, current-season stat columns)
  layouts = {
      'passer': (
          'passing',
          ['pass', 'passing_yards', 'pass_attempt', 'complete_pass',
           'rushing_yards', 'sack', 'yards_gained', 'pass_touchdown'],
          ['passing_yards', 'rushing_yards', 'sack_yards'],
      ),
      'rusher': (
          'rushing',
          ['rushing_yards', 'rush_attempt', 'rush_touchdown', 'rush'],
          ['rushing_yards', 'completions', 'attempts', 'rushing_tds',
           'receptions', 'targets', 'receiving_yards', 'carries'],
      ),
      'receiver': (
          'receiving',
          ['receiving_yards', 'rush_attempt', 'touchdown', 'first_down'],
          # Each column is listed once; the old list repeated 'receptions'
          # and 'receiving_yards', which duplicated columns in the output.
          ['receiving_yards', 'receptions', 'attempts', 'targets',
           'receiving_tds', 'receiving_first_downs', 'receiving_epa'],
      ),
  }
  if pos not in layouts:
    raise ValueError(f"Unsupported pos {pos!r}; choose from passer, rusher, or receiver.")

  yard_kind, historical_columns, current_columns = layouts[pos]
  avg_column = f'avg {yard_kind} ypg'

  # Drop repeated names (order preserved) so selection yields unique columns.
  unique_values = list(dict.fromkeys(values))
  summary = (player_season
             .loc[:, unique_values]
             .groupby(groupby, as_index=False)
             .sum(numeric_only=True))

  if data_type == 'current':
    avg_yards = average_calc(player_season, 'current', yard_kind)
    summary['player'] = player
    leading_columns = ['season', 'player', 'games']
    stat_columns = current_columns
  else:
    avg_yards = average_calc(player_season, None, yard_kind)
    summary['total_games'] = player_season['game_id'].nunique()
    leading_columns = ['season', pos, 'total_games']
    stat_columns = historical_columns

  summary[avg_column] = avg_yards
  summary = summary[leading_columns + [avg_column] + stat_columns]
  print(summary.to_string(index=False))

  # Return the scalar directly instead of reading row 0 of the table,
  # which fails when the grouped table has no rows.
  return avg_yards


# Gets player's player id by using roster import
def player_info(player_name, season=2024):
  """Look up a player's id in the seasonal roster.

  Args:
    player_name: full or partial player name; matched case-insensitively
      as a literal substring of the roster's ``player_name`` column.
    season: roster season to search. Defaults to 2024, the value that was
      previously hard-coded.

  Returns:
    The ``player_id`` of the first matching roster row.

  Raises:
    ValueError: if no roster row matches the name.
  """
  rosters = nfl.import_seasonal_rosters([season], ['player_id', 'player_name', 'week'])

  # regex=False: characters such as "." or "(" in a name are matched
  # literally instead of being interpreted as a regular expression.
  name_matches = rosters['player_name'].str.contains(
      player_name, case=False, na=False, regex=False)
  matching_rows = rosters[name_matches]

  if matching_rows.empty:
    raise ValueError("player_id not found")
  return matching_rows['player_id'].iloc[0]

# Get Opponent team Rushing, Passing, and Receiving Yards allowed per Game
def opp_team_stats(defteam, season, hometeam=None, yard_type=None):
  """Return the average yards per game allowed by a defense in one season.

  Rows of the global ``pbp_data`` where ``defteam`` is on defense in
  ``season`` are selected; the yards column chosen by ``yard_type`` is
  summed and divided by the number of distinct games in that selection.

  Args:
    defteam: team abbreviation of the defense; upper-cased before matching.
    season: season to evaluate.
    hometeam: accepted for backward compatibility and not used as a filter.
      The old code filtered by it together with a ``position`` column that
      this notebook never adds to ``pbp_data``, and without a season filter.
      If a yard type string is passed here and ``yard_type`` is omitted
      (legacy positional call), it is used as the yard type.
    yard_type: ``'rusher'`` or ``'passer'``; any other value selects
      receiving yards, as before.

  Returns:
    Unrounded average yards per game, or ``0`` if the team has no rows for
    the season.
  """
  print("No data found, replacing with preset data.")

  yards_columns = {
      'rusher': 'rushing_yards',
      'passer': 'passing_yards',
      'receiver': 'receiving_yards',
  }
  # Tolerate callers that put the yard type in the third positional slot.
  if yard_type is None and isinstance(hometeam, str) and hometeam in yards_columns:
    yard_type = hometeam

  data = pbp_data[(pbp_data['defteam'] == defteam.upper()) &
                  (pbp_data['season'] == season)]

  total_games = data['game_id'].nunique()
  if data.empty:
    print(f"Opp team did not play in the {season} season.")
    return 0

  yards_column = yards_columns.get(yard_type, 'receiving_yards')
  avg_yards = data[yards_column].sum() / total_games
  print(f"\033[1;32m\nChanged {yard_type} yards value to: {round(avg_yards, 2)}\033[0m\n")
  return avg_yards

# Return the highest week number present for a season and season type
def get_current_week(season_type, season):
  """Return the maximum ``week`` in the weekly data for ``season``.

  Only rows whose ``season_type`` equals the given value are considered.
  """
  weekly = nfl.import_weekly_data([season], downcast=True)
  matching_weeks = weekly.loc[weekly['season_type'] == season_type, 'week']
  return matching_weeks.max()



#pbp_data.columns.values#<--shows all variables
#pbp_data.head(10)

**EXCEPTION FUNCTIONS**

In [None]:
# Check if player is a rookie player (seasons played <= 1)
def rookie(player_data, player, column):
  """Warn when a player appears in at most one season of ``player_data``.

  The previous check used ``< 1``, which only fired when the player had no
  rows at all; a player with exactly one season never triggered the
  warning even though the intent is ``season <= 1``.

  Args:
    player_data: DataFrame with a ``season`` column and the ``column``
      that holds player names.
    player: player name as stored in ``column``.
    column: name of the column to match the player against.

  Returns:
    ``True`` if the warning was printed, otherwise ``False``.
  """
  player_rows = player_data[player_data[column] == player]
  seasons_played = player_rows['season'].nunique()
  is_rookie = bool(seasons_played <= 1)
  if is_rookie:
    print(f"\033[1;31mWARNING!!! {player} has played in {seasons_played} seasons.\033[0m")
  return is_rookie

# QB ONLY, check if player has been traded since last season
def traded_exc(player_data=None, player=None, last_season=None, current_season=None):
  """Report a player's teams per season and warn if the team changed.

  Earlier, the last/current season team lists were taken from every row
  of ``player_data`` rather than from the player's own rows, so the
  comparison did not depend on the player; the per-season line also
  printed the global ``player_name`` instead of ``player``.

  Args:
    player_data: DataFrame with ``player_name``, ``season`` and
      ``recent_team`` columns.
    player: value to match in the ``player_name`` column.
    last_season: previous season; defaults to the global ``last_year``.
    current_season: current season; defaults to the global
      ``current_year``.

  Returns:
    Array of the distinct teams the player has rows for in
    ``current_season`` (empty if there are none).
  """
  if last_season is None:
    last_season = last_year
  if current_season is None:
    current_season = current_year

  player_rows = player_data[player_data['player_name'] == player]
  last_season_team = player_rows[player_rows['season'] == last_season]['recent_team'].unique()
  current_season_team = player_rows[player_rows['season'] == current_season]['recent_team'].unique()

  teams_by_season = player_rows.groupby('season')['recent_team'].unique()
  for season, season_teams in teams_by_season.items():
    team_list = ','.join(map(str, season_teams))
    print(f"In season {season}, {player} played for: {team_list}")

  # Only compare when both seasons have data; a missing season is not a trade.
  both_seasons_known = len(last_season_team) > 0 and len(current_season_team) > 0
  if both_seasons_known and set(last_season_team) != set(current_season_team):
    print(f"\033[1;31mWARNING!!! {player} has been traded since the last season.\033[0m")
  return current_season_team
# Check if player has played against opp team in the past 2-3 seasons
def total_game_against_opp(player_data, def_team, player, pos):
  """Warn when a player has fewer than two games against a defense.

  Args:
    player_data: DataFrame with ``defteam``, ``game_id`` and the ``pos``
      column.
    def_team: opposing team abbreviation; upper-cased before matching so
      lower-case user input still matches.
    player: player name as stored in the ``pos`` column.
    pos: name of the column that identifies the player.

  Returns:
    ``True`` if the player has zero or one distinct games against the
    team (a warning is printed), otherwise ``False``.
  """
  against_team = player_data[(player_data['defteam'] == def_team.upper()) &
                             (player_data[pos] == player)]
  total_games = against_team['game_id'].nunique()

  if total_games < 1:
    print(f"\033[1;31mWARNING!!! {player} hasn't played against {def_team} in the last 2-3 years!\033[0m")
    return True
  if total_games < 2:
    print(f"\033[1;31mWARNING!!! {player} has only played 1 game against {def_team} in the last 2-3 years!\033[0m")
    return True

  return False



**QUARTERBACK VALUES**

In [None]:

# Play-by-play columns kept for quarterbacks.
qb_values = [
    'season',
    'season_type',
    'passer',
    'game_id',
    'pass',
    'passing_yards',
    'pass_attempt',
    'complete_pass',
    'rushing_yards',
    'sack',
    'yards_gained',
    'pass_touchdown',
]

# Grouping keys for the play-by-play quarterback summary.
qb_groupby_values = ['season', 'passer']

# Seasonal-data columns kept for quarterbacks in the current season.
qb_values_current = [
    'season',
    'player_id',
    'games',
    'season_type',
    'passing_yards',
    'rushing_yards',
    'sack_yards',
]

# Grouping keys for the current-season quarterback summary.
qb_groupby_current_values = ['season', 'player_id']

**RUNNING BACK VALUES**

In [None]:
# Play-by-play columns kept for running backs.
rb_values = [
    'season',
    'season_type',
    'rusher',
    'game_id',
    'rushing_yards',
    'rush_attempt',
    'rush_touchdown',
    'rush',
]

# Grouping keys for the play-by-play running back summary.
rb_groupby_values = ['season', 'rusher']

# Seasonal-data columns kept for running backs in the current season.
rb_values_current = [
    'season',
    'player_id',
    'games',
    'season_type',
    'rushing_yards',
    'completions',
    'attempts',
    'rushing_tds',
    'receptions',
    'targets',
    'receiving_yards',
    'carries',
]

# Grouping keys for the current-season running back summary.
rb_groupby_current_values = ['season', 'player_id']

**WIDE RECEIVER VALUES**

In [None]:
# Play-by-play columns kept for receivers.
wr_values = ['season', 'season_type', 'receiver', 'game_id', 'receiving_yards',
             'rush_attempt', 'touchdown', 'first_down']

# Grouping keys for the play-by-play receiver summary.
wr_groupby_values = ['season', 'receiver']

# Seasonal-data columns kept for receivers in the current season.
# Each column is listed once: the earlier list repeated 'receptions' and
# 'receiving_yards', which makes column selection return duplicate columns.
wr_values_current = ['season', 'player_id', 'games', 'season_type', 'receiving_yards', 'receptions',
                     'attempts', 'targets', 'receiving_tds', 'receiving_first_downs', 'receiving_epa']

# Grouping keys for the current-season receiver summary.
wr_groupby_current_values = ['season', 'player_id']

### **INPUT FROM USER**

In [None]:
# Set to True to skip the prompts and use a fixed example player.
test = False

if test:
  player_name = "Justin Jefferson"
  abbr_player_name = "J.Jefferson"
  oppo_team = "NYG"
  current_year = 2024
  column = player_pos('wr')
else:
  player_name = input("input of player name: ")
  abbr_player_name = parse_name(player_name)
  # Team abbreviations in the data are compared as upper case elsewhere in
  # this notebook, so normalize the user's entry once here.
  oppo_team = input("opposing team abbr: ").strip().upper()
  current_year = int(input("current season: "))
  column = player_pos(input("Enter player position (qb, rb, wr, te): ").strip().lower() )  # Ask for player position to decide which column to filter

last_year = current_year - 1
game_type = "REG"

# Check for exceptions

# The trade check below only runs for quarterbacks; define player_team for
# every position so later cells can reference it without a NameError.
player_team = None

rookie(pbp_data, abbr_player_name, column)
if column == 'passer':
    pbp_data_weekly = nfl.import_weekly_data([last_year, current_year], downcast=True)
    player_team = traded_exc(pbp_data_weekly, abbr_player_name)

# True when the player has fewer than two games against the opponent.
swap_percentage = total_game_against_opp(pbp_data, oppo_team, abbr_player_name, column)

**CALCULATE HOW MANY WEEKS WE ARE IN THE SEASON**

In [None]:
# Get weekly data for the season entered by the user (previously fixed to 2024,
# which gave the wrong week whenever current_year was a different season)
pbp_data_weekly = nfl.import_weekly_data([current_year], downcast=True)

# Latest week that has data for the selected season type
current_week = pbp_data_weekly[pbp_data_weekly['season_type'] == game_type]['week'].max()
print("Current week in " + game_type + " season:", current_week)

**TEAM PLAY DATA LAST 2-3 YEARS**

In [None]:
# Filter play-by-play data for the player against the opponent team (all loaded seasons)
player_vs_oppo_stats = pbp_data[
    (pbp_data[column] == abbr_player_name) &           # Filter by the correct player column
    (pbp_data['defteam'] == oppo_team.upper()) &       # Filter by opponent team
    (pbp_data['season_type'] == game_type)
]

#check_name(player_name, player_vs_oppo_stats)
if player_vs_oppo_stats.empty:
   # No head-to-head history: fall back to what the defense allows per game.
   # The yard type is passed by keyword; passing it positionally put it in
   # the hometeam slot and left yard_type unset. hometeam is omitted
   # because player_team is only defined for quarterbacks.
   if get_current_week(game_type, current_year) < 4:
      print(f"\033[1;31mNo data found for player {abbr_player_name} against team {oppo_team.upper()} in {game_type} between 2021-2023.\n Getting last season data.\033[0m")
      avg_yards_vs_opp = opp_team_stats(oppo_team.upper(), last_year, yard_type=column)
   else:
      print(f"\033[1;31mNo data found for player {abbr_player_name} against team {oppo_team.upper()} in {game_type} between 2021-2023. \n Getting current season data.\033[0m")
      avg_yards_vs_opp = opp_team_stats(oppo_team.upper(), current_year, yard_type=column)

else:
  if column == 'passer':
      avg_yards_vs_opp = player_stats_summary(player_vs_oppo_stats, qb_values, qb_groupby_values, column)
  elif column == 'rusher':
      avg_yards_vs_opp = player_stats_summary(player_vs_oppo_stats, rb_values, rb_groupby_values, column)
  elif column == 'receiver':
      avg_yards_vs_opp = player_stats_summary(player_vs_oppo_stats, wr_values, wr_groupby_values, column)



**PLAYER CURRENT SEASON STATS**

In [None]:
# Load seasonal totals for the selected season type (game_type) rather than a
# hard-coded 'REG', so the import and the row filter below always agree.
pbp_data_seasonal = nfl.import_seasonal_data([current_year], game_type)

# Resolve the roster id once, then keep only this player's rows.
current_player_id = player_info(player_name)
player_current_season_stats = pbp_data_seasonal[
    (pbp_data_seasonal['player_id'] == current_player_id) &
    (pbp_data_seasonal['season_type'] == game_type)    # Filter by season game type
]

if column == 'passer':
  avg_yards_cur = player_stats_summary(player_current_season_stats, qb_values_current, qb_groupby_current_values, column, 'current', player_name)
elif column == 'rusher':
  avg_yards_cur = player_stats_summary(player_current_season_stats, rb_values_current, rb_groupby_current_values, column, 'current', player_name)
elif column == 'receiver':
  avg_yards_cur = player_stats_summary(player_current_season_stats, wr_values_current, wr_groupby_current_values, column, 'current', player_name)


**PLAYER LAST SEASON STATS**

In [None]:
# Play-by-play rows for this player in last season's games of the selected type
player_last_season_stats = pbp_data.loc[
    (pbp_data['season'] == last_year)
    & (pbp_data['season_type'] == game_type)
    & (pbp_data[column] == abbr_player_name)
]

# (A warning for an empty last-season selection is currently disabled.)

# Summarize with the column set matching the position; anything that is not
# a passer or rusher uses the receiver layout.
if column == 'rusher':
    avg_yards_last = player_stats_summary(player_last_season_stats, rb_values, rb_groupby_values, column)
elif column == 'passer':
    avg_yards_last = player_stats_summary(player_last_season_stats, qb_values, qb_groupby_values, column)
else:
    avg_yards_last = player_stats_summary(player_last_season_stats, wr_values, wr_groupby_values, column)


**GET NUMBER OF GAMES PLAYER PLAYED CURRENT SEASON**

In [None]:
# Total of the 'games' column across the player's current-season rows
current_games = player_current_season_stats.loc[:, 'games'].sum()
print(f"game played current season: {current_games}.")

**CALCULATIONS FOR FORMULA**

In [None]:
def final_calc(current, last, team_played):
  """Return the weighted sum of three per-game averages.

  Each argument is multiplied by its module-level weight
  (``current_severity``, ``last_severity``, ``team_played_severity``) and
  the three products are added.
  """
  current_part = current * current_severity
  last_part = last * last_severity
  team_played_part = team_played * team_played_severity
  return current_part + last_part + team_played_part

In [None]:
# Calculate all percentages for each severity
# The current-season weight grows with games played and the last-season
# weight shrinks by the same amount.
percentage_tweaker = 1.1765
current_severity = 0.2 + (current_games * percentage_tweaker)/100
last_severity = 0.4 - (current_games * percentage_tweaker)/100
if swap_percentage is True:
  # Little or no history against this opponent: swap the opponent weight
  # with the current-season weight.
  team_played_severity = current_severity
  current_severity = 0.4
else:
  team_played_severity = 0.4
# RB alterations
if column == "rusher":
    current_severity = 0.8
    last_severity = 0.1
    team_played_severity = 0.1
    if swap_percentage is True:
      current_severity = 0.9
      last_severity = 0.1
      team_played_severity = 0

# Weighted rushing projection; also used to detect a dual-threat (running) QB
check_rush_yards = final_calc(average_calc(player_current_season_stats, 'current', 'rushing') ,
                              average_calc(player_last_season_stats, None, 'rushing'),
                              average_calc(player_vs_oppo_stats, None, 'rushing')  )
final_calculated_projection = final_calc(avg_yards_cur,  avg_yards_last, avg_yards_vs_opp )
if column == "rusher":
  print(f"\033[1;32m\n\nFINAL CALCULATED PROJECTION OF RUSH YARDS: {round(check_rush_yards, 2)}\033[0m\n\n")
elif column == "passer":
  print(f"\033[1;32m\n\nFINAL CALCULATED PROJECTION OF PASS YARDS: {round(final_calculated_projection, 2)}\033[0m\n\n")
elif column == "receiver":
    print(f"\033[1;32m\n\nFINAL CALCULATED PROJECTION OF RECEIVING YARDS: {round(final_calculated_projection, 2)}\033[0m\n\n")

# The rushing bonus is a quarterback-only adjustment. Without the position
# check, running backs and receivers crossing the threshold had rush yards
# added to their projection under a "PASS YARDS" label.
if column == "passer" and check_rush_yards >= 40.99:
  final_calculated_projection = final_calculated_projection + check_rush_yards
  print(f"\033[1;32m\n\nFINAL CALCULATED PROJECTION OF PASS YARDS WITH ADDED RUSH YARDS: {round(final_calculated_projection, 2)}\033[0m\n\n")



In [None]:
# Inspired by Nick Wan on YouTube