In [3]:
import requests
import numpy as np
import pandas as pd
import pickle
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.inspection import PartialDependenceDisplay
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, learning_curve, cross_val_score, KFold
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

# Range of seasons used to build the dataset.
# The fetch helpers in this notebook iterate range(start_year, end_year + 1), so both bounds are inclusive.
start_year = 2013
end_year = 2022
# NOTE(review): presumably the season to generate predictions for; it is not referenced in the visible cells — confirm.
current_year = 2023

In [4]:
# Load the previously saved dataframe from data.pickle.
# NOTE(review): pickle.load can execute arbitrary code while deserializing; only load a data.pickle you created yourself.
with open('data.pickle','rb') as file:
    data = pickle.load(file)

# Show (rows, columns) as the cell output
data.shape

(5372, 313)

In [11]:
# # Save the dataframe to a pickle file to avoid repeatedly recreating the dataframe
# data = data.dropna()

# with open('data.pickle','wb') as file:
#     pickle.dump(data, file)

# Get data via API calls

In [211]:
def get_api_data(endpoint, params=None):
    """Conveniently fetch JSON data from a College Football Data (CFBD) API endpoint.

    The bearer token is read from the ``CFBD_API_KEY`` environment variable so
    that no credential is stored in the notebook.

    Args:
        endpoint: Path appended to the API base URL, e.g. "/games".
        params: Optional dict of query-string parameters.

    Returns:
        The decoded JSON payload when the response status is 200; otherwise
        None (the status code is printed).

    Raises:
        RuntimeError: If ``CFBD_API_KEY`` is not set or is empty.
    """
    import os  # local import keeps this helper self-contained

    base_url = "https://api.collegefootballdata.com"
    url = base_url + endpoint

    # Never hard-code the bearer token in the notebook; take it from the environment.
    api_key = os.environ.get("CFBD_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the CFBD_API_KEY environment variable to your CollegeFootballData API key."
        )

    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=30)

    if response.status_code == 200:
        return response.json()

    print(f"Request failed with status code: {response.status_code}")
    return None

In [212]:
def get_games_info(df, start_year, end_year):
    """Append regular-season game rows for the requested seasons to ``df``.

    One "/games" request is made per season in ``start_year..end_year``
    (inclusive). Games missing either conference or either score are skipped,
    rows with any remaining missing value are dropped, and a ``margin`` column
    (away points minus home points) is added.

    Args:
        df: Existing DataFrame the new game rows are concatenated onto.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        A new DataFrame: ``df`` followed by the fetched games, re-indexed from 0.
    """
    print("get_games_info")

    # (output column, key in the API record), in output column order
    field_map = (
        ('id', 'id'),
        ('year', 'season'),
        ('week', 'week'),
        ('neutral_site', 'neutral_site'),
        ('home_team', 'home_team'),
        ('home_conference', 'home_conference'),
        ('home_points', 'home_points'),
        ('home_elo', 'home_pregame_elo'),
        ('away_team', 'away_team'),
        ('away_conference', 'away_conference'),
        ('away_points', 'away_points'),
        ('away_elo', 'away_pregame_elo'),
    )
    # A game is only kept when all of these are present
    required = ('home_conference', 'away_conference', 'home_points', 'away_points')

    records = []
    for season in range(start_year, end_year + 1):
        payload = get_api_data("/games", {"year": season, "seasonType": "regular"})

        # A failed request (None) or an empty response contributes nothing
        if not payload:
            continue

        for game in payload:
            if any(game[field] is None for field in required):
                continue
            records.append({column: game[source] for column, source in field_map})

    # Build the frame once, dropping rows that still have a missing value (e.g. no Elo)
    games_df = pd.DataFrame(records).dropna()
    games_df['margin'] = games_df['away_points'] - games_df['home_points']
    return pd.concat([df, games_df], ignore_index=True)

In [213]:
def get_betting_lines(df, start_year, end_year):
    """Add a ``spread`` column holding the Bovada spread for each game.

    One "/lines" request is made per season in ``start_year..end_year``
    (inclusive). For every returned game that has a line whose provider is
    'Bovada', that line's spread is written to the rows of ``df`` with the
    same ``id``. Only the Bovada line is used; no averaging across providers
    takes place.

    Seasons whose request failed, games without a Bovada line, and Bovada
    lines with a missing spread are skipped instead of raising.

    Args:
        df: Games DataFrame with an ``id`` column; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame, with ``spread`` set where a Bovada spread exists
        (NaN elsewhere).
    """
    print("get_betting_lines")

    # Make sure the column exists even when no line matches any game
    if 'spread' not in df.columns:
        df['spread'] = float('nan')

    for year in range(start_year, end_year + 1):
        params = {
            "year": year
        }

        betting_data = get_api_data("/lines", params)

        # get_api_data returns None when the request fails
        if not betting_data:
            continue

        for betting_line in betting_data:
            bovada_line = next(
                (line for line in (betting_line['lines'] or []) if line['provider'] == 'Bovada'),
                None,
            )
            if bovada_line is None or bovada_line['spread'] is None:
                continue
            df.loc[df['id'] == betting_line['id'], 'spread'] = float(bovada_line['spread'])
    return df

In [214]:
def get_talent_data(df, start_year, end_year):
    """Add ``home_talent`` / ``away_talent`` columns from the "/talent" endpoint.

    One request is made per season in ``start_year..end_year`` (inclusive).
    Ratings are keyed by (school, year); the first value seen for a key wins.
    Teams without a rating get NaN. Seasons whose request failed are skipped
    instead of raising.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team`` and ``year``
            columns; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame with the two float talent columns added.
    """
    print("get_talent_data")

    # (school, year) -> talent rating
    talent_dict = {}

    for year in range(start_year, end_year + 1):
        params = {
            "year": year
        }

        talent_data = get_api_data("/talent", params)

        # get_api_data returns None when the request fails
        if not talent_data:
            continue

        for talent in talent_data:
            # setdefault keeps the first rating seen for a (school, year) pair
            talent_dict.setdefault((talent['school'], talent['year']), talent['talent'])

    # Map the ratings onto each side of every game, then coerce to float
    for side in ('home', 'away'):
        team_column = f'{side}_team'
        ratings = df.apply(
            lambda row: talent_dict.get((row[team_column], row['year']), None),
            axis=1
        )
        df[f'{side}_talent'] = ratings.astype(float)

    return df

In [215]:
def get_poll_data(df, start_year, end_year):
    """Add poll-points columns for the AP Top 25 and the Coaches Poll.

    One "/rankings" request is made per season in ``start_year..end_year``
    (inclusive). For each tracked poll, a team's points are looked up by
    (team, year, week) of the game row; teams absent from the poll get 0.
    Seasons whose request failed are skipped instead of raising.

    Columns are added in a fixed order:
    ``home_ap_top_25_points``, ``away_ap_top_25_points``,
    ``home_coaches_poll_points``, ``away_coaches_poll_points``.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year`` and
            ``week`` columns; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame with the four poll-points columns added.
    """
    print("get_poll_data")

    # (school, season, week, poll name) -> poll points
    rankings_dict = {}

    # Only consider the AP Top 25 poll and Coaches Poll.
    # A tuple (not a set) keeps the order of the generated columns identical on every run.
    poll_names = ('AP Top 25', 'Coaches Poll')

    for year in range(start_year, end_year + 1):
        params = {
            "year": year
        }

        rankings_data = get_api_data("/rankings", params)

        # get_api_data returns None when the request fails
        if not rankings_data:
            continue

        # NOTE(review): the key does not include a season type; if the endpoint also returns
        # postseason polls that reuse week numbers, they would overwrite regular-season entries — confirm.
        for rr in rankings_data:
            for poll in rr['polls']:
                if poll['poll'] not in poll_names:
                    continue
                for rank in poll['ranks']:
                    key = (rank['school'], rr['season'], rr['week'], poll['poll'])
                    rankings_dict[key] = rank['points']

    # Map the poll points onto each side of every game based on team, year and week
    for poll_name in poll_names:
        slug = poll_name.replace(" ", "_").lower()
        for side in ('home', 'away'):
            team_column = f'{side}_team'
            df[f'{side}_{slug}_points'] = df.apply(
                lambda row: rankings_dict.get((row[team_column], row['year'], row['week'], poll_name), 0),
                axis=1
            )

    return df

In [216]:
def get_basic_stats_data(df, start_year, end_year):
    """Add cumulative basic-stat columns for the home and away team of each game.

    For every season in ``start_year..end_year`` (inclusive) and every week
    from 1 up to (but not including) the largest ``week`` in ``df``, the
    "/stats/season" endpoint is queried for weeks 1..week. Each game row then
    receives the totals through the *previous* week (e.g. a week-6 row gets
    weeks 1-5), so the game's own week is never included. Missing values
    default to 0.

    Three per-play ratios are derived afterwards, and finally every NaN in the
    whole frame is replaced by 0 in place.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year`` and
            ``week`` columns; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame with the ``*_cumulative`` columns added.
    """
    print("get_basic_stats_data")

    # API stat name -> column stem; only these stats are kept
    stat_name_mappings = {
        'possessionTime': 'possession_time',
        'turnovers': 'turnovers',
        'penalties': 'penalties',
        'sacks': 'sacks',
        'tacklesForLoss': 'tackles_for_loss',
        'firstDowns': 'first_downs',
        'thirdDowns': 'third_downs',
        'fourthDowns': 'fourth_downs',
        'thirdDownConversions': 'third_down_conversions',
        'fourthDownConversions': 'fourth_down_conversions',
        'passAttempts': 'pass_attempts',
        'passCompletions': 'pass_completions',
        'netPassingYards': 'net_passing_yards',
        'passingTDs': 'passing_TDs',
        'rushingAttempts': 'rushing_attempts',
        'rushingYards': 'rushing_yards',
        'rushingTDs': 'rushing_TDs',
        'totalYards': 'total_yards'
    }
    # (ratio stem, numerator stem, denominator stem)
    ratio_specs = (
        ('yards_per_completion', 'net_passing_yards', 'pass_completions'),
        ('yards_per_attempt', 'net_passing_yards', 'pass_attempts'),
        ('yards_per_carry', 'rushing_yards', 'rushing_attempts'),
    )
    sides = ('home', 'away')

    last_week = df['week'].max()
    # (team, season, through-week, column stem) -> cumulative value
    cumulative = {}

    for season in range(start_year, end_year + 1):
        print(season, end='  ')
        for through_week in range(1, last_week):
            payload = get_api_data(
                "/stats/season",
                {"year": season, "startWeek": 1, "endWeek": through_week},
            )
            if payload is None or len(payload) == 0:
                continue
            for entry in payload:
                if 'statName' in entry and entry['statName'] in stat_name_mappings:
                    stem = stat_name_mappings[entry['statName']]
                    cumulative[(entry['team'], entry['season'], through_week, stem)] = entry['statValue']

    def _through_previous_week(team_column, stem):
        # Look one week back so the row's own game is not part of its features
        return df.apply(
            lambda game: cumulative.get((game[team_column], game['year'], game['week'] - 1, stem), 0),
            axis=1
        )

    for stem in stat_name_mappings.values():
        for side in sides:
            df[f'{side}_{stem}_cumulative'] = _through_previous_week(f'{side}_team', stem)

    for ratio, numerator, denominator in ratio_specs:
        for side in sides:
            df[f'{side}_{ratio}_cumulative'] = (
                df[f'{side}_{numerator}_cumulative'] / df[f'{side}_{denominator}_cumulative']
            )

    # NOTE(review): this replaces NaN in every column of the frame, not only in the ratios above
    df.fillna(0, inplace=True)

    return df

In [217]:
def get_adv_stats_data(df, start_year, end_year):
    """Add cumulative advanced-stat columns (offense and defense) for both teams.

    For every season in ``start_year..end_year`` (inclusive) and every week
    from 1 up to (but not including) the largest ``week`` in ``df``, the
    "/stats/season/advanced" endpoint is queried for weeks 1..week. Each game
    row receives the values through the *previous* week, so the game's own
    week is never included. Missing values default to 0.

    ``havoc`` and ``fieldPosition`` arrive as nested objects; their ``total``
    and ``averagePredictedPoints`` fields respectively are used.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year`` and
            ``week`` columns; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame with ``{home,away}_{offense,defense}_*_cumulative``
        columns added.
    """
    print("get_adv_stats_data")

    # API stat name -> column stem; only these advanced stats are kept
    adv_stat_name_mappings = {
        'drives': 'drives',
        'ppa': 'ppa',
        'successRate': 'success_rate',
        'explosiveness': 'explosiveness',
        'pointsPerOpportunity': 'points_per_opportunity',
        'stuffRate': 'stuff_rate',
        'fieldPosition': 'field_position_average_predicted_points',
        'havoc': 'havoc',
    }
    # Stats delivered as nested objects, and the field extracted from each
    nested_field = {'havoc': 'total', 'fieldPosition': 'averagePredictedPoints'}
    units = ('offense', 'defense')
    sides = ('home', 'away')

    final_week = df['week'].max()
    # (team, season, through-week, "<unit>_<column stem>") -> value
    cumulative = {}

    for season in range(start_year, end_year + 1):
        print(season, end='  ')
        for through_week in range(1, final_week):
            payload = get_api_data(
                "/stats/season/advanced",
                {"year": season, "startWeek": 1, "endWeek": through_week},
            )
            if payload is None or len(payload) == 0:
                continue
            for team_entry in payload:
                for unit in units:
                    if unit not in team_entry:
                        continue
                    for api_name, raw in team_entry[unit].items():
                        if api_name not in adv_stat_name_mappings:
                            continue
                        value = raw[nested_field[api_name]] if api_name in nested_field else raw
                        label = unit + '_' + adv_stat_name_mappings[api_name]
                        cumulative[(team_entry['team'], team_entry['season'], through_week, label)] = value

    def _through_previous_week(team_column, label):
        # Look one week back so the row's own game is not part of its features
        return df.apply(
            lambda game: cumulative.get((game[team_column], game['year'], game['week'] - 1, label), 0),
            axis=1
        )

    for stem in adv_stat_name_mappings.values():
        for unit in units:
            for side in sides:
                df[f'{side}_{unit}_{stem}_cumulative'] = _through_previous_week(
                    f'{side}_team', unit + '_' + stem
                )

    return df

In [218]:
def get_returning_prod_data(df, start_year, end_year):
    """Add returning-production columns (passing/receiving/rushing PPA share).

    One "/player/returning" request is made per season in
    ``start_year..end_year`` (inclusive). Values are keyed by (team, season)
    and mapped onto each game by team and ``year``; there is no week offset
    because the figures are per season. Teams without data get 0. Seasons
    whose request failed are skipped instead of raising.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team`` and ``year``
            columns; modified in place.
        start_year: First season to request.
        end_year: Last season to request (inclusive).

    Returns:
        The same DataFrame with ``{home,away}_returning_production_*`` columns
        added.
    """
    print("get_returning_prod_data")

    # API field name -> column stem; only these fields are kept
    stat_name_mappings = {
        'percentPassingPPA': 'percent_passing_ppa',
        'percentReceivingPPA': 'percent_receiving_ppa',
        'percentRushingPPA': 'percent_rushing_ppa'
    }

    # (team, season, column stem) -> value
    production_stats_dict = {}

    for year in range(start_year, end_year + 1):
        params = {
            "year": year,
        }
        returning_production_data = get_api_data("/player/returning", params)

        # get_api_data returns None when the request fails
        if not returning_production_data:
            continue

        for production_stat in returning_production_data:
            for api_name, stat in stat_name_mappings.items():
                key = (production_stat['team'], production_stat['season'], stat)
                production_stats_dict[key] = production_stat[api_name]

    # Map the season-level values onto each side of every game
    for stat in stat_name_mappings.values():
        for side in ('home', 'away'):
            team_column = f'{side}_team'
            df[f'{side}_returning_production_{stat}'] = df.apply(
                lambda row: production_stats_dict.get((row[team_column], row['year'], stat), 0),
                axis=1
            )

    return df

In [219]:
def get_opponent_win_pct(df):
    """Add each team's average opponent winning percentage before the game.

    Background: the strength-of-schedule formula used in this notebook takes
    the opponents' record weighted by two, adds the opponents' opponents'
    record weighted by one, and divides by three. This function produces the
    first ingredient only.

    For every game row, the opponents the home (resp. away) team has already
    faced in the same season in earlier weeks are collected, ``win_pct`` is
    evaluated for each of them over those earlier games, and the mean is
    stored in ``home_opp_win_pct`` (resp. ``away_opp_win_pct``). Rows with no
    earlier opponents keep 0.0.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year``,
            ``week`` and ``margin`` columns; modified in place.

    Returns:
        The same DataFrame with the two columns added.
    """
    sides = ('home', 'away')
    for side in sides:
        df[f'{side}_opp_win_pct'] = 0.0

    for idx, game in df.iterrows():
        season = game['year']
        wk = game['week']

        # Games of the same season played in earlier weeks
        prior_games = df[(df['year'] == season) & (df['week'] < wk)]

        opponent_pcts = {}
        for side in sides:
            team = game[f'{side}_team']
            involves_team = (df['home_team'] == team) | (df['away_team'] == team)
            faced = pd.concat([
                prior_games.loc[involves_team, 'home_team'],
                prior_games.loc[involves_team, 'away_team'],
            ]).unique()
            # Both columns were concatenated, so the team itself shows up and must be removed
            opponent_pcts[side] = [
                win_pct(prior_games, opponent, season, wk)
                for opponent in faced
                if opponent != team
            ]

        for side in sides:
            pcts = opponent_pcts[side]
            if pcts:
                df.at[idx, f'{side}_opp_win_pct'] = sum(pcts) / len(pcts)

    return df

def win_pct(df, team, year, week):
    """Return the fraction of games in ``df`` that ``team`` won.

    A home win is a row with ``margin < 0`` and an away win a row with
    ``margin > 0``; a margin of 0 counts as a game played but not won.
    ``year`` and ``week`` are accepted but not used: the caller is expected
    to pass an already filtered ``df``.

    Returns:
        wins / games as a float, or 0.0 when the team has no games in ``df``.
    """
    as_home = df['home_team'] == team
    as_away = df['away_team'] == team

    played = (as_home | as_away).sum()
    if not played > 0:
        return 0.0

    won = ((as_home & (df['margin'] < 0)) | (as_away & (df['margin'] > 0))).sum()
    return float(won) / float(played)

In [220]:
def get_opponent_opponents_win_pct(df):
    """Add each team's average opponents'-opponents' winning percentage.

    For every game row, the opponents the home (resp. away) team has already
    faced in the same season in earlier weeks are collected. For each of
    them ``get_team_opp_opponents_win_pct`` is called (ignoring the team
    itself), and the mean of those results is stored in
    ``home_opp_opponents_win_pct`` (resp. ``away_opp_opponents_win_pct``).
    Rows with no earlier opponents keep 0.0.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year``,
            ``week`` and ``margin`` columns; modified in place.

    Returns:
        The same DataFrame with the two columns added.
    """
    sides = ('home', 'away')
    for side in sides:
        df[f'{side}_opp_opponents_win_pct'] = 0.0

    for idx, game in df.iterrows():
        season = game['year']
        wk = game['week']

        # Games of the same season played in earlier weeks
        prior_games = df[(df['year'] == season) & (df['week'] < wk)]

        # Opponents already faced by each side, in order of first appearance
        faced_by_side = {}
        for side in sides:
            team = game[f'{side}_team']
            involves_team = (df['home_team'] == team) | (df['away_team'] == team)
            names = pd.concat([
                prior_games.loc[involves_team, 'home_team'],
                prior_games.loc[involves_team, 'away_team'],
            ]).unique()
            faced_by_side[side] = [name for name in names if name != team]

        scores_by_side = {
            side: [
                get_team_opp_opponents_win_pct(df, opponent, season, wk, game[f'{side}_team'])
                for opponent in faced_by_side[side]
            ]
            for side in sides
        }

        for side in sides:
            scores = scores_by_side[side]
            if scores:
                df.at[idx, f'{side}_opp_opponents_win_pct'] = sum(scores) / len(scores)

    return df

def get_team_opp_opponents_win_pct(df, the_team, year, week, ignore_team):
    """Average winning percentage of ``the_team``'s earlier opponents.

    Only games of season ``year`` played before ``week`` are considered, and
    games between ``the_team`` and ``ignore_team`` are left out when
    collecting the opponents. Each opponent's winning percentage is computed
    with ``win_pct`` over those earlier games.

    Returns:
        The mean of the opponents' winning percentages, or 0.0 when
        ``the_team`` has no qualifying earlier opponent.
    """
    hosting = (df['home_team'] == the_team) & (df['away_team'] != ignore_team)
    visiting = (df['away_team'] == the_team) & (df['home_team'] != ignore_team)
    relevant = hosting | visiting

    # Games of the same season played in earlier weeks
    prior_games = df[(df['year'] == year) & (df['week'] < week)]

    names = pd.concat([
        prior_games.loc[relevant, 'home_team'],
        prior_games.loc[relevant, 'away_team'],
    ]).unique()

    # Both columns were concatenated, so the_team itself shows up and must be removed
    pcts = [win_pct(prior_games, name, year, week) for name in names if name != the_team]

    if not pcts:
        return 0.0
    return sum(pcts) / len(pcts)

In [221]:
def get_strength_of_schedule(df):
    """Add BCS-style strength-of-schedule columns for both teams.

    Runs ``get_opponent_win_pct`` and ``get_opponent_opponents_win_pct`` and
    then combines their outputs per side as
    (2 * opponents' win pct + opponents' opponents' win pct) / 3.

    Args:
        df: Games DataFrame passed through to the two helper steps.

    Returns:
        The DataFrame returned by the helpers, with
        ``home_bcs_strength_of_schedule`` and ``away_bcs_strength_of_schedule``
        added.
    """
    print("get_strength_of_schedule")

    df = get_opponent_opponents_win_pct(get_opponent_win_pct(df))

    for side in ('home', 'away'):
        opponents = df[f'{side}_opp_win_pct']
        opponents_opponents = df[f'{side}_opp_opponents_win_pct']
        # Opponents' record counts double
        df[f'{side}_bcs_strength_of_schedule'] = (2 * opponents + opponents_opponents) / 3

    return df

In [222]:
def _venue_performance(games, covered, won):
    """Score a team's record over ``games`` as the mean of cover rate and win rate.

    Only same-conference, non-neutral-site games are eligible. ``covered`` and
    ``won`` are boolean Series aligned with ``games``. Returns 0.0 when no
    game is eligible; otherwise a value between 0 and 1.
    """
    # `== False` (rather than `~`) also works when the column has object dtype
    eligible = (games['home_conference'] == games['away_conference']) & (games['neutral_site'] == False)
    eligible_count = eligible.sum()
    if eligible_count == 0:
        return 0.0
    return ((covered & eligible).sum() + (won & eligible).sum()) / (2 * eligible_count)


def get_home_vs_away_field_team_performance(df):
    """Add home-at-home and away-at-away performance features over 1, 2 and 5 years.

    For each game and each look-back window N in (1, 2, 5), the earlier games
    are those from the N previous seasons plus earlier weeks of the current
    season. Among them:

    * the home team's games *as home team* are scored by how often it beat the
      spread (``spread > margin``) and how often it won (``margin < 0``);
    * the away team's games *as away team* are scored by how often it beat the
      spread (``spread < margin``) and how often it won (``margin > 0``).

    Only same-conference games at non-neutral sites count, for the cover
    count, the win count and the denominator alike, so every score lies in
    [0, 1]. A team with no eligible game gets 0.0. Rows with a missing spread
    never count as covered.

    Columns added, for N in (1, 2, 5):
    ``home_team_performance_at_home_past_N_years``,
    ``away_team_performance_at_away_past_N_years`` and their difference
    ``home_vs_away_teams_performances_past_N_years``.

    Args:
        df: Games DataFrame with ``home_team``, ``away_team``, ``year``,
            ``week``, ``home_conference``, ``away_conference``,
            ``neutral_site``, ``spread`` and ``margin`` columns; modified in
            place.

    Returns:
        The same DataFrame with the nine columns added.
    """
    print("get_home_vs_away_field_team_performance")

    windows = (1, 2, 5)
    home_scores = {n_years: [] for n_years in windows}
    away_scores = {n_years: [] for n_years in windows}

    for _, row in df.iterrows():
        year = row['year']
        week = row['week']

        # Masks shared by all three windows
        earlier_this_season = (df['year'] == year) & (df['week'] < week)
        hosted_by_home_team = df['home_team'] == row['home_team']
        visited_by_away_team = df['away_team'] == row['away_team']

        for n_years in windows:
            in_window = ((df['year'] >= year - n_years) & (df['year'] < year)) | earlier_this_season

            home_games = df[hosted_by_home_team & in_window]
            away_games = df[visited_by_away_team & in_window]

            # margin is away points minus home points, so the home side covers when spread > margin
            home_scores[n_years].append(_venue_performance(
                home_games,
                covered=home_games['spread'] > home_games['margin'],
                won=home_games['margin'] < 0,
            ))
            away_scores[n_years].append(_venue_performance(
                away_games,
                covered=away_games['spread'] < away_games['margin'],
                won=away_games['margin'] > 0,
            ))

    # Add the new columns to the DataFrame
    for n_years in windows:
        df[f'home_team_performance_at_home_past_{n_years}_years'] = home_scores[n_years]
        df[f'away_team_performance_at_away_past_{n_years}_years'] = away_scores[n_years]

    for n_years in windows:
        df[f'home_vs_away_teams_performances_past_{n_years}_years'] = (
            df[f'home_team_performance_at_home_past_{n_years}_years']
            - df[f'away_team_performance_at_away_past_{n_years}_years']
        )

    return df

# Get relative stats

In [223]:
def _mean_from_team_perspective(games, team, value_when_home, value_when_away):
    """Average a per-game value for `team`, ignoring NaN.

    For each row of `games`, `value_when_home` is used where `team` is the
    home team and `value_when_away` otherwise.
    """
    return np.nanmean(np.where(games['home_team'] == team, value_when_home, value_when_away))


def get_past_ppg_stats(df):
    """Add each team's prior scoring averages to every game row.

    For both the home and the away team of each game, three look-back windows
    are evaluated over games that involve that team (as home or away):

    * ``cumulative``   - same year, strictly earlier week
    * ``past_1_years`` - the previous year, plus the same year's earlier weeks
    * ``past_2_years`` - the two previous years, plus the same year's earlier weeks

    For each window three averages are stored:

    * ``{side}_avg_scoring_margin_{window}``
    * ``{side}_avg_ppg_{window}``          (points scored per game)
    * ``{side}_avg_ppg_allowed_{window}``  (points conceded per game)

    Columns read: ``home_team``, ``away_team``, ``year``, ``week``, ``margin``,
    ``home_points``, ``away_points``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per game. The 18 result columns are added to this frame in
        place (reset to 0.0 if they already exist).

    Returns
    -------
    pandas.DataFrame
        The frame after ``fillna(0)``. Note that this fills NaN in *every*
        column, not only the ones added here. Windows with no prior games
        keep the initial value 0.0.
    """
    sides = ('home', 'away')
    windows = ('cumulative', 'past_1_years', 'past_2_years')

    # Create the result columns in a fixed order: metric, then window, then side.
    for metric in ('scoring_margin', 'ppg', 'ppg_allowed'):
        for window in windows:
            for side in sides:
                df[f'{side}_avg_{metric}_{window}'] = 0.0

    for index, row in df.iterrows():
        year = row['year']
        week = row['week']

        # Every window includes the current season's earlier weeks, so build that mask once.
        earlier_this_season = (df['year'] == year) & (df['week'] < week)
        window_masks = {
            'cumulative': earlier_this_season,
            'past_1_years': (df['year'] == year - 1) | earlier_this_season,
            'past_2_years': ((df['year'] >= year - 2) & (df['year'] < year)) | earlier_this_season,
        }

        for side in sides:
            team = row[f'{side}_team']
            involved = (df['home_team'] == team) | (df['away_team'] == team)

            for window, in_window in window_masks.items():
                games = df[involved & in_window]
                if games.shape[0] == 0:
                    # No history in this window: leave the 0.0 defaults.
                    continue

                # `margin` is negated when the team was the home side, i.e. the
                # column is treated as being from the away team's point of view.
                # NOTE(review): presumably margin = away_points - home_points; confirm where it is built.
                df.at[index, f'{side}_avg_scoring_margin_{window}'] = _mean_from_team_perspective(
                    games, team, -games['margin'], games['margin']
                )
                df.at[index, f'{side}_avg_ppg_{window}'] = _mean_from_team_perspective(
                    games, team, games['home_points'], games['away_points']
                )
                df.at[index, f'{side}_avg_ppg_allowed_{window}'] = _mean_from_team_perspective(
                    games, team, games['away_points'], games['home_points']
                )

    df = df.fillna(0)
    return df

In [224]:
def _opponent_scoring_averages(window_games, opponent):
    """Return (avg points scored, avg points allowed) for `opponent` over `window_games`.

    All of the opponent's games inside the window are used, NaN points ignored.
    """
    played = window_games[
        (window_games['home_team'] == opponent) | (window_games['away_team'] == opponent)
    ]
    hosted = played['home_team'] == opponent
    scored = np.nanmean(np.where(hosted, played['home_points'], played['away_points']))
    allowed = np.nanmean(np.where(hosted, played['away_points'], played['home_points']))
    return scored, allowed


def get_past_opponents_ppg_stats(df):
    """Add the average scoring strength of each team's past opponents to every game row.

    For the home and the away team of each game, and for each look-back window

    * ``cumulative``   - same year, strictly earlier week
    * ``past_1_years`` - the previous year, plus the same year's earlier weeks
    * ``past_2_years`` - the two previous years, plus the same year's earlier weeks

    the distinct opponents the team met inside the window are collected. For
    every opponent, its own points-per-game scored and allowed are averaged
    over *all* of that opponent's games in the window (including the games
    against the team itself). The unweighted mean of those per-opponent
    averages is written to

    * ``{side}_opponents_avg_ppg_{window}``
    * ``{side}_opponents_avg_ppg_allowed_{window}``

    Columns read: ``home_team``, ``away_team``, ``year``, ``week``,
    ``home_points``, ``away_points``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per game; the 12 result columns are added in place and
        initialised to 0.0.

    Returns
    -------
    pandas.DataFrame
        The same frame. A window without opponents keeps 0.0. No NaN filling
        is done here, so an opponent whose points are all NaN yields NaN.
    """
    sides = ('home', 'away')
    windows = ('cumulative', 'past_1_years', 'past_2_years')

    # Same creation order as the result columns are later consumed: metric, window, side.
    df[[
        f'{side}_opponents_avg_{metric}_{window}'
        for metric in ('ppg', 'ppg_allowed')
        for window in windows
        for side in sides
    ]] = 0.0

    for index, row in df.iterrows():
        year = row['year']
        week = row['week']

        # Filter the frame once per window instead of once per opponent lookup.
        earlier_this_season = (df['year'] == year) & (df['week'] < week)
        games_by_window = {
            'cumulative': df[earlier_this_season],
            'past_1_years': df[(df['year'] == year - 1) | earlier_this_season],
            'past_2_years': df[((df['year'] >= year - 2) & (df['year'] < year)) | earlier_this_season],
        }

        for side in sides:
            team = row[f'{side}_team']

            for window, window_games in games_by_window.items():
                team_games = window_games[
                    (window_games['home_team'] == team) | (window_games['away_team'] == team)
                ]
                # Everyone appearing in the team's games, minus the team itself.
                participants = pd.concat([team_games['home_team'], team_games['away_team']]).unique()
                opponents = [name for name in participants if name != team]
                if len(opponents) == 0:
                    continue

                scored_averages = []
                allowed_averages = []
                for opponent in opponents:
                    scored, allowed = _opponent_scoring_averages(window_games, opponent)
                    scored_averages.append(scored)
                    allowed_averages.append(allowed)

                df.at[index, f'{side}_opponents_avg_ppg_{window}'] = (
                    sum(scored_averages) / len(scored_averages)
                )
                df.at[index, f'{side}_opponents_avg_ppg_allowed_{window}'] = (
                    sum(allowed_averages) / len(allowed_averages)
                )
    return df

In [225]:
def get_relative_ppg_stats(df):
    """Derive scoring averages expressed relative to the opposition faced.

    Runs ``get_past_ppg_stats`` and then ``get_past_opponents_ppg_stats`` on
    ``df`` and, for every (team stat, opponent baseline) pair listed below,
    stores ``(stat - baseline) / baseline`` in a ``*_relative_*`` column.
    Rows whose baseline equals 0 receive 0.0 instead of a division result.

    Points scored are compared with what the team's opponents allowed, and
    points allowed are compared with what the team's opponents scored.

    Returns the frame produced by the two helper passes, with the twelve
    relative columns appended.
    """
    print("get_relative_ppg_stats")

    # Team averages first, then the matching opponent baselines.
    df = get_past_opponents_ppg_stats(get_past_ppg_stats(df))

    # target column -> (team stat, opponent baseline)
    pairings = {
        # offence: points scored vs. points the opponents usually allow
        'home_relative_avg_ppg_cumulative':
            ('home_avg_ppg_cumulative', 'home_opponents_avg_ppg_allowed_cumulative'),
        'away_relative_avg_ppg_cumulative':
            ('away_avg_ppg_cumulative', 'away_opponents_avg_ppg_allowed_cumulative'),
        'home_relative_avg_ppg_past_1_years':
            ('home_avg_ppg_past_1_years', 'home_opponents_avg_ppg_allowed_past_1_years'),
        'away_relative_avg_ppg_past_1_years':
            ('away_avg_ppg_past_1_years', 'away_opponents_avg_ppg_allowed_past_1_years'),
        'home_relative_avg_ppg_past_2_years':
            ('home_avg_ppg_past_2_years', 'home_opponents_avg_ppg_allowed_past_2_years'),
        'away_relative_avg_ppg_past_2_years':
            ('away_avg_ppg_past_2_years', 'away_opponents_avg_ppg_allowed_past_2_years'),
        # defence: points allowed vs. points the opponents usually score
        'home_relative_avg_ppg_allowed_cumulative':
            ('home_avg_ppg_allowed_cumulative', 'home_opponents_avg_ppg_cumulative'),
        'away_relative_avg_ppg_allowed_cumulative':
            ('away_avg_ppg_allowed_cumulative', 'away_opponents_avg_ppg_cumulative'),
        'home_relative_avg_ppg_allowed_past_1_years':
            ('home_avg_ppg_allowed_past_1_years', 'home_opponents_avg_ppg_past_1_years'),
        'away_relative_avg_ppg_allowed_past_1_years':
            ('away_avg_ppg_allowed_past_1_years', 'away_opponents_avg_ppg_past_1_years'),
        'home_relative_avg_ppg_allowed_past_2_years':
            ('home_avg_ppg_allowed_past_2_years', 'home_opponents_avg_ppg_past_2_years'),
        'away_relative_avg_ppg_allowed_past_2_years':
            ('away_avg_ppg_allowed_past_2_years', 'away_opponents_avg_ppg_past_2_years'),
    }

    for target, (own_stat, baseline_stat) in pairings.items():
        baseline = df[baseline_stat]
        # A zero baseline means "no opponent history"; report 0.0 rather than inf/NaN.
        df[target] = np.where(baseline == 0, 0.0, (df[own_stat] - baseline) / baseline)

    return df

In [226]:
# These columns won't be used for training.  They're just to generate relative stat values
def get_per_game_stats(df, start_year, end_year):
    """Attach per-game yardage stats for both teams to each game row.

    For every year in ``start_year..end_year`` (inclusive) and every week from
    1 up to and including the largest ``week`` present in ``df``, the
    ``/games/teams`` endpoint is queried via ``get_api_data``. The following
    stat categories are copied into ``home_*`` / ``away_*`` columns of the row
    whose year, week and home/away team match:

    * ``yardsPerRushAttempt`` -> ``game_yards_per_carry``
    * ``rushingYards``        -> ``game_rushing_yards``
    * ``yardsPerPass``        -> ``game_yards_per_pass``
    * ``netPassingYards``     -> ``game_passing_yards``
    * ``totalYards``          -> ``game_total_yards``

    Values are converted with ``pd.to_numeric(..., errors='coerce')``, so
    non-numeric strings become NaN. Weeks for which the request fails
    (``get_api_data`` returns ``None``) are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``year``, ``week``, ``home_team`` and ``away_team``; modified in place.
    start_year, end_year : int
        Inclusive range of seasons to fetch.

    Returns
    -------
    pandas.DataFrame
        The same frame with the stat columns filled where data was found.
    """
    print("get_per_game_stats")

    max_week = df['week'].max()
    stat_name_mappings = {
        'yardsPerRushAttempt': 'game_yards_per_carry',
        'rushingYards': 'game_rushing_yards',
        'yardsPerPass': 'game_yards_per_pass',
        'netPassingYards': 'game_passing_yards',
        'totalYards': 'game_total_yards'
    }

    # Collect the per-team box-score stats of every game
    for year in range(start_year, end_year + 1):
        # range() excludes its stop value, so add 1 to include the last week present in df
        for week in range(1, max_week + 1):
            params = {
                "year": year,
                "week": week
            }

            game_data = get_api_data("/games/teams", params)
            if game_data is None:
                # Failed request (already reported by get_api_data): skip this week instead of crashing
                continue

            # Rows of this year/week; year and week are never written below, so this stays valid
            in_week = (df['year'] == year) & (df['week'] == week)

            for game_stat in game_data:
                for team_stats in game_stat['teams']:
                    side = team_stats.get('homeAway')
                    if side not in ('home', 'away'):
                        continue

                    # Locate the game row once per team rather than once per stat
                    matching_index = df.index[in_week & (df[f'{side}_team'] == team_stats['school'])]

                    for stat in team_stats['stats']:
                        column_suffix = stat_name_mappings.get(stat['category'])
                        if column_suffix is None:
                            # Category we do not track
                            continue
                        df.loc[matching_index, f'{side}_{column_suffix}'] = pd.to_numeric(stat['stat'], errors='coerce')
    return df

In [227]:
def get_past_ypg_stats(df):
    columns_to_initialize = [
        'home_avg_total_ypg_cumulative',
        'away_avg_total_ypg_cumulative',
        'home_avg_total_ypg_past_1_years',
        'away_avg_total_ypg_past_1_years',
        'home_avg_total_ypg_past_2_years',
        'away_avg_total_ypg_past_2_years',
        'home_avg_total_ypg_allowed_cumulative',
        'away_avg_total_ypg_allowed_cumulative',
        'home_avg_total_ypg_allowed_past_1_years',
        'away_avg_total_ypg_allowed_past_1_years',
        'home_avg_total_ypg_allowed_past_2_years',
        'away_avg_total_ypg_allowed_past_2_years',

        'home_avg_passing_ypg_cumulative',
        'away_avg_passing_ypg_cumulative',
        'home_avg_passing_ypg_past_1_years',
        'away_avg_passing_ypg_past_1_years',
        'home_avg_passing_ypg_past_2_years',
        'away_avg_passing_ypg_past_2_years',
        'home_avg_passing_ypg_allowed_cumulative',
        'away_avg_passing_ypg_allowed_cumulative',
        'home_avg_passing_ypg_allowed_past_1_years',
        'away_avg_passing_ypg_allowed_past_1_years',
        'home_avg_passing_ypg_allowed_past_2_years',
        'away_avg_passing_ypg_allowed_past_2_years',

        'home_avg_passing_ypc_cumulative',
        'away_avg_passing_ypc_cumulative',
        'home_avg_passing_ypc_past_1_years',
        'away_avg_passing_ypc_past_1_years',
        'home_avg_passing_ypc_past_2_years',
        'away_avg_passing_ypc_past_2_years',
        'home_avg_passing_ypc_allowed_cumulative',
        'away_avg_passing_ypc_allowed_cumulative',
        'home_avg_passing_ypc_allowed_past_1_years',
        'away_avg_passing_ypc_allowed_past_1_years',
        'home_avg_passing_ypc_allowed_past_2_years',
        'away_avg_passing_ypc_allowed_past_2_years',
        
        'home_avg_rushing_ypg_cumulative',
        'away_avg_rushing_ypg_cumulative',
        'home_avg_rushing_ypg_past_1_years',
        'away_avg_rushing_ypg_past_1_years',
        'home_avg_rushing_ypg_past_2_years',
        'away_avg_rushing_ypg_past_2_years',
        'home_avg_rushing_ypg_allowed_cumulative',
        'away_avg_rushing_ypg_allowed_cumulative',
        'home_avg_rushing_ypg_allowed_past_1_years',
        'away_avg_rushing_ypg_allowed_past_1_years',
        'home_avg_rushing_ypg_allowed_past_2_years',
        'away_avg_rushing_ypg_allowed_past_2_years',

        'home_avg_rushing_ypc_cumulative',
        'away_avg_rushing_ypc_cumulative',
        'home_avg_rushing_ypc_past_1_years',
        'away_avg_rushing_ypc_past_1_years',
        'home_avg_rushing_ypc_past_2_years',
        'away_avg_rushing_ypc_past_2_years',
        'home_avg_rushing_ypc_allowed_cumulative',
        'away_avg_rushing_ypc_allowed_cumulative',
        'home_avg_rushing_ypc_allowed_past_1_years',
        'away_avg_rushing_ypc_allowed_past_1_years',
        'home_avg_rushing_ypc_allowed_past_2_years',
        'away_avg_rushing_ypc_allowed_past_2_years',
    ]

    df[columns_to_initialize] = 0.0

    for index, row in df.iterrows():
        home_team = row['home_team']
        away_team = row['away_team']
        year = row['year']
        week = row['week']
    
         # cumulative -> filter same year less week
        cumulative_home_data_filtered = df[((df['home_team'] == home_team) | (df['away_team'] == home_team)) & (df['year'] == year) & (df['week'] < week)]
        cumulative_away_data_filtered = df[((df['home_team'] == away_team) | (df['away_team'] == away_team)) & (df['year'] == year) & (df['week'] < week)]
    
        home_data_past_1_years_filtered = df[((df['home_team'] == home_team) | (df['away_team'] == home_team)) & ((df['year'] == year - 1) | ((df['year'] == year) & (df['week'] < week)))]
        away_data_past_1_years_filtered = df[((df['home_team'] == away_team) | (df['away_team'] == away_team)) & ((df['year'] == year - 1) | ((df['year'] == year) & (df['week'] < week)))]
    
        home_data_past_2_years_filtered = df[((df['home_team'] == home_team) | (df['away_team'] == home_team)) & (((df['year'] >= year - 2) & (df['year'] < year)) | ((df['year'] == year) & (df['week'] < week)))]
        away_data_past_2_years_filtered = df[((df['home_team'] == away_team) | (df['away_team'] == away_team)) & (((df['year'] >= year - 2) & (df['year'] < year)) | ((df['year'] == year) & (df['week'] < week)))]



        if cumulative_home_data_filtered.shape[0] > 0:
            df.at[index, 'home_avg_total_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['home_game_total_yards'],
                    cumulative_home_data_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'home_avg_total_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['away_game_total_yards'],
                    cumulative_home_data_filtered['home_game_total_yards']
                )
            )
            df.at[index, 'home_avg_passing_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['home_game_passing_yards'],
                    cumulative_home_data_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'home_avg_passing_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['away_game_passing_yards'],
                    cumulative_home_data_filtered['home_game_passing_yards']
                )
            )    
            df.at[index, 'home_avg_passing_ypc_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['home_game_yards_per_pass'],
                    cumulative_home_data_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'home_avg_passing_ypc_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['away_game_yards_per_pass'],
                    cumulative_home_data_filtered['home_game_yards_per_pass']
                )
            )   
            df.at[index, 'home_avg_rushing_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['home_game_rushing_yards'],
                    cumulative_home_data_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'home_avg_rushing_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['away_game_rushing_yards'],
                    cumulative_home_data_filtered['home_game_rushing_yards']
                )
            )  
            df.at[index, 'home_avg_rushing_ypc_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['home_game_yards_per_carry'],
                    cumulative_home_data_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'home_avg_rushing_ypc_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_home_data_filtered['home_team'] == home_team,
                    cumulative_home_data_filtered['away_game_yards_per_carry'],
                    cumulative_home_data_filtered['home_game_yards_per_carry']
                )
            )  

        
        if cumulative_away_data_filtered.shape[0] > 0:
            df.at[index, 'away_avg_total_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['home_game_total_yards'],
                    cumulative_away_data_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'away_avg_total_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['away_game_total_yards'],
                    cumulative_away_data_filtered['home_game_total_yards']
                )
            )   
            df.at[index, 'away_avg_passing_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['home_game_passing_yards'],
                    cumulative_away_data_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'away_avg_passing_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['away_game_passing_yards'],
                    cumulative_away_data_filtered['home_game_passing_yards']
                )
            )
            df.at[index, 'away_avg_passing_ypc_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['home_game_yards_per_pass'],
                    cumulative_away_data_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'away_avg_passing_ypc_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['away_game_yards_per_pass'],
                    cumulative_away_data_filtered['home_game_yards_per_pass']
                )
            )  
            df.at[index, 'away_avg_rushing_ypg_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['home_game_rushing_yards'],
                    cumulative_away_data_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'away_avg_rushing_ypg_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['away_game_rushing_yards'],
                    cumulative_away_data_filtered['home_game_rushing_yards']
                )
            )
            df.at[index, 'away_avg_rushing_ypc_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['home_game_yards_per_carry'],
                    cumulative_away_data_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'away_avg_rushing_ypc_allowed_cumulative'] = np.nanmean(
                np.where(
                    cumulative_away_data_filtered['home_team'] == away_team,
                    cumulative_away_data_filtered['away_game_yards_per_carry'],
                    cumulative_away_data_filtered['home_game_yards_per_carry']
                )
            )

        
        if home_data_past_1_years_filtered.shape[0] > 0:
            df.at[index, 'home_avg_total_ypg_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['home_game_total_yards'],
                    home_data_past_1_years_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'home_avg_total_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['away_game_total_yards'],
                    home_data_past_1_years_filtered['home_game_total_yards']
                )
            ) 
            df.at[index, 'home_avg_passing_ypg_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['home_game_passing_yards'],
                    home_data_past_1_years_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'home_avg_passing_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['away_game_passing_yards'],
                    home_data_past_1_years_filtered['home_game_passing_yards']
                )
            )   
            df.at[index, 'home_avg_passing_ypc_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['home_game_yards_per_pass'],
                    home_data_past_1_years_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'home_avg_passing_ypc_allowed_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['away_game_yards_per_pass'],
                    home_data_past_1_years_filtered['home_game_yards_per_pass']
                )
            )  
            df.at[index, 'home_avg_rushing_ypg_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['home_game_rushing_yards'],
                    home_data_past_1_years_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'home_avg_rushing_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['away_game_rushing_yards'],
                    home_data_past_1_years_filtered['home_game_rushing_yards']
                )
            )
            df.at[index, 'home_avg_rushing_ypc_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['home_game_yards_per_carry'],
                    home_data_past_1_years_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'home_avg_rushing_ypc_allowed_past_1_years'] = np.nanmean(
                np.where(
                    home_data_past_1_years_filtered['home_team'] == home_team,
                    home_data_past_1_years_filtered['away_game_yards_per_carry'],
                    home_data_past_1_years_filtered['home_game_yards_per_carry']
                )
            )             
          
        if away_data_past_1_years_filtered.shape[0] > 0:
            df.at[index, 'away_avg_total_ypg_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['home_game_total_yards'],
                    away_data_past_1_years_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'away_avg_total_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['away_game_total_yards'],
                    away_data_past_1_years_filtered['home_game_total_yards']
                )
            ) 
            df.at[index, 'away_avg_passing_ypg_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['home_game_passing_yards'],
                    away_data_past_1_years_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'away_avg_passing_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['away_game_passing_yards'],
                    away_data_past_1_years_filtered['home_game_passing_yards']
                )
            )   
            df.at[index, 'away_avg_passing_ypc_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['home_game_yards_per_pass'],
                    away_data_past_1_years_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'away_avg_passing_ypc_allowed_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['away_game_yards_per_pass'],
                    away_data_past_1_years_filtered['home_game_yards_per_pass']
                )
            )   
            df.at[index, 'away_avg_rushing_ypg_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['home_game_rushing_yards'],
                    away_data_past_1_years_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'away_avg_rushing_ypg_allowed_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['away_game_rushing_yards'],
                    away_data_past_1_years_filtered['home_game_rushing_yards']
                )
            ) 
            df.at[index, 'away_avg_rushing_ypc_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['home_game_yards_per_carry'],
                    away_data_past_1_years_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'away_avg_rushing_ypc_allowed_past_1_years'] = np.nanmean(
                np.where(
                    away_data_past_1_years_filtered['home_team'] == away_team,
                    away_data_past_1_years_filtered['away_game_yards_per_carry'],
                    away_data_past_1_years_filtered['home_game_yards_per_carry']
                )
            )             
        
        if home_data_past_2_years_filtered.shape[0] > 0:
            df.at[index, 'home_avg_total_ypg_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['home_game_total_yards'],
                    home_data_past_2_years_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'home_avg_total_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['away_game_total_yards'],
                    home_data_past_2_years_filtered['home_game_total_yards']
                )
            ) 
            df.at[index, 'home_avg_passing_ypg_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['home_game_passing_yards'],
                    home_data_past_2_years_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'home_avg_passing_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['away_game_passing_yards'],
                    home_data_past_2_years_filtered['home_game_passing_yards']
                )
            ) 
            df.at[index, 'home_avg_passing_ypc_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['home_game_yards_per_pass'],
                    home_data_past_2_years_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'home_avg_passing_ypc_allowed_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['away_game_yards_per_pass'],
                    home_data_past_2_years_filtered['home_game_yards_per_pass']
                )
            ) 
            df.at[index, 'home_avg_rushing_ypg_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['home_game_rushing_yards'],
                    home_data_past_2_years_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'home_avg_rushing_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['away_game_rushing_yards'],
                    home_data_past_2_years_filtered['home_game_rushing_yards']
                )
            ) 
            df.at[index, 'home_avg_rushing_ypc_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['home_game_yards_per_carry'],
                    home_data_past_2_years_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'home_avg_rushing_ypc_allowed_past_2_years'] = np.nanmean(
                np.where(
                    home_data_past_2_years_filtered['home_team'] == home_team,
                    home_data_past_2_years_filtered['away_game_yards_per_carry'],
                    home_data_past_2_years_filtered['home_game_yards_per_carry']
                )
            ) 
       
        if away_data_past_2_years_filtered.shape[0] > 0:
            df.at[index, 'away_avg_total_ypg_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['home_game_total_yards'],
                    away_data_past_2_years_filtered['away_game_total_yards']
                )
            )
            df.at[index, 'away_avg_total_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['away_game_total_yards'],
                    away_data_past_2_years_filtered['home_game_total_yards']
                )
            ) 
            df.at[index, 'away_avg_passing_ypg_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['home_game_passing_yards'],
                    away_data_past_2_years_filtered['away_game_passing_yards']
                )
            )
            df.at[index, 'away_avg_passing_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['away_game_passing_yards'],
                    away_data_past_2_years_filtered['home_game_passing_yards']
                )
            ) 
            df.at[index, 'away_avg_passing_ypc_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['home_game_yards_per_pass'],
                    away_data_past_2_years_filtered['away_game_yards_per_pass']
                )
            )
            df.at[index, 'away_avg_passing_ypc_allowed_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['away_game_yards_per_pass'],
                    away_data_past_2_years_filtered['home_game_yards_per_pass']
                )
            )             
            df.at[index, 'away_avg_rushing_ypg_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['home_game_rushing_yards'],
                    away_data_past_2_years_filtered['away_game_rushing_yards']
                )
            )
            df.at[index, 'away_avg_rushing_ypg_allowed_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['away_game_rushing_yards'],
                    away_data_past_2_years_filtered['home_game_rushing_yards']
                )
            ) 
            df.at[index, 'away_avg_rushing_ypc_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['home_game_yards_per_carry'],
                    away_data_past_2_years_filtered['away_game_yards_per_carry']
                )
            )
            df.at[index, 'away_avg_rushing_ypc_allowed_past_2_years'] = np.nanmean(
                np.where(
                    away_data_past_2_years_filtered['home_team'] == away_team,
                    away_data_past_2_years_filtered['away_game_yards_per_carry'],
                    away_data_past_2_years_filtered['home_game_yards_per_carry']
                )
            )             
    
    df = df.fillna(0)
    return df

In [228]:
def get_past_opponents_ypg_stats(df):
    columns_to_initialize = [
        'home_opponents_avg_total_ypg_cumulative',
        'away_opponents_avg_total_ypg_cumulative',
        'home_opponents_avg_total_ypg_past_1_years',
        'away_opponents_avg_total_ypg_past_1_years',
        'home_opponents_avg_total_ypg_past_2_years',
        'away_opponents_avg_total_ypg_past_2_years',
        'home_opponents_avg_total_ypg_allowed_cumulative',
        'away_opponents_avg_total_ypg_allowed_cumulative',
        'home_opponents_avg_total_ypg_allowed_past_1_years',
        'away_opponents_avg_total_ypg_allowed_past_1_years',
        'home_opponents_avg_total_ypg_allowed_past_2_years',
        'away_opponents_avg_total_ypg_allowed_past_2_years',

        'home_opponents_avg_passing_ypg_cumulative',
        'away_opponents_avg_passing_ypg_cumulative',
        'home_opponents_avg_passing_ypg_past_1_years',
        'away_opponents_avg_passing_ypg_past_1_years',
        'home_opponents_avg_passing_ypg_past_2_years',
        'away_opponents_avg_passing_ypg_past_2_years',
        'home_opponents_avg_passing_ypg_allowed_cumulative',
        'away_opponents_avg_passing_ypg_allowed_cumulative',
        'home_opponents_avg_passing_ypg_allowed_past_1_years',
        'away_opponents_avg_passing_ypg_allowed_past_1_years',
        'home_opponents_avg_passing_ypg_allowed_past_2_years',
        'away_opponents_avg_passing_ypg_allowed_past_2_years',

        'home_opponents_avg_passing_ypc_cumulative',
        'away_opponents_avg_passing_ypc_cumulative',
        'home_opponents_avg_passing_ypc_past_1_years',
        'away_opponents_avg_passing_ypc_past_1_years',
        'home_opponents_avg_passing_ypc_past_2_years',
        'away_opponents_avg_passing_ypc_past_2_years',
        'home_opponents_avg_passing_ypc_allowed_cumulative',
        'away_opponents_avg_passing_ypc_allowed_cumulative',
        'home_opponents_avg_passing_ypc_allowed_past_1_years',
        'away_opponents_avg_passing_ypc_allowed_past_1_years',
        'home_opponents_avg_passing_ypc_allowed_past_2_years',
        'away_opponents_avg_passing_ypc_allowed_past_2_years',
        
        'home_opponents_avg_rushing_ypg_cumulative',
        'away_opponents_avg_rushing_ypg_cumulative',
        'home_opponents_avg_rushing_ypg_past_1_years',
        'away_opponents_avg_rushing_ypg_past_1_years',
        'home_opponents_avg_rushing_ypg_past_2_years',
        'away_opponents_avg_rushing_ypg_past_2_years',
        'home_opponents_avg_rushing_ypg_allowed_cumulative',
        'away_opponents_avg_rushing_ypg_allowed_cumulative',
        'home_opponents_avg_rushing_ypg_allowed_past_1_years',
        'away_opponents_avg_rushing_ypg_allowed_past_1_years',
        'home_opponents_avg_rushing_ypg_allowed_past_2_years',
        'away_opponents_avg_rushing_ypg_allowed_past_2_years',

        'home_opponents_avg_rushing_ypc_cumulative',
        'away_opponents_avg_rushing_ypc_cumulative',
        'home_opponents_avg_rushing_ypc_past_1_years',
        'away_opponents_avg_rushing_ypc_past_1_years',
        'home_opponents_avg_rushing_ypc_past_2_years',
        'away_opponents_avg_rushing_ypc_past_2_years',
        'home_opponents_avg_rushing_ypc_allowed_cumulative',
        'away_opponents_avg_rushing_ypc_allowed_cumulative',
        'home_opponents_avg_rushing_ypc_allowed_past_1_years',
        'away_opponents_avg_rushing_ypc_allowed_past_1_years',
        'home_opponents_avg_rushing_ypc_allowed_past_2_years',
        'away_opponents_avg_rushing_ypc_allowed_past_2_years'
    ]

    df[columns_to_initialize] = 0.0

    for index, row in df.iterrows():
        year = row['year']
        week = row['week']
        home_team = row['home_team']
        away_team = row['away_team']
    
        home_team_filter_condition = ((df['home_team'] == home_team) | (df['away_team'] == home_team))
        away_team_filter_condition = ((df['home_team'] == away_team) | (df['away_team'] == away_team))
    
        cumulative_date_filter_condition = (df['year'] == year) & (df['week'] < week)
        past_1_years_date_filter_condition = ((df['year'] == year - 1) | ((df['year'] == year) & (df['week'] < week)))
        past_2_years_date_filter_condition = (((df['year'] >= year - 2) & (df['year'] < year)) | ((df['year'] == year) & (df['week'] < week)))
    
        cumulative_home_team_opponents = pd.concat([df[cumulative_date_filter_condition].loc[home_team_filter_condition, 'home_team'], df[cumulative_date_filter_condition].loc[home_team_filter_condition, 'away_team']]).unique()
        cumulative_away_team_opponents = pd.concat([df[cumulative_date_filter_condition].loc[away_team_filter_condition, 'home_team'], df[cumulative_date_filter_condition].loc[away_team_filter_condition, 'away_team']]).unique()
        past_1_years_home_team_opponents = pd.concat([df[past_1_years_date_filter_condition].loc[home_team_filter_condition, 'home_team'], df[past_1_years_date_filter_condition].loc[home_team_filter_condition, 'away_team']]).unique()
        past_1_years_away_team_opponents = pd.concat([df[past_1_years_date_filter_condition].loc[away_team_filter_condition, 'home_team'], df[past_1_years_date_filter_condition].loc[away_team_filter_condition, 'away_team']]).unique()
        past_2_years_home_team_opponents = pd.concat([df[past_2_years_date_filter_condition].loc[home_team_filter_condition, 'home_team'], df[past_2_years_date_filter_condition].loc[home_team_filter_condition, 'away_team']]).unique()
        past_2_years_away_team_opponents = pd.concat([df[past_2_years_date_filter_condition].loc[away_team_filter_condition, 'home_team'], df[past_2_years_date_filter_condition].loc[away_team_filter_condition, 'away_team']]).unique()
        
        cumulative_home_team_opponents = [t for t in cumulative_home_team_opponents if t != home_team]
        cumulative_away_team_opponents = [t for t in cumulative_away_team_opponents if t != away_team]
        past_1_years_home_team_opponents = [t for t in past_1_years_home_team_opponents if t != home_team]
        past_1_years_away_team_opponents = [t for t in past_1_years_away_team_opponents if t != away_team]
        past_2_years_home_team_opponents = [t for t in past_2_years_home_team_opponents if t != home_team]
        past_2_years_away_team_opponents = [t for t in past_2_years_away_team_opponents if t != away_team]
        
        home_opponents_avg_total_ypg_cumulative = []
        away_opponents_avg_total_ypg_cumulative = []
        home_opponents_avg_total_ypg_past_1_years = []
        away_opponents_avg_total_ypg_past_1_years = []
        home_opponents_avg_total_ypg_past_2_years = []
        away_opponents_avg_total_ypg_past_2_years = []
        home_opponents_avg_total_ypg_allowed_cumulative = []
        away_opponents_avg_total_ypg_allowed_cumulative = []
        home_opponents_avg_total_ypg_allowed_past_1_years = []
        away_opponents_avg_total_ypg_allowed_past_1_years = []
        home_opponents_avg_total_ypg_allowed_past_2_years = []
        away_opponents_avg_total_ypg_allowed_past_2_years = []

        home_opponents_avg_passing_ypg_cumulative = []
        away_opponents_avg_passing_ypg_cumulative = []
        home_opponents_avg_passing_ypg_past_1_years = []
        away_opponents_avg_passing_ypg_past_1_years = []
        home_opponents_avg_passing_ypg_past_2_years = []
        away_opponents_avg_passing_ypg_past_2_years = []
        home_opponents_avg_passing_ypg_allowed_cumulative = []
        away_opponents_avg_passing_ypg_allowed_cumulative = []
        home_opponents_avg_passing_ypg_allowed_past_1_years = []
        away_opponents_avg_passing_ypg_allowed_past_1_years = []
        home_opponents_avg_passing_ypg_allowed_past_2_years = []
        away_opponents_avg_passing_ypg_allowed_past_2_years = []

        home_opponents_avg_passing_ypc_cumulative = []
        away_opponents_avg_passing_ypc_cumulative = []
        home_opponents_avg_passing_ypc_past_1_years = []
        away_opponents_avg_passing_ypc_past_1_years = []
        home_opponents_avg_passing_ypc_past_2_years = []
        away_opponents_avg_passing_ypc_past_2_years = []
        home_opponents_avg_passing_ypc_allowed_cumulative = []
        away_opponents_avg_passing_ypc_allowed_cumulative = []
        home_opponents_avg_passing_ypc_allowed_past_1_years = []
        away_opponents_avg_passing_ypc_allowed_past_1_years = []
        home_opponents_avg_passing_ypc_allowed_past_2_years = []
        away_opponents_avg_passing_ypc_allowed_past_2_years = []

        home_opponents_avg_rushing_ypg_cumulative = []
        away_opponents_avg_rushing_ypg_cumulative = []
        home_opponents_avg_rushing_ypg_past_1_years = []
        away_opponents_avg_rushing_ypg_past_1_years = []
        home_opponents_avg_rushing_ypg_past_2_years = []
        away_opponents_avg_rushing_ypg_past_2_years = []
        home_opponents_avg_rushing_ypg_allowed_cumulative = []
        away_opponents_avg_rushing_ypg_allowed_cumulative = []
        home_opponents_avg_rushing_ypg_allowed_past_1_years = []
        away_opponents_avg_rushing_ypg_allowed_past_1_years = []
        home_opponents_avg_rushing_ypg_allowed_past_2_years = []
        away_opponents_avg_rushing_ypg_allowed_past_2_years = []

        home_opponents_avg_rushing_ypc_cumulative = []
        away_opponents_avg_rushing_ypc_cumulative = []
        home_opponents_avg_rushing_ypc_past_1_years = []
        away_opponents_avg_rushing_ypc_past_1_years = []
        home_opponents_avg_rushing_ypc_past_2_years = []
        away_opponents_avg_rushing_ypc_past_2_years = []
        home_opponents_avg_rushing_ypc_allowed_cumulative = []
        away_opponents_avg_rushing_ypc_allowed_cumulative = []
        home_opponents_avg_rushing_ypc_allowed_past_1_years = []
        away_opponents_avg_rushing_ypc_allowed_past_1_years = []
        home_opponents_avg_rushing_ypc_allowed_past_2_years = []
        away_opponents_avg_rushing_ypc_allowed_past_2_years = []

        
        for team in cumulative_home_team_opponents:
            team_data = df[cumulative_date_filter_condition][((df[cumulative_date_filter_condition]['home_team'] == team) | (df[cumulative_date_filter_condition]['away_team'] == team))]
            home_opponents_avg_total_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_total_yards'],
                        team_data['away_game_total_yards']
                    )
                )
            )
            home_opponents_avg_total_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_total_yards'],
                        team_data['home_game_total_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_passing_yards'],
                        team_data['away_game_passing_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_passing_yards'],
                        team_data['home_game_passing_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypc_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_yards_per_pass'],
                        team_data['away_game_yards_per_pass']
                    )
                )
            )
            home_opponents_avg_passing_ypc_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_yards_per_pass'],
                        team_data['home_game_yards_per_pass']
                    )
                )
            )       
            home_opponents_avg_rushing_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_rushing_yards'],
                        team_data['away_game_rushing_yards']
                    )
                )
            )
            home_opponents_avg_rushing_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_rushing_yards'],
                        team_data['home_game_rushing_yards']
                    )
                )
            )
            home_opponents_avg_rushing_ypc_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_yards_per_carry'],
                        team_data['away_game_yards_per_carry']
                    )
                )
            )
            home_opponents_avg_rushing_ypc_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_yards_per_carry'],
                        team_data['home_game_yards_per_carry']
                    )
                )
            )            
        
        for team in cumulative_away_team_opponents:
            team_data = df[cumulative_date_filter_condition][((df[cumulative_date_filter_condition]['home_team'] == team) | (df[cumulative_date_filter_condition]['away_team'] == team))]
            away_opponents_avg_total_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_total_yards'],
                        team_data['away_game_total_yards']
                    )
                )
            )
            away_opponents_avg_total_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_total_yards'],
                        team_data['home_game_total_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_passing_yards'],
                        team_data['away_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_passing_yards'],
                        team_data['home_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypc_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_yards_per_pass'],
                        team_data['away_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_passing_ypc_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_yards_per_pass'],
                        team_data['home_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_rushing_yards'],
                        team_data['away_game_rushing_yards']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_rushing_yards'],
                        team_data['home_game_rushing_yards']
                    )
                )
            )            
            away_opponents_avg_rushing_ypc_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['home_game_yards_per_carry'],
                        team_data['away_game_yards_per_carry']
                    )
                )
            )
            away_opponents_avg_rushing_ypc_allowed_cumulative.append(
                np.nanmean(
                    np.where(
                        team_data['home_team'] == team,
                        team_data['away_game_yards_per_carry'],
                        team_data['home_game_yards_per_carry']
                    )
                )
            )    
            
        for team in past_1_years_home_team_opponents:
            team_data_past_1_years = df[past_1_years_date_filter_condition][((df[past_1_years_date_filter_condition]['home_team'] == team) | (df[past_1_years_date_filter_condition]['away_team'] == team))]
            home_opponents_avg_total_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_total_yards'],
                        team_data_past_1_years['away_game_total_yards']
                    )
                )
            ) 
            home_opponents_avg_total_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_total_yards'],
                        team_data_past_1_years['home_game_total_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_passing_yards'],
                        team_data_past_1_years['away_game_passing_yards']
                    )
                )
            ) 
            home_opponents_avg_passing_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_passing_yards'],
                        team_data_past_1_years['home_game_passing_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypc_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_yards_per_pass'],
                        team_data_past_1_years['away_game_yards_per_pass']
                    )
                )
            ) 
            home_opponents_avg_passing_ypc_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_yards_per_pass'],
                        team_data_past_1_years['home_game_yards_per_pass']
                    )
                )
            )
            home_opponents_avg_rushing_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_rushing_yards'],
                        team_data_past_1_years['away_game_rushing_yards']
                    )
                )
            ) 
            home_opponents_avg_rushing_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_rushing_yards'],
                        team_data_past_1_years['home_game_rushing_yards']
                    )
                )
            )
            home_opponents_avg_rushing_ypc_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_yards_per_carry'],
                        team_data_past_1_years['away_game_yards_per_carry']
                    )
                )
            ) 
            home_opponents_avg_rushing_ypc_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_yards_per_carry'],
                        team_data_past_1_years['home_game_yards_per_carry']
                    )
                )
            )
            
        for team in past_1_years_away_team_opponents:        
            team_data_past_1_years = df[past_1_years_date_filter_condition][((df[past_1_years_date_filter_condition]['home_team'] == team) | (df[past_1_years_date_filter_condition]['away_team'] == team))]
            away_opponents_avg_total_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_total_yards'],
                        team_data_past_1_years['away_game_total_yards']
                    )
                )
            )
            away_opponents_avg_total_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_total_yards'],
                        team_data_past_1_years['home_game_total_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_passing_yards'],
                        team_data_past_1_years['away_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_passing_yards'],
                        team_data_past_1_years['home_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypc_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_yards_per_pass'],
                        team_data_past_1_years['away_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_passing_ypc_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_yards_per_pass'],
                        team_data_past_1_years['home_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_rushing_yards'],
                        team_data_past_1_years['away_game_rushing_yards']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_rushing_yards'],
                        team_data_past_1_years['home_game_rushing_yards']
                    )
                )
            )
            away_opponents_avg_rushing_ypc_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['home_game_yards_per_carry'],
                        team_data_past_1_years['away_game_yards_per_carry']
                    )
                )
            )
            away_opponents_avg_rushing_ypc_allowed_past_1_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_1_years['home_team'] == team,
                        team_data_past_1_years['away_game_yards_per_carry'],
                        team_data_past_1_years['home_game_yards_per_carry']
                    )
                )
            )
            
        for team in past_2_years_home_team_opponents:    
            team_data_past_2_years = df[past_2_years_date_filter_condition][((df[past_2_years_date_filter_condition]['home_team'] == team) | (df[past_2_years_date_filter_condition]['away_team'] == team))]
            home_opponents_avg_total_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_total_yards'],
                        team_data_past_2_years['away_game_total_yards']
                    )
                )
            )
            home_opponents_avg_total_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_total_yards'],
                        team_data_past_2_years['home_game_total_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_passing_yards'],
                        team_data_past_2_years['away_game_passing_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_passing_yards'],
                        team_data_past_2_years['home_game_passing_yards']
                    )
                )
            )
            home_opponents_avg_passing_ypc_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_yards_per_pass'],
                        team_data_past_2_years['away_game_yards_per_pass']
                    )
                )
            )
            home_opponents_avg_passing_ypc_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_yards_per_pass'],
                        team_data_past_2_years['home_game_yards_per_pass']
                    )
                )
            ) 
            home_opponents_avg_rushing_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_rushing_yards'],
                        team_data_past_2_years['away_game_rushing_yards']
                    )
                )
            )
            home_opponents_avg_rushing_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_rushing_yards'],
                        team_data_past_2_years['home_game_rushing_yards']
                    )
                )
            )
            home_opponents_avg_rushing_ypc_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_yards_per_carry'],
                        team_data_past_2_years['away_game_yards_per_carry']
                    )
                )
            )
            home_opponents_avg_rushing_ypc_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_yards_per_carry'],
                        team_data_past_2_years['home_game_yards_per_carry']
                    )
                )
            )

        
        for team in past_2_years_away_team_opponents:        
            team_data_past_2_years = df[past_2_years_date_filter_condition][((df[past_2_years_date_filter_condition]['home_team'] == team) | (df[past_2_years_date_filter_condition]['away_team'] == team))]
            away_opponents_avg_total_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_total_yards'],
                        team_data_past_2_years['away_game_total_yards']
                    )
                )
            )
            away_opponents_avg_total_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_total_yards'],
                        team_data_past_2_years['home_game_total_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_passing_yards'],
                        team_data_past_2_years['away_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_passing_yards'],
                        team_data_past_2_years['home_game_passing_yards']
                    )
                )
            )
            away_opponents_avg_passing_ypc_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_yards_per_pass'],
                        team_data_past_2_years['away_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_passing_ypc_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_yards_per_pass'],
                        team_data_past_2_years['home_game_yards_per_pass']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_rushing_yards'],
                        team_data_past_2_years['away_game_rushing_yards']
                    )
                )
            )
            away_opponents_avg_rushing_ypg_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_rushing_yards'],
                        team_data_past_2_years['home_game_rushing_yards']
                    )
                )
            )
            away_opponents_avg_rushing_ypc_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['home_game_yards_per_carry'],
                        team_data_past_2_years['away_game_yards_per_carry']
                    )
                )
            )
            away_opponents_avg_rushing_ypc_allowed_past_2_years.append(
                np.nanmean(
                    np.where(
                        team_data_past_2_years['home_team'] == team,
                        team_data_past_2_years['away_game_yards_per_carry'],
                        team_data_past_2_years['home_game_yards_per_carry']
                    )
                )
            )     
            
        if len(home_opponents_avg_total_ypg_cumulative) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_cumulative'] = np.nanmean(home_opponents_avg_total_ypg_cumulative)
        if len(away_opponents_avg_total_ypg_cumulative) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_cumulative'] = np.nanmean(away_opponents_avg_total_ypg_cumulative)
        if len(home_opponents_avg_total_ypg_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_past_1_years'] = np.nanmean(home_opponents_avg_total_ypg_past_1_years)
        if len(away_opponents_avg_total_ypg_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_past_1_years'] = np.nanmean(away_opponents_avg_total_ypg_past_1_years)
        if len(home_opponents_avg_total_ypg_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_past_2_years'] = np.nanmean(home_opponents_avg_total_ypg_past_2_years)
        if len(away_opponents_avg_total_ypg_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_past_2_years'] = np.nanmean(away_opponents_avg_total_ypg_past_2_years)
        if len(home_opponents_avg_total_ypg_allowed_cumulative) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_allowed_cumulative'] = np.nanmean(home_opponents_avg_total_ypg_allowed_cumulative)
        if len(away_opponents_avg_total_ypg_allowed_cumulative) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_allowed_cumulative'] = np.nanmean(away_opponents_avg_total_ypg_allowed_cumulative)
        if len(home_opponents_avg_total_ypg_allowed_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_allowed_past_1_years'] = np.nanmean(home_opponents_avg_total_ypg_allowed_past_1_years)
        if len(away_opponents_avg_total_ypg_allowed_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_allowed_past_1_years'] = np.nanmean(away_opponents_avg_total_ypg_allowed_past_1_years)
        if len(home_opponents_avg_total_ypg_allowed_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_total_ypg_allowed_past_2_years'] = np.nanmean(home_opponents_avg_total_ypg_allowed_past_2_years)
        if len(away_opponents_avg_total_ypg_allowed_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_total_ypg_allowed_past_2_years'] = np.nanmean(away_opponents_avg_total_ypg_allowed_past_2_years)      
        
        if len(home_opponents_avg_passing_ypg_cumulative) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_cumulative'] = np.nanmean(home_opponents_avg_passing_ypg_cumulative)
        if len(away_opponents_avg_passing_ypg_cumulative) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_cumulative'] = np.nanmean(away_opponents_avg_passing_ypg_cumulative)
        if len(home_opponents_avg_passing_ypg_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_past_1_years'] = np.nanmean(home_opponents_avg_passing_ypg_past_1_years)
        if len(away_opponents_avg_passing_ypg_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_past_1_years'] = np.nanmean(away_opponents_avg_passing_ypg_past_1_years) 
        if len(home_opponents_avg_passing_ypg_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_past_2_years'] = np.nanmean(home_opponents_avg_passing_ypg_past_2_years)
        if len(away_opponents_avg_passing_ypg_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_past_2_years'] = np.nanmean(away_opponents_avg_passing_ypg_past_2_years)
        if len(home_opponents_avg_passing_ypg_allowed_cumulative) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_allowed_cumulative'] = np.nanmean(home_opponents_avg_passing_ypg_allowed_cumulative)
        if len(away_opponents_avg_passing_ypg_allowed_cumulative) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_allowed_cumulative'] = np.nanmean(away_opponents_avg_passing_ypg_allowed_cumulative)
        if len(home_opponents_avg_passing_ypg_allowed_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_allowed_past_1_years'] = np.nanmean(home_opponents_avg_passing_ypg_allowed_past_1_years)
        if len(away_opponents_avg_passing_ypg_allowed_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_allowed_past_1_years'] = np.nanmean(away_opponents_avg_passing_ypg_allowed_past_1_years)
        if len(home_opponents_avg_passing_ypg_allowed_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypg_allowed_past_2_years'] = np.nanmean(home_opponents_avg_passing_ypg_allowed_past_2_years)
        if len(away_opponents_avg_passing_ypg_allowed_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypg_allowed_past_2_years'] = np.nanmean(away_opponents_avg_passing_ypg_allowed_past_2_years)  

        if len(home_opponents_avg_passing_ypc_cumulative) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_cumulative'] = np.nanmean(home_opponents_avg_passing_ypc_cumulative)
        if len(away_opponents_avg_passing_ypc_cumulative) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_cumulative'] = np.nanmean(away_opponents_avg_passing_ypc_cumulative)
        if len(home_opponents_avg_passing_ypc_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_past_1_years'] = np.nanmean(home_opponents_avg_passing_ypc_past_1_years)
        if len(away_opponents_avg_passing_ypc_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_past_1_years'] = np.nanmean(away_opponents_avg_passing_ypc_past_1_years) 
        if len(home_opponents_avg_passing_ypc_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_past_2_years'] = np.nanmean(home_opponents_avg_passing_ypc_past_2_years)
        if len(away_opponents_avg_passing_ypc_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_past_2_years'] = np.nanmean(away_opponents_avg_passing_ypc_past_2_years)
        if len(home_opponents_avg_passing_ypc_allowed_cumulative) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_allowed_cumulative'] = np.nanmean(home_opponents_avg_passing_ypc_allowed_cumulative)
        if len(away_opponents_avg_passing_ypc_allowed_cumulative) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_allowed_cumulative'] = np.nanmean(away_opponents_avg_passing_ypc_allowed_cumulative)
        if len(home_opponents_avg_passing_ypc_allowed_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_allowed_past_1_years'] = np.nanmean(home_opponents_avg_passing_ypc_allowed_past_1_years)
        if len(away_opponents_avg_passing_ypc_allowed_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_allowed_past_1_years'] = np.nanmean(away_opponents_avg_passing_ypc_allowed_past_1_years)
        if len(home_opponents_avg_passing_ypc_allowed_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_passing_ypc_allowed_past_2_years'] = np.nanmean(home_opponents_avg_passing_ypc_allowed_past_2_years)
        if len(away_opponents_avg_passing_ypc_allowed_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_passing_ypc_allowed_past_2_years'] = np.nanmean(away_opponents_avg_passing_ypc_allowed_past_2_years)  

        if len(home_opponents_avg_rushing_ypg_cumulative) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_cumulative'] = np.nanmean(home_opponents_avg_rushing_ypg_cumulative)
        if len(away_opponents_avg_rushing_ypg_cumulative) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_cumulative'] = np.nanmean(away_opponents_avg_rushing_ypg_cumulative)
        if len(home_opponents_avg_rushing_ypg_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_past_1_years'] = np.nanmean(home_opponents_avg_rushing_ypg_past_1_years)
        if len(away_opponents_avg_rushing_ypg_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_past_1_years'] = np.nanmean(away_opponents_avg_rushing_ypg_past_1_years) 
        if len(home_opponents_avg_rushing_ypg_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_past_2_years'] = np.nanmean(home_opponents_avg_rushing_ypg_past_2_years)
        if len(away_opponents_avg_rushing_ypg_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_past_2_years'] = np.nanmean(away_opponents_avg_rushing_ypg_past_2_years)
        if len(home_opponents_avg_rushing_ypg_allowed_cumulative) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_allowed_cumulative'] = np.nanmean(home_opponents_avg_rushing_ypg_allowed_cumulative)
        if len(away_opponents_avg_rushing_ypg_allowed_cumulative) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_allowed_cumulative'] = np.nanmean(away_opponents_avg_rushing_ypg_allowed_cumulative)
        if len(home_opponents_avg_rushing_ypg_allowed_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_allowed_past_1_years'] = np.nanmean(home_opponents_avg_rushing_ypg_allowed_past_1_years)
        if len(away_opponents_avg_rushing_ypg_allowed_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_allowed_past_1_years'] = np.nanmean(away_opponents_avg_rushing_ypg_allowed_past_1_years)
        if len(home_opponents_avg_rushing_ypg_allowed_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypg_allowed_past_2_years'] = np.nanmean(home_opponents_avg_rushing_ypg_allowed_past_2_years)
        if len(away_opponents_avg_rushing_ypg_allowed_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypg_allowed_past_2_years'] = np.nanmean(away_opponents_avg_rushing_ypg_allowed_past_2_years)  

        if len(home_opponents_avg_rushing_ypc_cumulative) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_cumulative'] = np.nanmean(home_opponents_avg_rushing_ypc_cumulative)
        if len(away_opponents_avg_rushing_ypc_cumulative) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_cumulative'] = np.nanmean(away_opponents_avg_rushing_ypc_cumulative)
        if len(home_opponents_avg_rushing_ypc_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_past_1_years'] = np.nanmean(home_opponents_avg_rushing_ypc_past_1_years)
        if len(away_opponents_avg_rushing_ypc_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_past_1_years'] = np.nanmean(away_opponents_avg_rushing_ypc_past_1_years) 
        if len(home_opponents_avg_rushing_ypc_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_past_2_years'] = np.nanmean(home_opponents_avg_rushing_ypc_past_2_years)
        if len(away_opponents_avg_rushing_ypc_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_past_2_years'] = np.nanmean(away_opponents_avg_rushing_ypc_past_2_years)
        if len(home_opponents_avg_rushing_ypc_allowed_cumulative) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_allowed_cumulative'] = np.nanmean(home_opponents_avg_rushing_ypc_allowed_cumulative)
        if len(away_opponents_avg_rushing_ypc_allowed_cumulative) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_allowed_cumulative'] = np.nanmean(away_opponents_avg_rushing_ypc_allowed_cumulative)
        if len(home_opponents_avg_rushing_ypc_allowed_past_1_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_allowed_past_1_years'] = np.nanmean(home_opponents_avg_rushing_ypc_allowed_past_1_years)
        if len(away_opponents_avg_rushing_ypc_allowed_past_1_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_allowed_past_1_years'] = np.nanmean(away_opponents_avg_rushing_ypc_allowed_past_1_years)
        if len(home_opponents_avg_rushing_ypc_allowed_past_2_years) > 0:
            df.at[index, 'home_opponents_avg_rushing_ypc_allowed_past_2_years'] = np.nanmean(home_opponents_avg_rushing_ypc_allowed_past_2_years)
        if len(away_opponents_avg_rushing_ypc_allowed_past_2_years) > 0:
            df.at[index, 'away_opponents_avg_rushing_ypc_allowed_past_2_years'] = np.nanmean(away_opponents_avg_rushing_ypc_allowed_past_2_years)  

    return df

In [229]:
def get_relative_ypg_stats(df):
    """Add opponent-relative ypg/ypc feature columns for both teams.

    The frame is first passed through get_past_ypg_stats and
    get_past_opponents_ypg_stats, which are expected to supply the
    ``*_avg_*`` and ``*_opponents_avg_*`` input columns read below.

    For every metric, time window and side, one ``<side>_relative_avg_*``
    column is written holding the fractional difference between the team's
    own average and the matching average of its opponents:

        relative = (team_avg - opponents_avg) / opponents_avg

    A team's own average is compared against what its opponents *allowed*;
    a team's *allowed* average is compared against what its opponents
    produced. Rows whose opponents' average is exactly 0 get 0.0 instead of
    a division result.

    Parameters:
        df: DataFrame of games; new columns are assigned onto the frame
            returned by the two helper calls.

    Returns:
        The DataFrame with the 60 relative columns added.
    """
    print("get_relative_ypg_stats")

    df = get_past_opponents_ypg_stats(get_past_ypg_stats(df))

    metrics = ('total_ypg', 'passing_ypg', 'passing_ypc', 'rushing_ypg', 'rushing_ypc')
    windows = ('cumulative', 'past_1_years', 'past_2_years')
    sides = ('home', 'away')

    # Nesting order (metric -> own/allowed -> window -> side) fixes the order
    # in which the new columns are appended to the frame.
    for metric in metrics:
        for own_suffix in ('', '_allowed'):
            # The opponents' column always carries the opposite suffix.
            opp_suffix = '' if own_suffix else '_allowed'
            for window in windows:
                for side in sides:
                    team_col = f'{side}_avg_{metric}{own_suffix}_{window}'
                    opp_col = f'{side}_opponents_avg_{metric}{opp_suffix}_{window}'
                    rel_col = f'{side}_relative_avg_{metric}{own_suffix}_{window}'

                    baseline = df[opp_col]
                    # np.where evaluates both branches; the zero test only
                    # selects which value is kept for each row.
                    df[rel_col] = np.where(baseline == 0, 0.0, (df[team_col] - baseline) / baseline)

    return df

In [3]:
def get_all_data(data, start_year, end_year):
    """Run every data-building step in order and return the assembled frame.

    Each step receives the frame produced by the previous one. Steps listed
    with ``season_range`` are also given ``start_year`` and ``end_year``;
    the others take the frame alone. After the last step, the ten
    ``*_game_*`` yardage columns are dropped from the result.

    Parameters:
        data: object handed to the first step (get_games_info).
        start_year: first year forwarded to the year-ranged steps.
        end_year: last year forwarded to the year-ranged steps.

    Returns:
        The frame returned by the final step, minus the dropped columns.
    """
    season_range = (start_year, end_year)
    frame_only = ()

    # (step, extra positional args) -- order is significant, later steps
    # consume what earlier ones produced.
    pipeline = (
        (get_games_info, season_range),
        (get_betting_lines, season_range),
        (get_talent_data, season_range),
        (get_poll_data, season_range),
        (get_basic_stats_data, season_range),
        (get_adv_stats_data, season_range),
        (get_returning_prod_data, season_range),
        (get_strength_of_schedule, frame_only),
        (get_home_vs_away_field_team_performance, frame_only),
        (get_relative_ppg_stats, frame_only),
        # NOTE(review): presumably the source of the *_game_* columns dropped
        # below ("Delete these columns for model") -- confirm.
        (get_per_game_stats, season_range),
        (get_relative_ypg_stats, frame_only),
    )
    for step, extra_args in pipeline:
        data = step(data, *extra_args)

    per_game_columns = [
        'away_game_total_yards',
        'away_game_passing_yards',
        'away_game_yards_per_pass',
        'away_game_rushing_yards',
        'away_game_yards_per_carry',
        'home_game_total_yards',
        'home_game_passing_yards',
        'home_game_yards_per_pass',
        'home_game_rushing_yards',
        'home_game_yards_per_carry',
    ]
    data = data.drop(columns=per_game_columns)
    return data