#### CSC 180 Intelligent Systems 

#### William Lorence, Ajaydeep Singh, Romin Akoliya, Abdurraziq Paikur

#### California State University, Sacramento

# Final Project: NBA Outcome Predictions

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import leaguegamefinder


## Fetching Team and Player Data

In [7]:
from nba_api.stats.endpoints import leaguegamefinder

# Example: Get all games played by the Atlanta Hawks
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=1610612737)
games = gamefinder.get_data_frames()[0]  # Fetch the data as a DataFrame

# Display the first few rows
print(games.head())



  SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  \
0     22024  1610612737               ATL  Atlanta Hawks  0022400239   
1     22024  1610612737               ATL  Atlanta Hawks  0022400012   
2     22024  1610612737               ATL  Atlanta Hawks  0022400001   
3     22024  1610612737               ATL  Atlanta Hawks  0022400198   
4     22024  1610612737               ATL  Atlanta Hawks  0022400185   

    GAME_DATE      MATCHUP    WL  MIN  PTS  ...  FT_PCT  OREB  DREB  REB  AST  \
0  2024-11-17    ATL @ POR  None  228  111  ...   0.840    16    31   47   30   
1  2024-11-15  ATL vs. WAS     W  240  129  ...   0.833    15    40   55   28   
2  2024-11-12    ATL @ BOS     W  240  117  ...   0.538    19    25   44   35   
3  2024-11-09  ATL vs. CHI     L  240  113  ...   0.733    10    29   39   31   
4  2024-11-08    ATL @ DET     L  240  121  ...   0.783    15    29   44   30   

    STL  BLK  TOV  PF  PLUS_MINUS  
0  10.0    7   25  19       -10.2  
1  10.0 

## Filter and Format Data: Extract key information such as:

### Season
### Wins and losses
### Opponent teams

In [9]:
# Filter for games from the last 20 years
games['SEASON'] = games['SEASON_ID'].str[:4].astype(int)
recent_games = games[games['SEASON'] >= 2004]

# Create a simple win/loss indicator
recent_games['WIN'] = recent_games['WL'] == 'W'

# Display processed data
print(recent_games[['SEASON', 'TEAM_NAME', 'GAME_DATE', 'MATCHUP', 'WIN']].head())


   SEASON      TEAM_NAME   GAME_DATE      MATCHUP    WIN
0    2202  Atlanta Hawks  2024-11-17    ATL @ POR  False
1    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS   True
2    2202  Atlanta Hawks  2024-11-12    ATL @ BOS   True
3    2202  Atlanta Hawks  2024-11-09  ATL vs. CHI  False
4    2202  Atlanta Hawks  2024-11-08    ATL @ DET  False


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recent_games['WIN'] = recent_games['WL'] == 'W'


## Fetch Player Career Averages: Use the playercareerstats endpoint to get data for individual players

In [10]:
from nba_api.stats.endpoints import playercareerstats

# Example: Fetch career stats for a specific player (LeBron James)
player_id = 2544  # LeBron James' ID
career = playercareerstats.PlayerCareerStats(player_id=player_id)
career_data = career.get_data_frames()[0]

# Display the career averages
print(career_data[['SEASON_ID', 'PTS', 'AST', 'REB']].tail())


   SEASON_ID   PTS  AST  REB
17   2020-21  1126  350  346
18   2021-22  1695  349  459
19   2022-23  1590  375  457
20   2023-24  1822  589  518
21   2024-25   303  120  112


## Store Data: Save the data into CSV files for easier use later:

In [12]:
recent_games.to_csv('team_records.csv', index=False)
career_data.to_csv('player_career_averages.csv', index=False)


## Preprocessing and Combining Datasets
### Load the Datasets: Import the CSV files into pandas DataFrames.

In [13]:
team_data = pd.read_csv('team_records.csv')
player_data = pd.read_csv('player_career_averages.csv')

# Display the first few rows
print("Team Data:")
print(team_data.head())
print("\nPlayer Data:")
print(player_data.head())


Team Data:
   SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME   GAME_ID  \
0      22024  1610612737               ATL  Atlanta Hawks  22400239   
1      22024  1610612737               ATL  Atlanta Hawks  22400012   
2      22024  1610612737               ATL  Atlanta Hawks  22400001   
3      22024  1610612737               ATL  Atlanta Hawks  22400198   
4      22024  1610612737               ATL  Atlanta Hawks  22400185   

    GAME_DATE      MATCHUP   WL  MIN  PTS  ...  DREB  REB  AST   STL  BLK  \
0  2024-11-17    ATL @ POR  NaN  228  111  ...    31   47   30  10.0    7   
1  2024-11-15  ATL vs. WAS    W  240  129  ...    40   55   28  10.0   10   
2  2024-11-12    ATL @ BOS    W  240  117  ...    25   44   35  16.0    2   
3  2024-11-09  ATL vs. CHI    L  240  113  ...    29   39   31   8.0    5   
4  2024-11-08    ATL @ DET    L  240  121  ...    29   44   30  12.0    7   

   TOV  PF  PLUS_MINUS  SEASON    WIN  
0   25  19       -10.2    2202  False  
1   16  17        1

### Preprocess Team Data:

#### Extract relevant columns (e.g., season, team name, win/loss).
#### Encode the target variable (win/loss) as 1 for win and 0 for loss.

In [14]:
team_data['WIN'] = team_data['WIN'].astype(int)  # Convert Boolean to integer
team_data_processed = team_data[['SEASON', 'TEAM_NAME', 'GAME_DATE', 'MATCHUP', 'WIN']]
print(team_data_processed.head())


   SEASON      TEAM_NAME   GAME_DATE      MATCHUP  WIN
0    2202  Atlanta Hawks  2024-11-17    ATL @ POR    0
1    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS    1
2    2202  Atlanta Hawks  2024-11-12    ATL @ BOS    1
3    2202  Atlanta Hawks  2024-11-09  ATL vs. CHI    0
4    2202  Atlanta Hawks  2024-11-08    ATL @ DET    0


## Preprocess Player Data:

#### Filter career averages for relevant stats (e.g., points, assists, rebounds).
#### Create a dictionary of player stats grouped by season.

In [15]:
player_data_filtered = player_data[['SEASON_ID', 'PLAYER_ID', 'PTS', 'AST', 'REB']]
player_data_filtered['SEASON_ID'] = player_data_filtered['SEASON_ID'].str[:4].astype(int)  # Extract season year
print(player_data_filtered.head())



   SEASON_ID  PLAYER_ID   PTS  AST  REB
0       2003       2544  1654  465  432
1       2004       2544  2175  577  588
2       2005       2544  2478  521  556
3       2006       2544  2132  470  526
4       2007       2544  2250  539  592


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  player_data_filtered['SEASON_ID'] = player_data_filtered['SEASON_ID'].str[:4].astype(int)  # Extract season year


## Combine Datasets:

In [17]:
def calculate_team_stats(season, team_name, player_data):
    # Filter players for the given team and season
    players_on_team = player_data[player_data['SEASON_ID'] == season]
    
    # Calculate averages for relevant stats
    avg_pts = players_on_team['PTS'].mean()
    avg_ast = players_on_team['AST'].mean()
    avg_reb = players_on_team['REB'].mean()
    
    return avg_pts, avg_ast, avg_reb

# Add separate columns for each stat
team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
    lambda row: pd.Series(calculate_team_stats(row['SEASON'], row['TEAM_NAME'], player_data_filtered)),
    axis=1
)

print(team_data_processed.head())



   SEASON      TEAM_NAME   GAME_DATE      MATCHUP  WIN  TEAM_PTS_AVG  \
0    2202  Atlanta Hawks  2024-11-17    ATL @ POR    0           NaN   
1    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS    1           NaN   
2    2202  Atlanta Hawks  2024-11-12    ATL @ BOS    1           NaN   
3    2202  Atlanta Hawks  2024-11-09  ATL vs. CHI    0           NaN   
4    2202  Atlanta Hawks  2024-11-08    ATL @ DET    0           NaN   

   TEAM_AST_AVG  TEAM_REB_AVG  
0           NaN           NaN  
1           NaN           NaN  
2           NaN           NaN  
3           NaN           NaN  
4           NaN           NaN  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_da

In [19]:
team_data_processed.to_csv('combined_dataset.csv', index=False)


In [20]:
import os
print(os.listdir())


['.git', '.ipynb_checkpoints', 'combined_dataset.csv', 'finalproject.ipynb', 'player_career_averages.csv', 'team_records.csv']


## Load and Prepare the Data

In [21]:
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('combined_dataset.csv')

# Select features and target
X = data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']]  # Use more features if available
y = data['WIN']  # Target variable

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Display the shape of training and test sets
print("Training features shape:", X_train.shape)
print("Test features shape:", X_test.shape)


Training features shape: (2518, 3)
Test features shape: (1080, 3)


##  Build the Neural Network Model

In [22]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # Input layer
    Dense(32, activation='relu'),  # Hidden layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Train the Model

In [23]:
from tensorflow.keras.callbacks import EarlyStopping

# EarlyStopping to monitor validation loss
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,  # 20% of training data for validation
    epochs=50,             # Maximum number of epochs
    batch_size=32,         # Number of samples per gradient update
    callbacks=[early_stopping],
    verbose=1              # Display training progress
)


Epoch 1/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5227 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6926
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5002 - loss: 0.6932 - val_accuracy: 0.5456 - val_loss: 0.6925
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4998 - loss: 0.6932 - val_accuracy: 0.5456 - val_loss: 0.6922
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5112 - loss: 0.6930 - val_accuracy: 0.5456 - val_loss: 0.6920
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5109 - loss: 0.6930 - val_accuracy: 0.5456 - val_loss: 0.6920
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5072 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6920
Epoch 7/50
[1m63/63[0m [32m━━━━━━━━━━

In [24]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 794us/step - accuracy: 0.5073 - loss: 0.6931
Test Loss: 0.6937479972839355
Test Accuracy: 0.49166667461395264


## Model Tuning and Performance Analysis

In [25]:
from sklearn.metrics import confusion_matrix, classification_report

# Make predictions
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Confusion Matrix:
[[531   0]
 [549   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.49      1.00      0.66       531
           1       0.00      0.00      0.00       549

    accuracy                           0.49      1080
   macro avg       0.25      0.50      0.33      1080
weighted avg       0.24      0.49      0.32      1080



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


 ## Model Improvement

In [26]:
# Merge opponent stats into the dataset
def calculate_opponent_stats(row, player_data):
    # Extract season and opponent team name from the row
    season = row['SEASON']
    opponent = row['MATCHUP'].split(' ')[-1]
    
    # Filter player data for the opponent team and season
    opponent_players = player_data[player_data['SEASON_ID'] == season]
    avg_pts = opponent_players['PTS'].mean()
    avg_ast = opponent_players['AST'].mean()
    avg_reb = opponent_players['REB'].mean()
    
    return avg_pts, avg_ast, avg_reb

# Add opponent stats
team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
    lambda row: pd.Series(calculate_opponent_stats(row, player_data_filtered)),
    axis=1
)

# Save the updated dataset
team_data_processed.to_csv('enhanced_dataset.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_pro

## Hyperparameter Tuning

In [27]:
# Load the enhanced dataset
data = pd.read_csv('enhanced_dataset.csv')

# Include opponent stats in features
X = data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG', 'OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']]
y = data['WIN']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Retrain the model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32, verbose=1)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4991 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6928
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5064 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6927
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5040 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6924
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4930 - loss: 0.6933 - val_accuracy: 0.5456 - val_loss: 0.6922
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5186 - loss: 0.6928 - val_accuracy: 0.5456 - val_loss: 0.6920
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5069 - loss: 0.6931 - val_accuracy: 0.5456 - val_loss: 0.6920
Epoch 7/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━

In [28]:
# Evaluate the improved model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Improved Test Loss: {test_loss}")
print(f"Improved Test Accuracy: {test_accuracy}")

# Generate predictions
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 831us/step - accuracy: 0.5073 - loss: 0.6931
Improved Test Loss: 0.6937205195426941
Improved Test Accuracy: 0.49166667461395264
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Confusion Matrix:
[[531   0]
 [549   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.49      1.00      0.66       531
           1       0.00      0.00      0.00       549

    accuracy                           0.49      1080
   macro avg       0.25      0.50      0.33      1080
weighted avg       0.24      0.49      0.32      1080



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [29]:
# Check class distribution
print(data['WIN'].value_counts())


WIN
0    1834
1    1764
Name: count, dtype: int64


 #### Feature Engineering

In [30]:
# Example: Adding a home game indicator
data['HOME_GAME'] = data['MATCHUP'].apply(lambda x: 1 if 'vs.' in x else 0)

# Example: Adding recent form (requires sorting by game date)
data['RECENT_WINS'] = data.groupby('TEAM_NAME')['WIN'].rolling(5).mean().reset_index(0, drop=True)

# Drop NaN values generated from rolling averages
data.dropna(inplace=True)


#### Adjust the Model Archtecture

In [31]:
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Retrain the Model

In [33]:
print(X.head())  # Display the first few rows
print("Feature Set Shape:", X.shape)



Empty DataFrame
Columns: [TEAM_PTS_AVG, TEAM_AST_AVG, TEAM_REB_AVG, OPP_PTS_AVG, OPP_AST_AVG, OPP_REB_AVG, HOME_GAME, RECENT_WINS]
Index: []
Feature Set Shape: (0, 8)


In [34]:
print(y.head())  # Display the first few rows
print("Target Shape:", y.shape)


Series([], Name: WIN, dtype: int64)
Target Shape: (0,)


In [35]:
print(data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG', 'OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG', 'HOME_GAME', 'RECENT_WINS']].info())


<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TEAM_PTS_AVG  0 non-null      float64
 1   TEAM_AST_AVG  0 non-null      float64
 2   TEAM_REB_AVG  0 non-null      float64
 3   OPP_PTS_AVG   0 non-null      float64
 4   OPP_AST_AVG   0 non-null      float64
 5   OPP_REB_AVG   0 non-null      float64
 6   HOME_GAME     0 non-null      int64  
 7   RECENT_WINS   0 non-null      float64
dtypes: float64(7), int64(1)
memory usage: 0.0 bytes
None


In [36]:
# Check if opponent stats are being calculated correctly
print(team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']].head())


   OPP_PTS_AVG  OPP_AST_AVG  OPP_REB_AVG
0          NaN          NaN          NaN
1          NaN          NaN          NaN
2          NaN          NaN          NaN
3          NaN          NaN          NaN
4          NaN          NaN          NaN


In [38]:
def calculate_opponent_stats(row, player_data):
    # Extract season and opponent team name from the row
    season = row['SEASON']
    opponent = row['MATCHUP'].split(' ')[-1]  # Adjust this based on actual MATCHUP format

    # Filter player data for the opponent team and season
    opponent_players = player_data[(player_data['SEASON_ID'] == season) & (player_data['TEAM_NAME'] == opponent)]
    
    if not opponent_players.empty:
        # Calculate average stats for the opponent team
        avg_pts = opponent_players['PTS'].mean()
        avg_ast = opponent_players['AST'].mean()
        avg_reb = opponent_players['REB'].mean()
    else:
        # Default values for missing opponent data
        avg_pts = 0
        avg_ast = 0
        avg_reb = 0
    
    return avg_pts, avg_ast, avg_reb

