#### CSC 180 Intelligent Systems 

#### William Lorence, Ajaydeep Singh, Romin Akoliya, Abdurraziq Paikur

#### California State University, Sacramento

# Final Project: NBA Outcome Predictions

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import leaguegamefinder


## Fetching Team and Player Data

In [2]:
from nba_api.stats.endpoints import leaguegamefinder

# Example: Get all games played by the Atlanta Hawks
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=1610612737)
games = gamefinder.get_data_frames()[0]  # Fetch the data as a DataFrame

# Display the first few rows
print(games.head())


  SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  \
0     22024  1610612737               ATL  Atlanta Hawks  0022400258   
1     22024  1610612737               ATL  Atlanta Hawks  0022400250   
2     22024  1610612737               ATL  Atlanta Hawks  0022400239   
3     22024  1610612737               ATL  Atlanta Hawks  0022400012   
4     22024  1610612737               ATL  Atlanta Hawks  0022400001   

    GAME_DATE      MATCHUP WL  MIN  PTS  ...  FT_PCT  OREB  DREB  REB  AST  \
0  2024-11-20    ATL @ GSW  L  238   97  ...   0.625    21    44   65   27   
1  2024-11-18    ATL @ SAC  W  239  109  ...   0.833     6    31   37   34   
2  2024-11-17    ATL @ POR  L  240  110  ...   0.840    16    31   47   30   
3  2024-11-15  ATL vs. WAS  W  240  129  ...   0.833    15    40   55   28   
4  2024-11-12    ATL @ BOS  W  240  117  ...   0.538    19    25   44   35   

    STL  BLK  TOV  PF  PLUS_MINUS  
0   9.0    5   17  17       -23.0  
1   7.0    7   14  19     

## Filter and Format Data: Extract key information such as:

### Season
### Wins and losses
### Opponent teams

In [3]:
# Filter for games from the last 20 years
games['SEASON'] = games['SEASON_ID'].str[:4].astype(int)
recent_games = games[games['SEASON'] >= 2004]

# Create a simple win/loss indicator
recent_games['WIN'] = recent_games['WL'] == 'W'

# Display processed data
print(recent_games[['SEASON', 'TEAM_NAME', 'GAME_DATE', 'MATCHUP', 'WIN']].head())


   SEASON      TEAM_NAME   GAME_DATE      MATCHUP    WIN
0    2202  Atlanta Hawks  2024-11-20    ATL @ GSW  False
1    2202  Atlanta Hawks  2024-11-18    ATL @ SAC   True
2    2202  Atlanta Hawks  2024-11-17    ATL @ POR  False
3    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS   True
4    2202  Atlanta Hawks  2024-11-12    ATL @ BOS   True


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recent_games['WIN'] = recent_games['WL'] == 'W'


## Fetch Player Career Averages: Use the playercareerstats endpoint to get data for individual players

In [4]:
from nba_api.stats.endpoints import playercareerstats

# Example: Fetch career stats for a specific player (LeBron James)
player_id = 2544  # LeBron James' ID
career = playercareerstats.PlayerCareerStats(player_id=player_id)
career_data = career.get_data_frames()[0]

# Display the career averages
print(career_data[['SEASON_ID', 'PTS', 'AST', 'REB']].tail())


   SEASON_ID   PTS  AST  REB
17   2020-21  1126  350  346
18   2021-22  1695  349  459
19   2022-23  1590  375  457
20   2023-24  1822  589  518
21   2024-25   329  132  114


## Store Data: Save the data into CSV files for easier use later:

In [5]:
recent_games.to_csv('team_records.csv', index=False)
career_data.to_csv('player_career_averages.csv', index=False)


## Preprocessing and Combining Datasets
### Load the Datasets: Import the CSV files into pandas DataFrames.

In [6]:
team_data = pd.read_csv('team_records.csv')
player_data = pd.read_csv('player_career_averages.csv')

# Display the first few rows
print("Team Data:")
print(team_data.head())
print("\nPlayer Data:")
print(player_data.head())


Team Data:
   SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME   GAME_ID  \
0      22024  1610612737               ATL  Atlanta Hawks  22400258   
1      22024  1610612737               ATL  Atlanta Hawks  22400250   
2      22024  1610612737               ATL  Atlanta Hawks  22400239   
3      22024  1610612737               ATL  Atlanta Hawks  22400012   
4      22024  1610612737               ATL  Atlanta Hawks  22400001   

    GAME_DATE      MATCHUP WL  MIN  PTS  ...  DREB  REB  AST   STL  BLK  TOV  \
0  2024-11-20    ATL @ GSW  L  238   97  ...    44   65   27   9.0    5   17   
1  2024-11-18    ATL @ SAC  W  239  109  ...    31   37   34   7.0    7   14   
2  2024-11-17    ATL @ POR  L  240  110  ...    31   47   30  10.0    7   25   
3  2024-11-15  ATL vs. WAS  W  240  129  ...    40   55   28  10.0   10   16   
4  2024-11-12    ATL @ BOS  W  240  117  ...    25   44   35  16.0    2   16   

   PF  PLUS_MINUS  SEASON    WIN  
0  17       -23.0    2202  False  
1  19      

### Preprocess Team Data:

#### Extract relevant columns (e.g., season, team name, win/loss).
#### Encode the target variable (win/loss) as 1 for win and 0 for loss.

In [7]:
team_data['WIN'] = team_data['WIN'].astype(int)  # Convert Boolean to integer
team_data_processed = team_data[['SEASON', 'TEAM_NAME', 'GAME_DATE', 'MATCHUP', 'WIN']]
print(team_data_processed.head())


   SEASON      TEAM_NAME   GAME_DATE      MATCHUP  WIN
0    2202  Atlanta Hawks  2024-11-20    ATL @ GSW    0
1    2202  Atlanta Hawks  2024-11-18    ATL @ SAC    1
2    2202  Atlanta Hawks  2024-11-17    ATL @ POR    0
3    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS    1
4    2202  Atlanta Hawks  2024-11-12    ATL @ BOS    1


## Preprocess Player Data:

#### Filter career averages for relevant stats (e.g., points, assists, rebounds).
#### Create a dictionary of player stats grouped by season.

In [8]:
player_data_filtered = player_data[['SEASON_ID', 'PLAYER_ID', 'PTS', 'AST', 'REB']]
player_data_filtered['SEASON_ID'] = player_data_filtered['SEASON_ID'].str[:4].astype(int)  # Extract season year
print(player_data_filtered.head())



   SEASON_ID  PLAYER_ID   PTS  AST  REB
0       2003       2544  1654  465  432
1       2004       2544  2175  577  588
2       2005       2544  2478  521  556
3       2006       2544  2132  470  526
4       2007       2544  2250  539  592


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  player_data_filtered['SEASON_ID'] = player_data_filtered['SEASON_ID'].str[:4].astype(int)  # Extract season year


## Combine Datasets:

In [9]:
def calculate_team_stats(season, team_name, player_data):
    # Filter players for the given team and season
    players_on_team = player_data[player_data['SEASON_ID'] == season]
    
    # Calculate averages for relevant stats
    avg_pts = players_on_team['PTS'].mean()
    avg_ast = players_on_team['AST'].mean()
    avg_reb = players_on_team['REB'].mean()
    
    return avg_pts, avg_ast, avg_reb

# Add separate columns for each stat
team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
    lambda row: pd.Series(calculate_team_stats(row['SEASON'], row['TEAM_NAME'], player_data_filtered)),
    axis=1
)

print(team_data_processed.head())



   SEASON      TEAM_NAME   GAME_DATE      MATCHUP  WIN  TEAM_PTS_AVG  \
0    2202  Atlanta Hawks  2024-11-20    ATL @ GSW    0           NaN   
1    2202  Atlanta Hawks  2024-11-18    ATL @ SAC    1           NaN   
2    2202  Atlanta Hawks  2024-11-17    ATL @ POR    0           NaN   
3    2202  Atlanta Hawks  2024-11-15  ATL vs. WAS    1           NaN   
4    2202  Atlanta Hawks  2024-11-12    ATL @ BOS    1           NaN   

   TEAM_AST_AVG  TEAM_REB_AVG  
0           NaN           NaN  
1           NaN           NaN  
2           NaN           NaN  
3           NaN           NaN  
4           NaN           NaN  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_da

In [10]:
team_data_processed.to_csv('combined_dataset.csv', index=False)


In [11]:
import os
print(os.listdir())


['combined_dataset.csv', 'team_records.csv', 'finalproject.ipynb', '.ipynb_checkpoints', 'enhanced_dataset.csv', 'player_career_averages.csv']


## Load and Prepare the Data

In [12]:
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('combined_dataset.csv')

# Select features and target
X = data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG']]  # Use more features if available
y = data['WIN']  # Target variable

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Display the shape of training and test sets
print("Training features shape:", X_train.shape)
print("Test features shape:", X_test.shape)


Training features shape: (2520, 3)
Test features shape: (1080, 3)


##  Build the Neural Network Model

In [13]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # Input layer
    Dense(32, activation='relu'),  # Hidden layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                256       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2369 (9.25 KB)
Trainable params: 2369 (9.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Train the Model

In [14]:
from tensorflow.keras.callbacks import EarlyStopping

# EarlyStopping to monitor validation loss
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,  # 20% of training data for validation
    epochs=50,             # Maximum number of epochs
    batch_size=32,         # Number of samples per gradient update
    callbacks=[early_stopping],
    verbose=1              # Display training progress
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


In [15]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Test Loss: nan
Test Accuracy: 0.5185185074806213


## Model Tuning and Performance Analysis

In [16]:
from sklearn.metrics import confusion_matrix, classification_report

# Make predictions
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[560   0]
 [520   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.52      1.00      0.68       560
           1       0.00      0.00      0.00       520

    accuracy                           0.52      1080
   macro avg       0.26      0.50      0.34      1080
weighted avg       0.27      0.52      0.35      1080



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


 ## Model Improvement

In [17]:
# Merge opponent stats into the dataset
def calculate_opponent_stats(row, player_data):
    # Extract season and opponent team name from the row
    season = row['SEASON']
    opponent = row['MATCHUP'].split(' ')[-1]
    
    # Filter player data for the opponent team and season
    opponent_players = player_data[player_data['SEASON_ID'] == season]
    avg_pts = opponent_players['PTS'].mean()
    avg_ast = opponent_players['AST'].mean()
    avg_reb = opponent_players['REB'].mean()
    
    return avg_pts, avg_ast, avg_reb

# Add opponent stats
team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
    lambda row: pd.Series(calculate_opponent_stats(row, player_data_filtered)),
    axis=1
)

# Save the updated dataset
team_data_processed.to_csv('enhanced_dataset.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']] = team_data_processed.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data_pro

## Hyperparameter Tuning

In [18]:
# Load the enhanced dataset
data = pd.read_csv('enhanced_dataset.csv')

# Include opponent stats in features
X = data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG', 'OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']]
y = data['WIN']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Retrain the model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32, verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
# Evaluate the improved model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Improved Test Loss: {test_loss}")
print(f"Improved Test Accuracy: {test_accuracy}")

# Generate predictions
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Improved Test Loss: nan
Improved Test Accuracy: 0.5185185074806213

Confusion Matrix:
[[560   0]
 [520   0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.52      1.00      0.68       560
           1       0.00      0.00      0.00       520

    accuracy                           0.52      1080
   macro avg       0.26      0.50      0.34      1080
weighted avg       0.27      0.52      0.35      1080



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
# Check class distribution
print(data['WIN'].value_counts())


WIN
0    1835
1    1765
Name: count, dtype: int64


 #### Feature Engineering

In [21]:
# Example: Adding a home game indicator
data['HOME_GAME'] = data['MATCHUP'].apply(lambda x: 1 if 'vs.' in x else 0)

# Example: Adding recent form (requires sorting by game date)
data['RECENT_WINS'] = data.groupby('TEAM_NAME')['WIN'].rolling(5).mean().reset_index(0, drop=True)

# Drop NaN values generated from rolling averages
data.dropna(inplace=True)


#### Adjust the Model Archtecture

In [22]:
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])


## Retrain the Model

In [23]:
print(X.head())  # Display the first few rows
print("Feature Set Shape:", X.shape)



   TEAM_PTS_AVG  TEAM_AST_AVG  TEAM_REB_AVG  OPP_PTS_AVG  OPP_AST_AVG  \
0           NaN           NaN           NaN          NaN          NaN   
1           NaN           NaN           NaN          NaN          NaN   
2           NaN           NaN           NaN          NaN          NaN   
3           NaN           NaN           NaN          NaN          NaN   
4           NaN           NaN           NaN          NaN          NaN   

   OPP_REB_AVG  
0          NaN  
1          NaN  
2          NaN  
3          NaN  
4          NaN  
Feature Set Shape: (3600, 6)


In [24]:
print(y.head())  # Display the first few rows
print("Target Shape:", y.shape)


0    0
1    1
2    0
3    1
4    1
Name: WIN, dtype: int64
Target Shape: (3600,)


In [25]:
print(data[['TEAM_PTS_AVG', 'TEAM_AST_AVG', 'TEAM_REB_AVG', 'OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG', 'HOME_GAME', 'RECENT_WINS']].info())


<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TEAM_PTS_AVG  0 non-null      float64
 1   TEAM_AST_AVG  0 non-null      float64
 2   TEAM_REB_AVG  0 non-null      float64
 3   OPP_PTS_AVG   0 non-null      float64
 4   OPP_AST_AVG   0 non-null      float64
 5   OPP_REB_AVG   0 non-null      float64
 6   HOME_GAME     0 non-null      int64  
 7   RECENT_WINS   0 non-null      float64
dtypes: float64(7), int64(1)
memory usage: 0.0 bytes
None


In [26]:
# Check if opponent stats are being calculated correctly
print(team_data_processed[['OPP_PTS_AVG', 'OPP_AST_AVG', 'OPP_REB_AVG']].head())


   OPP_PTS_AVG  OPP_AST_AVG  OPP_REB_AVG
0          NaN          NaN          NaN
1          NaN          NaN          NaN
2          NaN          NaN          NaN
3          NaN          NaN          NaN
4          NaN          NaN          NaN


In [27]:
def calculate_opponent_stats(row, player_data):
    # Extract season and opponent team name from the row
    season = row['SEASON']
    opponent = row['MATCHUP'].split(' ')[-1]  # Adjust this based on actual MATCHUP format

    # Filter player data for the opponent team and season
    opponent_players = player_data[(player_data['SEASON_ID'] == season) & (player_data['TEAM_NAME'] == opponent)]
    
    if not opponent_players.empty:
        # Calculate average stats for the opponent team
        avg_pts = opponent_players['PTS'].mean()
        avg_ast = opponent_players['AST'].mean()
        avg_reb = opponent_players['REB'].mean()
    else:
        # Default values for missing opponent data
        avg_pts = 0
        avg_ast = 0
        avg_reb = 0
    
    return avg_pts, avg_ast, avg_reb



## Build the Convolutional Neural Network (CNN)

In [28]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the CNN model using "sample" structure
visible = Input(shape=(X_train.shape[1], 1))
conv1 = Conv1D(128, kernel_size=3, activation='relu')(visible)
pool1 = MaxPooling1D(pool_size=2)(conv1)
drop1 = Dropout(0.2)(pool1)
flat = Flatten()(drop1)

# Interpretation model
dense1 = Dense(64, activation='relu')(flat)
drop2 = Dropout(0.2)(dense1)
dense2 = Dense(32, activation='relu')(dense1)
drop3 = Dropout(0.2)(dense2)
output = Dense(1, activation='sigmoid')(drop2)  # Output layer + sigmoid for binary classification

# Model creation
model_cnn = Model(inputs=visible, outputs=output)
 
# Summarize layers
print(model_cnn.summary())

# Compile the model
model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 6, 1)]            0         
                                                                 
 conv1d (Conv1D)             (None, 4, 128)            512       
                                                                 
 max_pooling1d (MaxPooling1  (None, 2, 128)            0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 2, 128)            0         
                                                                 
 flatten (Flatten)           (None, 256)               0         
                                                                 
 dense_12 (Dense)            (None, 64)                16448     
                                                             

In [29]:
# Set up callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_cnn_model.keras', save_best_only=True, monitor='val_loss')

# Check before training to ensure there arent any missing or Nan values
assert not np.any(np.isnan(X)), "Input X contains NaN values."
assert not np.any(np.isnan(y)), "Target y contains NaN values."

# Train the model
history = model_cnn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

# Print training completion
print("CNN model training complete.")


AssertionError: Input X contains NaN values.