In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import (
    explained_variance_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [None]:
# Load and preprocess the dataset
def load_and_preprocess_data(file_path):
    """Load the dataset from CSV and return model-ready features and target.

    Rows containing a null in any column are dropped, and ``GameType`` is
    encoded as a binary flag: 1 for playoff games, 0 for everything else.
    The comparison with ``'playoff'`` ignores letter case and surrounding
    whitespace, so values such as ``' Playoff '`` are still recognised.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Passed directly to ``pandas.read_csv``.

    Returns
    -------
    features : pandas.DataFrame
        The six predictor columns, with ``GameType`` encoded as int64 0/1.
    target : pandas.Series
        The ``TotalPoints`` column.

    Raises
    ------
    KeyError
        If any required column is absent from the file.
    AttributeError
        If a ``GameType`` value is not a string.
    """
    feature_columns = ['AvgPointsPerGame', 'ShotAccuracy', 'TeamPointAllowance',
                       'ThreePointsMade', 'TotalReboundsAndAssists', 'GameType']
    target_column = 'TotalPoints'

    raw = pd.read_csv(file_path)

    # Fail early with the full list of absent columns instead of a bare
    # KeyError raised from deep inside an indexing call.
    missing = [name for name in feature_columns + [target_column]
               if name not in raw.columns]
    if missing:
        raise KeyError(f"Dataset is missing required column(s): {missing}")

    # Handle null values - dropping rows with any null value. dropna() returns
    # a new frame, so the freshly read data is never mutated in place.
    cleaned = raw.dropna()

    # Convert 'GameType' into binary feature (0 for regular season, 1 for playoff).
    # strip() guards against padded CSV cells being misread as regular season.
    game_type_flag = (
        cleaned['GameType']
        .map(lambda value: 1 if value.strip().lower() == 'playoff' else 0)
        .astype('int64')
    )
    # assign() builds a new frame, avoiding chained-assignment warnings on the
    # filtered result of dropna().
    cleaned = cleaned.assign(GameType=game_type_flag)

    features = cleaned[feature_columns]
    target = cleaned[target_column]
    return features, target


In [None]:
def feature_engineering_and_ranking(X, y):
    """Fit a gradient-boosting regressor and print its feature-importance ranking.

    The features are standardized with a ``StandardScaler`` fitted on ``X``
    before the regressor is trained. The ranking is printed from most to
    least important.

    Parameters
    ----------
    X : DataFrame-like
        Feature table; its ``columns`` attribute supplies the names printed
        in the ranking.
    y : array-like
        Target values aligned with the rows of ``X``.

    Returns
    -------
    GradientBoostingRegressor
        The fitted regressor. Note that it was trained on standardized
        values and that the scaler is local to this function and is not
        returned, so callers must standardize inputs the same way before
        calling ``predict``.
    """
    # Standardize features
    standardized = StandardScaler().fit_transform(X)

    # Fit model to get feature importances
    regressor = GradientBoostingRegressor(
        n_estimators=235,
        learning_rate=0.2,
        max_depth=4,
        random_state=42,
    )
    regressor.fit(standardized, y)

    # Pair every importance score with its column label, highest score first
    ranking = list(zip(regressor.feature_importances_, X.columns))
    ranking.sort(reverse=True)

    print("Feature Importance Ranking:")
    for score, column in ranking:
        print(f"{column}: {score}")

    return regressor  # Returning model for further use if needed

In [None]:
# Main function for model training and evaluation
def train_and_evaluate(features, target):
    """Train the regressor on an 80/20 split and report hold-out metrics.

    The data is split with a fixed seed, ``feature_engineering_and_ranking``
    is called on the training portion (which also prints the importance
    ranking), and the returned model is scored on the test portion.

    Parameters
    ----------
    features : pandas.DataFrame
        Predictor columns.
    target : array-like
        Target values aligned with ``features``.

    Returns
    -------
    dict
        Keys ``'mse'``, ``'mae'``, ``'r2'`` and ``'explained_variance'``
        holding the metrics that are also printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Feature engineering and ranking
    model = feature_engineering_and_ranking(X_train, y_train)

    # The model was trained on standardized training data, but the helper does
    # not hand back its scaler. Fitting a StandardScaler on the same X_train
    # reproduces exactly the same mean/scale, so the test set is transformed
    # consistently with what the model saw during training. (Wrapping an
    # unfitted scaler in a pipeline and calling predict would fail.)
    scaler = StandardScaler().fit(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Predictions
    predictions = model.predict(X_test_scaled)

    # Evaluation metrics
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    explained_variance = explained_variance_score(y_test, predictions)

    print(f'Mean Squared Error: {mse}')
    print(f'Mean Absolute Error: {mae}')
    print(f'R^2 Score: {r2}')
    print(f'Explained Variance Score: {explained_variance}')

    # Returned as well as printed so later cells can reuse the numbers.
    return {
        'mse': mse,
        'mae': mae,
        'r2': r2,
        'explained_variance': explained_variance,
    }

In [None]:
if __name__ == "__main__":
    # Update with the actual path to your dataset
    file_path = "nba_player_data.csv"
    features, target = load_and_preprocess_data(file_path)
    # train_and_evaluate runs the feature-importance ranking itself on the
    # training split, so the preprocessed features are passed straight in.
    # (The previous call to an undefined `feature_engineering` raised NameError.)
    train_and_evaluate(features, target)