# In [None]:
import pandas as pd
import numpy as np
import pickle
from math import sqrt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

# Read the player table; the CSV is looked up relative to the working directory.
df = pd.read_csv('players_21.csv')

# Columns excluded from modelling. Every name listed here must exist in the
# CSV: DataFrame.drop raises KeyError on an unknown label.
columns_to_drop = [
    # identifiers, names, club / national-team details and free-text fields
    'sofifa_id', 'player_url', 'short_name', 'long_name', 'dob',
    'club_name', 'league_name', 'team_position', 'team_jersey_number',
    'loaned_from', 'joined', 'contract_valid_until',
    'nation_position', 'nation_jersey_number',
    'real_face', 'body_type', 'player_tags', 'player_traits',
    # position-code columns
    'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw',
    'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm',
    'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb',
    # economics
    'value_eur', 'wage_eur', 'release_clause_eur',
    # preferences
    'preferred_foot', 'work_rate',
]
df = df.drop(columns=columns_to_drop)

# Keep only rows that have a position string and whose string does not
# mention 'GK'. `na=False` makes missing entries evaluate to "no match", and
# the explicit notna() term then excludes them, so rows without a position
# are removed together with the goalkeepers.
positions = df['player_positions']
is_outfield = positions.notna() & ~positions.str.contains('GK', na=False)

# The position column was only needed for the filter; drop it afterwards.
df = df.loc[is_outfield].drop(columns='player_positions')

# Define features and target
target = 'overall'

# A row without a target value can be neither trained on nor scored, so it is
# removed instead of being given an imputed label.
df = df.dropna(subset=[target])
X = df.drop(columns=[target])
y = df[target]

# Train/test split.
# The split is done BEFORE any imputation so that the fill values below are
# derived from the training rows only. Computing medians on the full table
# would let statistics of the held-out rows leak into the training features
# and make the evaluation optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Per-column medians of the numeric training features; the same values are
# applied to both partitions.
train_medians = X_train.median(numeric_only=True)

# 1. fill numeric gaps with the training medians,
# 2. coerce every column to numeric (values that cannot be parsed become NaN),
# 3. replace whatever is still missing (non-numeric columns, columns whose
#    training median is itself NaN) with 0.
X_train = X_train.fillna(train_medians).apply(pd.to_numeric, errors='coerce').fillna(0)
X_test = X_test.fillna(train_medians).apply(pd.to_numeric, errors='coerce').fillna(0)

# Hyperparameters for the gradient-boosted regressor; random_state is fixed
# at 42, the same seed used for the train/test split.
xgb_params = {
    'n_estimators': 200,
    'learning_rate': 0.1,
    'max_depth': 6,
    'random_state': 42,
}
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

# Score the held-out rows: R² plus RMSE (square root of the mean squared error).
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"✅ R² Score: {r2:.4f}")
print(f"📉 RMSE: {rmse:.4f}")

# Persist the fitted estimator with pickle, in the current working directory.
# NOTE: only unpickle this file from a trusted location, since loading a
# pickle can execute arbitrary code.
with open('xgb_fifa21_model_trimmed.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

print("💾 Model saved as 'xgb_fifa21_model_trimmed.pkl'")


# Recorded output of the cell above:
# ✅ R² Score: 0.9843
# 📉 RMSE: 0.8559
