In [18]:
import requests
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import datetime
import joblib

# Fetch Data
def fetch_data(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an error for bad HTTP responses
        data = response.json()
        return pd.DataFrame(data)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return pd.DataFrame()  # Return an empty DataFrame for safety

# Define features and target
features = [
    "goalsScored", "assists", "cleanSheets", "penaltiesSaved", "penaltiesMissed",
    "ownGoals", "yellowCards", "redCards", "saves", "bonus", "bonusPointsSystem",
    "dreamTeamCount", "expectedGoals", "expectedAssists", "expectedGoalInvolvements",
    "expectedGoalsConceded", "expectedGoalsPer90", "expectedAssistsPer90",
    "goalsConcededPer90", "startsPer90", "cleanSheetsPer90"
]
target = "totalPoints"

# Fetch and preprocess data
url = 'http://fantasyedgeai.runasp.net/api/player/data'
data = fetch_data(url)

# Combine firstName and secondName into a single playerName column
data["playerName"] = data["firstName"] + " " + data["secondName"]

# Sort data by playerName and gameweek
data = data.sort_values(by=["playerName", "gameWeek"])

# Create previousPoints and rolling features
data["previousPoints"] = data.groupby("playerName")["totalPoints"].shift(1)
data["avgPointsLast3"] = data.groupby("playerName")["totalPoints"].rolling(3).mean().reset_index(0, drop=True)
data["maxPointsLast5"] = data.groupby("playerName")["totalPoints"].rolling(5).max().reset_index(0, drop=True)

# Drop rows with NaN in previousPoints or rolling features
data = data.dropna(subset=["previousPoints", "avgPointsLast3", "maxPointsLast5"])

# Handle datetime issues
# Convert 'gameWeek' to actual datetime if it isn't already
if 'gameWeek' in data.columns and pd.to_datetime(data['gameWeek'], errors='coerce').notna().any():
    data['gameWeek'] = pd.to_datetime(data['gameWeek'], errors='coerce')

# Now, we can add additional date-based features if necessary
data['daysSinceLastGame'] = (datetime.datetime.now() - data['gameWeek']).dt.days

# Prepare features and target
X = data[features + ["avgPointsLast3", "maxPointsLast5", "daysSinceLastGame"]]
y = data[target]

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Hyperparameter Tuning using GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# RandomForest Model
rf_model = RandomForestRegressor(random_state=42)

grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='neg_mean_squared_error'
)

grid_search.fit(X_train, y_train)

# Retrieve Best Parameters
best_params = grid_search.best_params_

# Recreate the model with best parameters
best_model = RandomForestRegressor(random_state=42, **best_params)

# Fit the model
best_model.fit(X_train, y_train)

# Cross-Validation
cv_scores = cross_val_score(best_model, X_scaled, y, cv=5, scoring='neg_mean_squared_error')
print(f"Cross-Validation MSE: {-cv_scores.mean():.2f}")

# Evaluate the model
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R2 Score: {r2:.2f}")

# Save the trained RandomForest model
joblib.dump(best_model, "fantasy_edge_rf_model.pkl")

# Save the data to CSV
data.to_csv('Player_Data.csv', index=False)



405 fits failed out of a total of 1215.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
91 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Yousef Saad\AppData\Roaming\Python\Python313\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Yousef Saad\AppData\Roaming\Python\Python313\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "C:\Users\Yousef Saad\AppData\Roaming\Python\Python313\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constrain

Cross-Validation MSE: 0.14
Mean Squared Error (MSE): 0.14
R2 Score: 0.97


In [19]:
def predict_player(player_name):
    # Filter player data by name
    player_data = data[data["playerName"] == player_name]
    if player_data.empty:
        return f"Player '{player_name}' not found in the dataset."

    # Prepare player features (latest gameweek)
    player_features = player_data[features + ["avgPointsLast3", "maxPointsLast5", "daysSinceLastGame"]].iloc[-1:]
    player_features_scaled = scaler.transform(player_features)  # Scaling the features
    
    # Predict next gameweek points
    predicted_points = best_model.predict(player_features_scaled)[0]
    
    # Get previous points for the player (last gameweek)
    previous_points = player_data["previousPoints"].iloc[-1]
    
    # Calculate percentage change and trend
    if previous_points != 0:
        percentage_change = ((predicted_points - previous_points) / previous_points) * 100
    else:
        percentage_change = 0  # or another default value like 'N/A'
    
    trend = "Increasing" if percentage_change > 0 else "Decreasing"

    # Check if the player is a goalkeeper (position 1)
    position = player_data["position"].values[0]
    is_goalkeeper = position == 1

    # Calculate average percentages for non-goalkeepers (using the last 5 gameweeks)
    if not is_goalkeeper:
        # Calculate total assists and goals over the last 5 gameweeks
        total_assists_last_5 = player_data["assists"].tail(5).sum()  # Total assists in the last 5 gameweeks
        total_goals_last_5 = player_data["goalsScored"].tail(5).sum()  # Total goals scored in the last 5 gameweeks
        
        # Calculate total assists and goals from all gameweeks
        total_assists_all = player_data["assists"].sum()  # Total assists from all gameweeks
        total_goals_all = player_data["goalsScored"].sum()  # Total goals from all gameweeks

        # Calculate percentages (avoiding division by zero)
        assists_percentage = (total_assists_last_5 / total_assists_all) * 100 if total_assists_all > 0 else 0
        goals_percentage = (total_goals_last_5 / total_goals_all) * 100 if total_goals_all > 0 else 0
    else:
        # For goalkeepers, calculate average clean sheet percentage
        clean_sheets_last_5 = player_data["cleanSheets"].tail(5).mean()  # Average clean sheets in the last 5 gameweeks
        total_points_last_5 = player_data["totalPoints"].tail(5).sum()  # Total points in the last 5 gameweeks
        
        # Calculate clean sheet percentage (avoiding division by zero)
        clean_sheet_percentage = (clean_sheets_last_5 / total_points_last_5) * 100 if total_points_last_5 > 0 else 0

    # Prepare the result dictionary with formatted values
    result = {
        "playerName": player_name,
        "predictedPoints": f"{predicted_points:.2f}",  # Format predicted points to 2 decimal places
        "percentageChange": f"{percentage_change:.2f}%",  # Format percentage change to 2 decimal places
        "trend": trend
    }

    # Add statistics based on whether the player is a goalkeeper or not
    if not is_goalkeeper:
        result["predictedaAssistsPercentage"] = f"{assists_percentage:.2f}%"  # Format assists percentage to 2 decimal places
        result["predictedGoalsPercentage"] = f"{goals_percentage:.2f}%"  # Format goals percentage to 2 decimal places
    else:
        result["cleanSheetPercentage"] = f"{clean_sheet_percentage:.2f}%"  # Format clean sheet percentage to 2 decimal places

    return result




In [20]:
predict_player("Mohamed Salah")

{'playerName': 'Mohamed Salah',
 'predictedPoints': '2.99',
 'percentageChange': '49.67%',
 'trend': 'Increasing',
 'predictedaAssistsPercentage': '20.00%',
 'predictedGoalsPercentage': '20.00%'}