In [1]:
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Remove display restrictions so wide frames are shown without truncation
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Directory holding the dataset. Set the FIFA_DATA_DIR environment variable to
# run the notebook on another machine; the default is the original location.
DATA_DIR = Path(os.environ.get(
    'FIFA_DATA_DIR',
    r'/Users/udoychowdhury/Documents/DataScience/Soccer Data',
))

# Load your data
data = pd.read_csv(DATA_DIR / 'Male_FIFA_24_Players.csv')

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


## Define Threshold

In [2]:
# Define high potential players:
# at least 5 rating points of headroom (potential - overall) AND younger than 25.
rating_headroom = data['potential'].sub(data['overall'])
has_headroom = rating_headroom.ge(5)
is_under_25 = data['age'].lt(25)
data['high_potential'] = (has_headroom & is_under_25).astype(int)

# Check the balance of the new column
# data['high_potential'].value_counts()
# high_potential_players = data[data['high_potential'] == 1]
# display(high_potential_players)

## Logistic Regression Model

In [3]:
# Selecting features
# 'age', 'overall' and 'potential' are left out of the list: the target column
# 'high_potential' is computed from exactly those three, so (presumably by
# design) they are excluded to avoid leaking the label into the inputs.
features = data[[
    'value_eur',
    'wage_eur',
    'height_cm',
    'weight_kg',
    'preferred_foot',
    'weak_foot',
    'skill_moves',
    'work_rate',
    'body_type',
    'pace',
    'shooting',
    'passing',
    'dribbling',
    'defending',
    'physic',
    'skill_dribbling',
    'skill_curve',
    'skill_fk_accuracy',
    'skill_long_passing',
    'skill_ball_control',
    'mentality_aggression',
    'mentality_interceptions',
    'mentality_positioning',
    'mentality_vision',
    'mentality_penalties',
    'mentality_composure']]

# Preprocessing for numerical and categorical data
# 'number' matches every numeric dtype (not only int64/float64), so a numeric
# column stored with another width is scaled instead of being silently dropped
# by the ColumnTransformer (whose default is remainder='drop').
numerical_cols = features.select_dtypes(include='number').columns
categorical_cols = features.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        # handle_unknown='ignore': a category that never appeared in the
        # training split is encoded as all zeros instead of raising at
        # predict time (the default, 'error', raises ValueError).
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ])

# Prepare features
X = features
y = data['high_potential']

# Split the data (stratified so both splits keep the same class balance)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [4]:
# Building the pipeline: preprocessing followed by the classifier
model_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42)),
]
pipeline = Pipeline(steps=model_steps)

# Training the model, then predicting the test set (fit returns the pipeline)
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Evaluation metrics, all computed on the held-out test split
accuracy, f1, conf_matrix = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, f1_score, confusion_matrix)
)
report = classification_report(y_test, y_pred)

# Display the evaluation results
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

Accuracy: 0.7598896044158233
F1 Score: 0.7310202679491584

Confusion Matrix:
 [[1414  373]
 [ 410 1064]]

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.79      0.78      1787
           1       0.74      0.72      0.73      1474

    accuracy                           0.76      3261
   macro avg       0.76      0.76      0.76      3261
weighted avg       0.76      0.76      0.76      3261



## Save the model

In [35]:
# Persist the fitted pipeline (preprocessing + classifier) to disk
saved_model_path = r'/Users/udoychowdhury/Documents/DataScience/Soccer Data/fifa_model.pkl'
joblib.dump(pipeline, saved_model_path)

['/Users/udoychowdhury/Documents/DataScience/Soccer Data/fifa_model.pkl']

## Test the model

In [5]:
# Load the model
# Read from the same file the pipeline is written to by joblib.dump in the
# "Save the model" cell; the previous path pointed at a different directory,
# so a fresh run could load a stale or missing file.
# NOTE: joblib.load unpickles the file - only load model files you trust.
model = joblib.load(r'/Users/udoychowdhury/Documents/DataScience/Soccer Data/fifa_model.pkl')

In [6]:
# Function to use model
def predict_high_potential(short_name):
    """Summarise the model's high-potential verdict for one player.

    The player is looked up in the module-level ``data`` frame by exact match
    on ``short_name`` and the matching rows are scored with the module-level
    ``model``. If several rows share the name, the first one is reported.

    Parameters
    ----------
    short_name : str
        Value compared against ``data['short_name']``.

    Returns
    -------
    str
        ``"Player not found."`` when no row matches. Otherwise a sentence
        describing the prediction, the player's current and potential
        ratings taken from ``data``, and the class probability (as a
        percentage) reported by ``model.predict_proba``.
    """
    matches = data[data['short_name'] == short_name]

    # Guard clause: nothing to score if the name is unknown or misspelled
    if matches.empty:
        return "Player not found."

    # Model inputs; the list mirrors the feature selection used when the
    # pipeline is fitted in this notebook ('age', 'overall', 'potential'
    # are not included).
    feature_columns = [
        'value_eur',
        'wage_eur',
        'height_cm',
        'weight_kg',
        'preferred_foot',
        'weak_foot',
        'skill_moves',
        'work_rate',
        'body_type',
        'pace',
        'shooting',
        'passing',
        'dribbling',
        'defending',
        'physic',
        'skill_dribbling',
        'skill_curve',
        'skill_fk_accuracy',
        'skill_long_passing',
        'skill_ball_control',
        'mentality_aggression',
        'mentality_interceptions',
        'mentality_positioning',
        'mentality_vision',
        'mentality_penalties',
        'mentality_composure',
    ]
    model_input = matches[feature_columns]

    # The loaded model does its own preprocessing, so raw columns are passed in
    predicted_labels = model.predict(model_input)
    class_probabilities = model.predict_proba(model_input)

    # Ratings come from the data itself, not from the model
    overall = matches['overall'].values[0]
    potential = matches['potential'].values[0]
    improvement_percentage = ((potential - overall) / overall) * 100

    # Probabilities of class 0 and class 1 for the first match, as percentages
    confidence_not_high_potential, confidence_high_potential = (
        class_probabilities[0, column] * 100 for column in (0, 1)
    )

    if predicted_labels[0] != 1:
        return (f"{short_name}, with a current rating of {overall}, is predicted not to have high potential for significant improvement, "
                f"expected to remain close to their current performance level. "
                f"Confidence in this prediction is {confidence_not_high_potential:.2f}%, indicating a high certainty that substantial improvement is unlikely.")

    return (f"{short_name}, currently rated at {overall}, has the potential to improve by approximately {improvement_percentage:.2f}% "
            f"to reach a potential rating of {potential}. This indicates a promising future and capability for further development. "
            f"Confidence in this prediction of high potential is {confidence_high_potential:.2f}%, suggesting a strong likelihood of achieving such growth.")

In [7]:
# Evaluate the model
# Score the reloaded `model` on the held-out split. Re-printing the metrics of
# the in-memory pipeline's earlier `y_pred` would not exercise the file that
# was just loaded from disk.
y_pred_loaded = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_loaded))
print("\nClassification Report:\n", classification_report(y_test, y_pred_loaded))

Accuracy: 0.7598896044158233

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.79      0.78      1787
           1       0.74      0.72      0.73      1474

    accuracy                           0.76      3261
   macro avg       0.76      0.76      0.76      3261
weighted avg       0.76      0.76      0.76      3261



In [8]:
# Example usage: describe the prediction for a single player
example_player = 'Pedri'
result = predict_high_potential(example_player)
print(result)

Pedri, currently rated at 86, has the potential to improve by approximately 6.98% to reach a potential rating of 92. This indicates a promising future and capability for further development. Confidence in this prediction of high potential is 95.55%, suggesting a strong likelihood of achieving such growth.


In [27]:
# import your library
import secrets

# function to create the api key
def generate_api_key(nbytes=32):
    """Return a random URL-safe text key.

    Parameters
    ----------
    nbytes : int, optional
        Number of random bytes passed to ``secrets.token_urlsafe``.
        Defaults to 32, which yields a 43-character key.

    Returns
    -------
    str
        The generated key.
    """
    return secrets.token_urlsafe(nbytes)


# Call the function
new_api_key = generate_api_key()

# print out your key
# NOTE: the printed key is stored in the notebook's saved output; clear this
# cell's output before sharing or committing the notebook.
print(new_api_key)

[REDACTED: generated API key removed from saved output]


In [45]:
import requests

# Read the key from the environment instead of committing it to the notebook.
api_key = os.environ.get("PPP_API_KEY")
if not api_key:
    raise RuntimeError("Set the PPP_API_KEY environment variable before calling the prediction API.")

headers = {
    "accept": "application/json",
    "PPP_API_KEY": api_key
}

try:
    # NOTE(review): plain HTTP sends the key unencrypted - switch to HTTPS if
    # the server supports it.
    response = requests.get(
        "http://45.55.75.51/v1/predict",
        params={"value": "Gavi"},  # encoded by requests as ?value=Gavi
        headers=headers,
        timeout=10,  # without a timeout the cell can hang indefinitely
    )
    response.raise_for_status()  # This will raise an exception for HTTP errors
    result_json = response.json()  # Using .json() to directly get the JSON response
    print(result_json)
except requests.exceptions.HTTPError as err:
    print(f"HTTP Error: {err}")
except (requests.exceptions.RequestException, ValueError) as e:
    # RequestException covers connection failures and timeouts; ValueError
    # covers a response body that is not valid JSON.
    print(f"An error occurred: {e}")

An error occurred: HTTPConnectionPool(host='45.55.75.51', port=80): Max retries exceeded with url: /v1/predict?value=Gavi (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fb08179d280>: Failed to establish a new connection: [Errno 61] Connection refused'))
