In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the FIFA 17-21 exports (one CSV per edition) and stack them into a
# single training frame. The per-season frames stay available by name.
season_files = (
    "data/FIFA17_official_data.csv",
    "data/FIFA18_official_data.csv",
    "data/FIFA19_official_data.csv",
    "data/FIFA20_official_data.csv",
    "data/FIFA21_official_data.csv",
)
(
    fifa17_raw_data,
    fifa18_raw_data,
    fifa19_raw_data,
    fifa20_raw_data,
    fifa21_raw_data,
) = (pd.read_csv(csv_path) for csv_path in season_files)

# ignore_index=True gives the combined frame a fresh 0..n-1 index, so row
# labels are unique across seasons.
old_raw_data = pd.concat(
    [fifa17_raw_data, fifa18_raw_data, fifa19_raw_data, fifa20_raw_data, fifa21_raw_data],
    ignore_index=True,
)
old_raw_data.shape

In [None]:
# Columns removed before modelling: identifiers, media links, contract and
# physical details, goalkeeper-specific ratings, and "Position" itself.
non_feature_columns = [
    "Photo", "Flag", "Club Logo", "Special", "International Reputation", "Body Type", "Real Face", "Joined",
    "Loaned From", "Contract Valid Until", "Release Clause", "Best Position", "Best Overall Rating", "Potential",
    "ID", "Name", "Nationality", "Club", "Value", "Wage", "Preferred Foot", "Weak Foot", "Skill Moves",
    "Work Rate", "Jersey Number", "Height", "Weight", "Position", "Age", "Marking", "GKDiving", "GKHandling",
    "GKKicking", "GKPositioning", "GKReflexes", "DefensiveAwareness",
]

# Goalkeepers must be filtered out BEFORE the column drop: "Position" is one
# of the dropped columns, so filtering afterwards raises KeyError.
# NOTE(review): assumes "Position" holds plain codes such as "GK" - confirm
# against the raw CSVs.
is_goalkeeper = old_raw_data["Position"] == "GK"

old_raw_data = (
    old_raw_data.loc[~is_goalkeeper]
    .drop(columns=non_feature_columns)
    .dropna()  # keep only rows with every remaining column present
)
old_raw_data.shape

In [None]:
# Bin edges and tier names for turning "Overall" into a three-class target.
# pd.cut uses right-closed intervals, so: up to 64 -> Bronze,
# above 64 up to 74 -> Silver, above 74 up to 100 -> Gold.
bins = [-1, 64, 74, 100]
labels = ["Bronze", "Silver", "Gold"]

# Append the categorical "Rank" target, then remove "Overall" so the model
# cannot read the target directly from its source column.
old_raw_data = old_raw_data.assign(
    Rank=pd.cut(old_raw_data["Overall"], bins=bins, labels=labels, include_lowest=True)
).drop(columns=["Overall"])

old_raw_data

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Target is the "Rank" tier; every other remaining column is a feature.
y = old_raw_data["Rank"]
X = old_raw_data.drop(columns="Rank")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest with library-default hyperparameters; only the seed is fixed.
# fit() returns the estimator itself, so construction and training can be chained.
rfc = RandomForestClassifier(random_state=42).fit(X_train, y_train)
rfc

In [None]:
# Score the fitted forest on the 20% hold-out split.
y_pred = rfc.predict(X_test)
rfc_accuracy = accuracy_score(y_test, y_pred)

# One print call with newline separators emits the same three outputs, in order.
print(
    f"Accuracy: {rfc_accuracy: .2f}",
    "Classification report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

In [None]:
from sklearn.metrics import confusion_matrix

# Pin the class order explicitly. Without `labels=`, confusion_matrix sorts
# the classes alphabetically (Bronze, Gold, Silver), and the heatmap shows
# bare 0/1/2 ticks, so the rows and columns cannot be read reliably.
rank_order = ["Bronze", "Silver", "Gold"]

conf_matrix = confusion_matrix(y_test, y_pred, labels=rank_order)

# Rows are true tiers and columns are predicted tiers, both in rank_order.
ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=rank_order,
    yticklabels=rank_order,
)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix")
plt.show()

In [None]:
# Load the FIFA 22 export and prepare it the same way as the training data,
# so the fitted model can be evaluated on an unseen edition.
pred_data = pd.read_csv("data/FIFA22_official_data.csv")

# Same non-feature columns as removed from the training frame.
non_feature_columns = [
    "Photo", "Flag", "Club Logo", "Special", "International Reputation", "Body Type", "Real Face", "Joined",
    "Loaned From", "Contract Valid Until", "Release Clause", "Best Position", "Best Overall Rating", "Potential",
    "ID", "Name", "Nationality", "Club", "Value", "Wage", "Preferred Foot", "Weak Foot", "Skill Moves",
    "Work Rate", "Jersey Number", "Height", "Weight", "Position", "Age", "Marking", "GKDiving", "GKHandling",
    "GKKicking", "GKPositioning", "GKReflexes", "DefensiveAwareness",
]

# Tier names are spelled out here, not read from `labels`: this cell rebinds
# `labels` to the true ranks at the end, so reading it for pd.cut would break
# as soon as the cell is run a second time.
rank_names = ["Bronze", "Silver", "Gold"]

# Goalkeepers must be filtered out BEFORE the column drop: "Position" is one
# of the dropped columns, so filtering afterwards raises KeyError.
# NOTE(review): assumes "Position" holds plain codes such as "GK" - confirm
# against the raw CSV.
pred_data = (
    pred_data.loc[pred_data["Position"] != "GK"]
    .drop(columns=non_feature_columns)
    .dropna()
)

# Derive the true tier from "Overall" with the training bin edges, then
# remove "Overall" so it is not passed to the model as a feature.
pred_data = pred_data.assign(
    Rank=pd.cut(pred_data["Overall"], bins=bins, labels=rank_names, include_lowest=True)
).drop(columns=["Overall"])

# Select features in the exact column order used for fitting. A schema
# difference in the FIFA 22 file then fails here with a clear KeyError
# rather than inside predict().
X_pred = pred_data.drop(columns="Rank")[X_train.columns]
# Downstream cells read the true FIFA 22 ranks from `labels`.
labels = pred_data["Rank"]

In [None]:
# Score the forest on the FIFA 22 players. At this point `labels` holds the
# true ranks for that edition; `y_pred` and `rfc_accuracy` are overwritten.
y_pred = rfc.predict(X_pred)
rfc_accuracy = accuracy_score(labels, y_pred)

# One print call with newline separators emits the same three outputs, in order.
print(
    f"Accuracy: {rfc_accuracy: .2f}",
    "Classification report:",
    classification_report(labels, y_pred),
    sep="\n",
)

In [None]:
# Pin the class order explicitly. Without `labels=`, confusion_matrix sorts
# the classes alphabetically (Bronze, Gold, Silver), and the heatmap shows
# bare 0/1/2 ticks, so the rows and columns cannot be read reliably.
# The notebook variable `labels` holds the true FIFA 22 ranks here, so the
# class order is defined under its own name.
rank_order = ["Bronze", "Silver", "Gold"]

conf_matrix = confusion_matrix(labels, y_pred, labels=rank_order)

# Rows are true tiers and columns are predicted tiers, both in rank_order.
ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=rank_order,
    yticklabels=rank_order,
)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix")
plt.show()