In [3]:
import pandas as pd
import numpy as np 
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

In [4]:
data = pd.read_csv('FIFA 2018 Statistics.csv')

In [5]:
data

Unnamed: 0,Date,Team,Opponent,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,...,Yellow Card,Yellow & Red,Red,Man of the Match,1st Goal,Round,PSO,Goals in PSO,Own goals,Own goal Time
0,14-06-2018,Russia,Saudi Arabia,5,40,13,7,3,3,6,...,0,0,0,Yes,12.0,Group Stage,No,0,,
1,14-06-2018,Saudi Arabia,Russia,0,60,6,0,3,3,2,...,0,0,0,No,,Group Stage,No,0,,
2,15-06-2018,Egypt,Uruguay,0,43,8,3,3,2,0,...,2,0,0,No,,Group Stage,No,0,,
3,15-06-2018,Uruguay,Egypt,1,57,14,4,6,4,5,...,0,0,0,Yes,89.0,Group Stage,No,0,,
4,15-06-2018,Morocco,Iran,0,64,13,3,6,4,5,...,1,0,0,No,,Group Stage,No,0,1.0,90.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,11-07-2018,England,Croatia,1,46,11,1,6,4,4,...,1,0,0,No,5.0,Semi- Finals,No,0,,
124,14-07-2018,Belgium,England,2,43,12,4,3,5,4,...,1,0,0,Yes,4.0,3rd Place,No,0,,
125,14-07-2018,England,Belgium,0,57,15,5,7,3,5,...,2,0,0,No,,3rd Place,No,0,,
126,15-07-2018,France,Croatia,4,39,8,6,1,1,2,...,2,0,0,Yes,18.0,Final,No,0,1.0,18.0


In [6]:
data.isnull().sum()

Date                        0
Team                        0
Opponent                    0
Goal Scored                 0
Ball Possession %           0
Attempts                    0
On-Target                   0
Off-Target                  0
Blocked                     0
Corners                     0
Offsides                    0
Free Kicks                  0
Saves                       0
Pass Accuracy %             0
Passes                      0
Distance Covered (Kms)      0
Fouls Committed             0
Yellow Card                 0
Yellow & Red                0
Red                         0
Man of the Match            0
1st Goal                   34
Round                       0
PSO                         0
Goals in PSO                0
Own goals                 116
Own goal Time             116
dtype: int64

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 27 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Date                    128 non-null    object 
 1   Team                    128 non-null    object 
 2   Opponent                128 non-null    object 
 3   Goal Scored             128 non-null    int64  
 4   Ball Possession %       128 non-null    int64  
 5   Attempts                128 non-null    int64  
 6   On-Target               128 non-null    int64  
 7   Off-Target              128 non-null    int64  
 8   Blocked                 128 non-null    int64  
 9   Corners                 128 non-null    int64  
 10  Offsides                128 non-null    int64  
 11  Free Kicks              128 non-null    int64  
 12  Saves                   128 non-null    int64  
 13  Pass Accuracy %         128 non-null    int64  
 14  Passes                  128 non-null    in

In [8]:
# preprocessing

In [9]:
data.drop('Date' , axis=1 , inplace = True)

In [10]:
data.drop(['Own goals' , 'Own goal Time'], axis = 1 , inplace  = True)

In [11]:
data['1st Goal'] = data['1st Goal'].fillna(data['1st Goal'].mean())

In [12]:
# encoding

In [13]:
data.dtypes

Team                       object
Opponent                   object
Goal Scored                 int64
Ball Possession %           int64
Attempts                    int64
On-Target                   int64
Off-Target                  int64
Blocked                     int64
Corners                     int64
Offsides                    int64
Free Kicks                  int64
Saves                       int64
Pass Accuracy %             int64
Passes                      int64
Distance Covered (Kms)      int64
Fouls Committed             int64
Yellow Card                 int64
Yellow & Red                int64
Red                         int64
Man of the Match           object
1st Goal                  float64
Round                      object
PSO                        object
Goals in PSO                int64
dtype: object

In [14]:
print(f"Team: {data['Team'].unique()}\n")

Team: ['Russia' 'Saudi Arabia' 'Egypt' 'Uruguay' 'Morocco' 'Iran' 'Portugal'
 'Spain' 'France' 'Australia' 'Argentina' 'Iceland' 'Peru' 'Denmark'
 'Croatia' 'Nigeria' 'Costa Rica' 'Serbia' 'Germany' 'Mexico' 'Brazil'
 'Switzerland' 'Sweden' 'Korea Republic' 'Belgium' 'Panama' 'Tunisia'
 'England' 'Colombia' 'Japan' 'Poland' 'Senegal']



In [15]:
print(f"Opponent: {data['Opponent'].unique()}\n")

Opponent: ['Saudi Arabia' 'Russia' 'Uruguay' 'Egypt' 'Iran' 'Morocco' 'Spain'
 'Portugal' 'Australia' 'France' 'Iceland' 'Argentina' 'Denmark' 'Peru'
 'Nigeria' 'Croatia' 'Serbia' 'Costa Rica' 'Mexico' 'Germany'
 'Switzerland' 'Brazil' 'Korea Republic' 'Sweden' 'Panama' 'Belgium'
 'England' 'Tunisia' 'Japan' 'Colombia' 'Senegal' 'Poland']



In [16]:
print(f"Man of the match: {data['Man of the Match'].unique()}\n")

Man of the match: ['Yes' 'No']



In [17]:
print(f"round: {data['Round'].unique()}\n")

round: ['Group Stage' 'Round of 16' 'Quarter Finals' 'Semi- Finals' '3rd Place'
 'Final']



In [18]:
print(f" PsO : {data['PSO'].unique()}\n")

 PsO : ['No' 'Yes']



In [19]:
label_encoder = LabelEncoder()

data['Man of the Match'] = label_encoder.fit_transform(data['Man of the Match'])
man_mappings = {index: label for index, label in enumerate(label_encoder.classes_)}

data['PSO'] = label_encoder.fit_transform(data['PSO'])
pso_mappings = {index: label for index, label in enumerate(label_encoder.classes_)}

In [20]:
data

Unnamed: 0,Team,Opponent,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,Offsides,...,Distance Covered (Kms),Fouls Committed,Yellow Card,Yellow & Red,Red,Man of the Match,1st Goal,Round,PSO,Goals in PSO
0,Russia,Saudi Arabia,5,40,13,7,3,3,6,3,...,118,22,0,0,0,1,12.000000,Group Stage,0,0
1,Saudi Arabia,Russia,0,60,6,0,3,3,2,1,...,105,10,0,0,0,0,39.457447,Group Stage,0,0
2,Egypt,Uruguay,0,43,8,3,3,2,0,1,...,112,12,2,0,0,0,39.457447,Group Stage,0,0
3,Uruguay,Egypt,1,57,14,4,6,4,5,1,...,111,6,0,0,0,1,89.000000,Group Stage,0,0
4,Morocco,Iran,0,64,13,3,6,4,5,0,...,101,22,1,0,0,0,39.457447,Group Stage,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,England,Croatia,1,46,11,1,6,4,4,3,...,148,14,1,0,0,0,5.000000,Semi- Finals,0,0
124,Belgium,England,2,43,12,4,3,5,4,1,...,108,11,1,0,0,1,4.000000,3rd Place,0,0
125,England,Belgium,0,57,15,5,7,3,5,0,...,110,5,2,0,0,0,39.457447,3rd Place,0,0
126,France,Croatia,4,39,8,6,1,1,2,1,...,99,14,2,0,0,1,18.000000,Final,0,0


In [21]:
round_values = list(data["Round"].unique())


In [22]:
round_values

['Group Stage',
 'Round of 16',
 'Quarter Finals',
 'Semi- Finals',
 '3rd Place',
 'Final']

In [23]:
round_mappings = {label: index for index, label in enumerate(round_values)}
round_mappings

{'Group Stage': 0,
 'Round of 16': 1,
 'Quarter Finals': 2,
 'Semi- Finals': 3,
 '3rd Place': 4,
 'Final': 5}

In [24]:
data['Round'] = data['Round'].apply(lambda x: round_mappings[x])

In [25]:
data['Team'].unique()

array(['Russia', 'Saudi Arabia', 'Egypt', 'Uruguay', 'Morocco', 'Iran',
       'Portugal', 'Spain', 'France', 'Australia', 'Argentina', 'Iceland',
       'Peru', 'Denmark', 'Croatia', 'Nigeria', 'Costa Rica', 'Serbia',
       'Germany', 'Mexico', 'Brazil', 'Switzerland', 'Sweden',
       'Korea Republic', 'Belgium', 'Panama', 'Tunisia', 'England',
       'Colombia', 'Japan', 'Poland', 'Senegal'], dtype=object)

In [26]:
data['Opponent'].unique()

array(['Saudi Arabia', 'Russia', 'Uruguay', 'Egypt', 'Iran', 'Morocco',
       'Spain', 'Portugal', 'Australia', 'France', 'Iceland', 'Argentina',
       'Denmark', 'Peru', 'Nigeria', 'Croatia', 'Serbia', 'Costa Rica',
       'Mexico', 'Germany', 'Switzerland', 'Brazil', 'Korea Republic',
       'Sweden', 'Panama', 'Belgium', 'England', 'Tunisia', 'Japan',
       'Colombia', 'Senegal', 'Poland'], dtype=object)

In [27]:
pd.get_dummies(data['Team'])

Unnamed: 0,Argentina,Australia,Belgium,Brazil,Colombia,Costa Rica,Croatia,Denmark,Egypt,England,...,Portugal,Russia,Saudi Arabia,Senegal,Serbia,Spain,Sweden,Switzerland,Tunisia,Uruguay
0,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
124,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
125,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
126,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [28]:
pd.get_dummies(data['Opponent'].apply(lambda x: "opp_" + x))

Unnamed: 0,opp_Argentina,opp_Australia,opp_Belgium,opp_Brazil,opp_Colombia,opp_Costa Rica,opp_Croatia,opp_Denmark,opp_Egypt,opp_England,...,opp_Portugal,opp_Russia,opp_Saudi Arabia,opp_Senegal,opp_Serbia,opp_Spain,opp_Sweden,opp_Switzerland,opp_Tunisia,opp_Uruguay
0,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
124,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
125,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
126,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [29]:
data['Opponent'] = data['Opponent'].apply(lambda x: "opp_" + x)

In [30]:
data

Unnamed: 0,Team,Opponent,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,Offsides,...,Distance Covered (Kms),Fouls Committed,Yellow Card,Yellow & Red,Red,Man of the Match,1st Goal,Round,PSO,Goals in PSO
0,Russia,opp_Saudi Arabia,5,40,13,7,3,3,6,3,...,118,22,0,0,0,1,12.000000,0,0,0
1,Saudi Arabia,opp_Russia,0,60,6,0,3,3,2,1,...,105,10,0,0,0,0,39.457447,0,0,0
2,Egypt,opp_Uruguay,0,43,8,3,3,2,0,1,...,112,12,2,0,0,0,39.457447,0,0,0
3,Uruguay,opp_Egypt,1,57,14,4,6,4,5,1,...,111,6,0,0,0,1,89.000000,0,0,0
4,Morocco,opp_Iran,0,64,13,3,6,4,5,0,...,101,22,1,0,0,0,39.457447,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,England,opp_Croatia,1,46,11,1,6,4,4,3,...,148,14,1,0,0,0,5.000000,3,0,0
124,Belgium,opp_England,2,43,12,4,3,5,4,1,...,108,11,1,0,0,1,4.000000,4,0,0
125,England,opp_Belgium,0,57,15,5,7,3,5,0,...,110,5,2,0,0,0,39.457447,4,0,0
126,France,opp_Croatia,4,39,8,6,1,1,2,1,...,99,14,2,0,0,1,18.000000,5,0,0


In [31]:
data_concat = pd.concat([data, pd.get_dummies(data['Team']), pd.get_dummies(data['Opponent'])], axis=1)

In [32]:
data_concat.drop(['Team', 'Opponent'], axis=1, inplace=True)

In [33]:
data_concat

Unnamed: 0,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,Offsides,Free Kicks,Saves,...,opp_Portugal,opp_Russia,opp_Saudi Arabia,opp_Senegal,opp_Serbia,opp_Spain,opp_Sweden,opp_Switzerland,opp_Tunisia,opp_Uruguay
0,5,40,13,7,3,3,6,3,11,0,...,False,False,True,False,False,False,False,False,False,False
1,0,60,6,0,3,3,2,1,25,2,...,False,True,False,False,False,False,False,False,False,False
2,0,43,8,3,3,2,0,1,7,3,...,False,False,False,False,False,False,False,False,False,True
3,1,57,14,4,6,4,5,1,13,3,...,False,False,False,False,False,False,False,False,False,False
4,0,64,13,3,6,4,5,0,14,2,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,1,46,11,1,6,4,4,3,24,5,...,False,False,False,False,False,False,False,False,False,False
124,2,43,12,4,3,5,4,1,5,5,...,False,False,False,False,False,False,False,False,False,False
125,0,57,15,5,7,3,5,0,12,2,...,False,False,False,False,False,False,False,False,False,False
126,4,39,8,6,1,1,2,1,14,1,...,False,False,False,False,False,False,False,False,False,False


In [34]:
np.sum(data_concat.dtypes == 'object')

0

In [35]:
# scaling

In [36]:
y = data_concat['Man of the Match']
X = data_concat.drop('Man of the Match', axis=1)

In [37]:
scaler = RobustScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [38]:
X

Unnamed: 0,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,Offsides,Free Kicks,Saves,...,opp_Portugal,opp_Russia,opp_Saudi Arabia,opp_Senegal,opp_Serbia,opp_Spain,opp_Sweden,opp_Switzerland,opp_Tunisia,opp_Uruguay
0,2.0,-0.6250,0.166667,1.166667,-0.666667,0.000000,0.333333,1.0,-0.571429,-0.666667,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.5,0.6250,-1.000000,-1.166667,-0.666667,0.000000,-1.000000,0.0,1.428571,0.000000,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.5,-0.4375,-0.666667,-0.166667,-0.666667,-0.444444,-1.666667,0.0,-1.142857,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.4375,0.333333,0.166667,0.333333,0.444444,0.000000,0.0,-0.285714,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.5,0.8750,0.166667,-0.166667,0.333333,0.444444,0.000000,-0.5,-0.142857,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,0.0,-0.2500,-0.166667,-0.833333,0.333333,0.444444,-0.333333,1.0,1.285714,1.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
124,0.5,-0.4375,0.000000,0.166667,-0.666667,0.888889,-0.333333,0.0,-1.428571,1.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
125,-0.5,0.4375,0.500000,0.500000,0.666667,0.000000,0.000000,-0.5,-0.428571,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
126,1.5,-0.6875,-0.666667,0.833333,-1.333333,-0.888889,-1.000000,0.0,-0.142857,-0.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# Spliting the data 

In [40]:
X_train , X_test , y_train , y_test = train_test_split(X,y,train_size=0.8) 

In [41]:
# Training

In [42]:
sk_model = MLPClassifier(hidden_layer_sizes=(16,16))
sk_model.fit(X_train , y_train)



In [43]:
inputs = tf.keras.Input(shape=(85,))
x = tf.keras.layers.Dense(128, activation=tf.nn.relu)(inputs)
x = tf.keras.layers.Dense(128, activation=tf.nn.relu)(x)
outputs = tf.keras.layers.Dense(2, activation=tf.nn.softmax)(x)

tf_model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [44]:
tf_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

In [45]:
tf_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x26b2194e790>

In [46]:
# result

In [47]:
sk_score = sk_model.score(X_test, y_test)
tf_score = tf_model.evaluate(X_test, y_test, verbose=False)

In [48]:
print(f"   sklearn Model: {sk_score}")
print(f"TensorFlow Model: {tf_score[1]}")

   sklearn Model: 0.7307692307692307
TensorFlow Model: 0.7692307829856873
