In [13]:
# club

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, roc_auc_score, matthews_corrcoef
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from imblearn.over_sampling import RandomOverSampler

data = pd.read_csv("C:\\Users\\co279\\penalty.csv")

data = data.iloc[:, 4:]
data.dropna(inplace=True)

# data preprocessing
data = pd.get_dummies(data, columns=['position'])
data = pd.get_dummies(data, columns=['order'])

label_encoder = LabelEncoder()
data['score'] = label_encoder.fit_transform(data['score'])

X = data.drop(columns=['score'])
y = data['score']

accuracy_list = []
f1_list = []
roc_list = []
mcc_list = []

# iteration = 5
n_iterations = 5

for i in range(n_iterations):
    # data spliting (train, test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=411 + i, stratify=y)

    # oversampling
    oversampler = RandomOverSampler(sampling_strategy='minority', random_state=411)
    X_resampled, y_resampled = oversampler.fit_resample(X_train, y_train)

    # multi-layer perceptron
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(X_resampled.shape[1],)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # model compiling
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # model fitting
    model.fit(X_resampled, y_resampled, epochs=10, batch_size=8, validation_data=(X_test, y_test), callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss')])

    y_pred_prob = model.predict(X_test)
    y_pred = [1 if prob > 0.5 else 0 for prob in y_pred_prob]

    # model evaluation
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)

    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_list.append(roc)
    mcc_list.append(mcc)

    print(f"Iteration {i+1} - accuracy: {accuracy}, f1_score: {f1}, roc_auc: {roc}, MCC: {mcc}")

# average
avg_accuracy = np.mean(accuracy_list)
avg_f1 = np.mean(f1_list)
avg_roc = np.mean(roc_list)
avg_mcc = np.mean(mcc_list)

print("\nAverage Metrics after 5 Iterations:")
print(f"Average accuracy: {avg_accuracy}")
print(f"Average f1_score: {avg_f1}")
print(f"Average roc_auc: {avg_roc}")
print(f"Average MCC: {avg_mcc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Iteration 1 - accuracy: 0.6795454545454546, f1_score: 0.36771300448430494, roc_auc: 0.579623771325274, MCC: 0.1541362232697253
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Iteration 2 - accuracy: 0.4068181818181818, f1_score: 0.4, roc_auc: 0.5481019093887696, MCC: 0.09472378409088433
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Iteration 3 - accuracy: 0.44772727272727275, f1_score: 0.3848101265822785, roc_auc: 0.539628290588634, MCC: 0.07138781764977484
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Iteration 4 - accuracy: 0.7659090909090909, f1_score: 0.21374045801526717, roc_auc: 0.5495706699807931, MCC: 0.18314155517079572
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Iteration 5 - accuracy: 0.5977272727272728, f1_score: 0.39590443686006827, roc_auc: 0.5804711332052875, MCC: 0.13922435514315104

Average Metrics after 5 Iterations:
Average accuracy: 0.5795454545454545
Average f1_sco

In [24]:
# international

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, roc_auc_score, matthews_corrcoef
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from imblearn.over_sampling import RandomOverSampler

data = pd.read_csv("C:\\Users\\co279\\penalty_international.csv")

data = data.iloc[:, 4:]
data.dropna(inplace=True)

# data preprocessing
data = pd.get_dummies(data, columns=['position'])
data = pd.get_dummies(data, columns=['order'])

label_encoder = LabelEncoder()
data['score'] = label_encoder.fit_transform(data['score'])

X = data.drop(columns=['score'])
y = data['score']

accuracy_list = []
f1_list = []
roc_list = []
mcc_list = []

# iteration = 5
n_iterations = 5

for i in range(n_iterations):
    # data spliting (train, test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=411 + i, stratify=y)

    # oversampling
    oversampler = RandomOverSampler(sampling_strategy='minority', random_state=411)
    X_resampled, y_resampled = oversampler.fit_resample(X_train, y_train)

    # multi-layer perceptron
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(X_resampled.shape[1],)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # model compiling
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # model fitting
    model.fit(X_resampled, y_resampled, epochs=10, batch_size=8, validation_data=(X_test, y_test), callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss')])

    y_pred_prob = model.predict(X_test)
    y_pred = [1 if prob > 0.5 else 0 for prob in y_pred_prob]

    # model evaluation
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)

    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_list.append(roc)
    mcc_list.append(mcc)

    print(f"Iteration {i+1} - accuracy: {accuracy}, f1_score: {f1}, roc_auc: {roc}, MCC: {mcc}")

# average
avg_accuracy = np.mean(accuracy_list)
avg_f1 = np.mean(f1_list)
avg_roc = np.mean(roc_list)
avg_mcc = np.mean(mcc_list)

print("\nAverage Metrics after 5 Iterations:")
print(f"Average accuracy: {avg_accuracy}")
print(f"Average f1_score: {avg_f1}")
print(f"Average roc_auc: {avg_roc}")
print(f"Average MCC: {avg_mcc}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Iteration 1 - accuracy: 0.47058823529411764, f1_score: 0.35714285714285715, roc_auc: 0.48464808691544636, MCC: -0.027785783912241586
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Iteration 2 - accuracy: 0.28431372549019607, f1_score: 0.44274809160305345, roc_auc: 0.5, MCC: 0.0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Iteration 3 - accuracy: 0.35294117647058826, f1_score: 0.45, roc_auc: 0.5271610769957487, MCC: 0.07899877785206605
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Iteration 4 - accuracy: 0.3333333333333333, f1_score: 0.423728813559322, roc_auc: 0.49267831837505904, MCC: -0.01980772442907568
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Iteration 5 - accuracy: 0.5294117647058824, f1_score: 0.25, roc_auc: 0.4529995276334436, MCC: -0.08931434628787992

Average Metrics after 5 Iterations:
Average accuracy: 0.3941176470588236
Average f1_score: 0.3847239524610465
Average roc_auc: 0.4914974019839396
Average M