In [1]:
import pandas as pd
import numpy as np

lan_df = pd.read_csv("lan_dataset.csv")
na_df = pd.read_csv("na_dataset.csv")
lan_df.head()
na_df.head()


Unnamed: 0,Blue Mastery 1,Blue Mastery 2,Blue Mastery 3,Blue Mastery 4,Blue Mastery 5,Blue Masteries Avg,Blue Masteries Median,Blue Masteries Kurtorsis,Blue Masteries Skewness,Blue Masteries Std,...,Red Winrate 3,Red Winrate 4,Red Winrate 5,Red Winrates Avg,Red Winrates Median,Red Winrates Kurtorsis,Red Winrates Skewness,Red Winrates Std,Red Winrates Variance,Blue Won
0,4133,1821,8979,24707,5037,8935.4,5037.0,3.439484,1.823128,8217.305193,...,0.434783,0.268293,0.442308,0.441384,0.442308,1.914221,-0.302154,0.105423,0.011114,1
1,117377,6819,203518,93559,99490,104152.6,99490.0,1.694611,0.072554,62699.539649,...,0.468085,0.0,0.176471,0.213843,0.176471,-2.929853,0.22751,0.200966,0.040387,1
2,185844,9320,26882,5563,577853,161092.4,26882.0,3.017505,1.783468,218878.380179,...,0.481928,0.46732,0.2,0.417206,0.46732,4.946368,-2.220568,0.108752,0.011827,1
3,23117,75649,57216,101159,256284,102685.0,75649.0,3.233748,1.692783,80900.461504,...,0.451613,0.4375,0.633333,0.527823,0.55,-1.824953,0.048294,0.07362,0.00542,0
4,3685,1198,37405,981,363699,81393.6,3685.0,4.804045,2.185332,141822.059505,...,0.567901,0.571429,0.333333,0.594533,0.567901,2.775487,1.336285,0.220367,0.048562,0


In [2]:
# Concatenate lan_df and na_df dataframes vertically
frames = [lan_df, na_df]
result = pd.concat(frames)

# Convert the resulting dataframe to a numpy array
dataset = result.to_numpy()

# Shuffle the dataset randomly
np.random.shuffle(dataset)

# Separate the features (X) and the labels (Y)
X = dataset[:, 0:44]
Y = dataset[:, 44]



In [3]:
# Import the GradientBoostingClassifier from scikit-learn
from sklearn.ensemble import GradientBoostingClassifier


# Import necessary modules for evaluation and statistical analysis
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.stats import sem

# Create an instance of GradientBoostingClassifier with specified parameters
model = GradientBoostingClassifier(n_estimators=55, learning_rate=0.14)

# Create an empty list to store accuracy values
accuracy = []

# Create StratifiedKFold object for cross-validation
skf = StratifiedKFold(n_splits=10, random_state=None)

# Perform k-fold cross-validation
for train_index, test_index in skf.split(X, Y):
    # Split the data into training and testing sets for the current fold
    x_train_fold, x_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = Y[train_index], Y[test_index]

    # Fit the model on the training data
    model.fit(x_train_fold, y_train_fold)

    # Make predictions on the testing data
    prediction = model.predict(x_test_fold)

    # Calculate the accuracy score for the fold and add it to the list
    score = accuracy_score(prediction, y_test_fold)
    accuracy.append(score * 100)

# Print the output statistics
print("List of possible accuracy: {}".format(accuracy))
print("\nMaximum Accuracy: {:.2f}%".format(max(accuracy)))
print("\nMinimum Accuracy: {:.2f}%".format(min(accuracy)))
print("\nOverall Accuracy: {:.2f}%".format(np.mean(accuracy)))

List of possible accuracy: [88.35978835978835, 87.88947677836568, 90.24103468547912, 90.24103468547912, 89.0652557319224, 89.00646678424457, 89.2416225749559, 90.06466784244563, 89.53556731334508, 90.41740152851264]

Maximum Accuracy: 90.42%

Minimum Accuracy: 87.89%

Overall Accuracy: 89.41%


Now let's divide the datasets from LAN and NA to a final training and testing.

In [4]:
from sklearn.metrics import confusion_matrix, classification_report

lan_dataset = lan_df.to_numpy()
na_dataset = na_df.to_numpy()

np.random.shuffle(lan_dataset)
np.random.shuffle(na_dataset)

x_train = lan_dataset[:, 0:44]
y_train = lan_dataset[:, 44]

x_test = na_dataset[:, 0:44]
y_test = na_dataset[:, 44]


print(f"x_train size: {len(x_train)}")
print(f"y_train size: {len(y_train)}")


print(f"x_test size: {len(x_test)}")
print(f"y_test size: {len(y_test)}")

model_final_test = GradientBoostingClassifier(n_estimators=55, learning_rate=0.14)
model_final_test.fit(x_train, y_train)

print("Accuracy: {:.2f}%".format(model_final_test.score(x_test, y_test) * 100))

predictions = model_final_test.predict(x_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, predictions))

print("Classification Report:")
print(classification_report(y_test, predictions))


x_train size: 12458
y_train size: 12458
x_test size: 4552
y_test size: 4552
Accuracy: 88.53%
Confusion Matrix:
[[1986  247]
 [ 275 2044]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.88      0.89      0.88      2233
         1.0       0.89      0.88      0.89      2319

    accuracy                           0.89      4552
   macro avg       0.89      0.89      0.89      4552
weighted avg       0.89      0.89      0.89      4552



We can notice that it performed really well by predicting 4552 matches from a totally different server. With an accuracy of **88.60%**


Finally we create a final model and save it for live games predictions with streamlit.