# Neural Network Implementation with Specific Column Set

Note: this notebook was used for a separate project and can be ignored.


In [34]:
import pandas as pd
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import QuantileTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import os
import matplotlib.pyplot as plt
import joblib

# Directory where this notebook saves its trained models.
output_dir = "../outputs/nn"
# exist_ok=True makes the call idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

In [35]:
# Columns that are excluded from this notebook's feature set
columns_to_drop = [
    "magic_dmg",
    "ad_dmg",
    "all_dmg",
    "ad_dmg_taken",
    "total_heal",
    "turret_kill",
    "inhib_kill",
    "objective_dmg",
    "turret_dmg",
    "largest_multi_kill",
    "largest_kill_spree",
    "cc_score",
    "vision",
    "dmg_taken_diff",
]

# Read the processed games table and discard the excluded columns in one chain
df = pd.read_csv("../data/processed/games.csv").drop(columns=columns_to_drop)

# Preview the first rows of the reduced frame
df.head()

Unnamed: 0,champ,position,op_score,win,length,kill,death,assist,kda,dmg,...,dmg_taken_perc,gold_perc,dmg_per_gold,pinks_bought,ward_kill,ward_place,cs_diff,gold_diff,level_diff,dmg_diff
0,Kai'Sa,ADC,2.706,0,27.117,1,3,3,1.333,15557,...,0.133,0.215,1.591,2,2,9,3,-1796,-1,-2384
1,Kindred,JUNGLE,1.253,0,27.117,2,6,2,0.667,17786,...,0.28,0.197,1.984,0,3,1,-50,-3741,-2,1534
2,Ahri,MID,3.671,0,27.117,2,1,2,4.0,17686,...,0.098,0.207,1.879,5,1,13,-7,-2547,-1,2770
3,Aatrox,TOP,1.777,0,27.117,2,6,2,0.667,17220,...,0.311,0.227,1.666,0,4,6,21,-982,-1,-4790
4,Nautilus,SUPPORT,2.501,0,27.117,3,6,5,1.333,6797,...,0.178,0.155,0.963,6,12,37,-6,-425,-2,2855


In [36]:
# Columns fed through the quantile transform before reaching the network
numerical_features = [
    "length",
    "kill",
    "death",
    "assist",
    "kda",
    "dmg",
    "dmg_taken",
    "mitigated_dmg",
    "cs",
    "gold",
    "level",
    "kp",
    "dmg_perc",
    "dmg_taken_perc",
    "gold_perc",
    "dmg_per_gold",
    "pinks_bought",
    "ward_kill",
    "ward_place",
    "cs_diff",
    "gold_diff",
    "level_diff",
    "dmg_diff",
]

# Use QuantileTransformer for numerical features.
# random_state is pinned because QuantileTransformer draws a random subsample
# of rows to estimate the quantiles on large inputs; without a seed every fit
# yields slightly different quantiles.
quantile_transformer = Pipeline(
    steps=[
        (
            "quantile",
            QuantileTransformer(output_distribution="normal", random_state=42),
        )
    ]
)

# Model inputs: everything except the regression target. Computed once and
# shared by both preprocessors.
feature_frame = df.drop(columns="op_score")
y = df["op_score"].values

# NOTE(review): both preprocessors are fit on the full dataset, before the
# train/test split performed in the next cell, so test-set statistics leak
# into the transform. Fit on the training rows only if the reported test
# metrics need to be unbiased.

# --- Preprocessor that includes the champion identity -----------------------
categorical_features_with_champ = ["champ", "position", "win"]

# Use OneHotEncoder for categorical features
categorical_transformer_with_champ = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Combine all preprocessing steps into a single ColumnTransformer for champ
preprocessor_with_champ = ColumnTransformer(
    transformers=[
        ("num", quantile_transformer, numerical_features),
        ("cat", categorical_transformer_with_champ, categorical_features_with_champ),
    ]
)

# fit_transform already leaves the preprocessor fitted. It is deliberately NOT
# refit afterwards, so the object pickled below is exactly the one that
# produced X_with_champ.
X_with_champ = preprocessor_with_champ.fit_transform(feature_frame)

# Save the preprocessor with champ
joblib.dump(
    preprocessor_with_champ,
    os.path.join(output_dir, "vt_preprocessor_with_champ.pkl"),
)

# --- Preprocessor that ignores the champion identity ------------------------
categorical_features_without_champ = ["position", "win"]

# Use OneHotEncoder for categorical features
categorical_transformer_without_champ = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Combine all preprocessing steps into a single ColumnTransformer without champ
preprocessor_without_champ = ColumnTransformer(
    transformers=[
        ("num", quantile_transformer, numerical_features),
        ("cat", categorical_transformer_without_champ, categorical_features_without_champ),
    ]
)

# Same as above: a single fit_transform, no second fit.
X_without_champ = preprocessor_without_champ.fit_transform(feature_frame)

# Save the preprocessor without champ
joblib.dump(
    preprocessor_without_champ,
    os.path.join(output_dir, "vt_preprocessor_without_champ.pkl"),
)

# Check the shapes of X and y
X_with_champ.shape, X_without_champ.shape, y.shape

((204390, 197), (204390, 30), (204390,))

In [37]:
# Target variable
y = df["op_score"].values

# Split both feature matrices and the target in ONE call. train_test_split
# applies the same row indices to every array it is given, so the two feature
# sets and the target stay row-aligned by construction rather than by relying
# on two separate calls sharing a random_state.
(
    X_train_with_champ,
    X_test_with_champ,
    X_train_without_champ,
    X_test_without_champ,
    y_train,
    y_test,
) = train_test_split(
    X_with_champ, X_without_champ, y, test_size=0.2, random_state=42
)

In [38]:
# Define a function to create and compile the model
def create_and_compile_model(input_shape):
    """Build and compile a small fully connected regression network.

    Args:
        input_shape: Number of input features per sample (an int). The model
            accepts inputs of shape ``(input_shape,)``.

    Returns:
        A compiled ``keras.models.Sequential`` model with two hidden ReLU
        layers (128 and 64 units) and a single linear output unit. It is
        compiled with the Adam optimizer, mean-squared-error loss, and
        MSE / MAE metrics.
    """
    model = keras.models.Sequential(
        [
            # Declare the input explicitly. Passing `input_shape=` to the first
            # Dense layer instead makes Keras emit a UserWarning on every
            # model construction.
            keras.Input(shape=(input_shape,)),
            keras.layers.Dense(128, activation="relu"),
            keras.layers.Dense(64, activation="relu"),
            # Single unit, no activation: plain linear output for regression.
            keras.layers.Dense(1),
        ]
    )
    model.compile(
        optimizer="adam",
        loss="mean_squared_error",
        metrics=["mean_squared_error", "mean_absolute_error"],
    )
    return model

In [39]:
# Fit the network on the champ-inclusive features, then score it on the test split
model_with_champ = create_and_compile_model(X_train_with_champ.shape[1])

# 20% of the training rows are held out by Keras for per-epoch validation
history_with_champ = model_with_champ.fit(
    x=X_train_with_champ,
    y=y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Keras-side loss/metrics on the test split
test_results_with_champ = model_with_champ.evaluate(
    x=X_test_with_champ,
    y=y_test,
    verbose=1,
)

# scikit-learn metrics computed from the raw predictions
y_pred_with_champ = model_with_champ.predict(x=X_test_with_champ)
mse_with_champ = mean_squared_error(y_true=y_test, y_pred=y_pred_with_champ)
r2_with_champ = r2_score(y_true=y_test, y_pred=y_pred_with_champ)

print(f"Model with champ - Mean Squared Error: {mse_with_champ}")
print(f"Model with champ - R^2 Score: {r2_with_champ}")

# Persist the trained model next to the other notebook outputs
model_with_champ.save(os.path.join(output_dir, "model_with_champ.h5"))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 796us/step - loss: 1.2023 - mean_absolute_error: 0.6813 - mean_squared_error: 1.2023 - val_loss: 0.3327 - val_mean_absolute_error: 0.4532 - val_mean_squared_error: 0.3327
Epoch 2/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 779us/step - loss: 0.3318 - mean_absolute_error: 0.4540 - mean_squared_error: 0.3318 - val_loss: 0.3149 - val_mean_absolute_error: 0.4406 - val_mean_squared_error: 0.3149
Epoch 3/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 765us/step - loss: 0.3150 - mean_absolute_error: 0.4415 - mean_squared_error: 0.3150 - val_loss: 0.3141 - val_mean_absolute_error: 0.4398 - val_mean_squared_error: 0.3141
Epoch 4/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 762us/step - loss: 0.3083 - mean_absolute_error: 0.4355 - mean_squared_error: 0.3083 - val_loss: 0.3105 - val_mean_absolute_error: 0.4352 - val_mean_squared_error: 0.3105
Epoch 5/50




Model with champ - Mean Squared Error: 0.3239996163648315
Model with champ - R^2 Score: 0.9333396764050576


In [40]:
# Fit the network on the champ-free features, then score it on the test split
model_without_champ = create_and_compile_model(X_train_without_champ.shape[1])

# 20% of the training rows are held out by Keras for per-epoch validation
history_without_champ = model_without_champ.fit(
    x=X_train_without_champ,
    y=y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Keras-side loss/metrics on the test split
test_results_without_champ = model_without_champ.evaluate(
    x=X_test_without_champ,
    y=y_test,
    verbose=1,
)

# scikit-learn metrics computed from the raw predictions
y_pred_without_champ = model_without_champ.predict(x=X_test_without_champ)
mse_without_champ = mean_squared_error(y_true=y_test, y_pred=y_pred_without_champ)
r2_without_champ = r2_score(y_true=y_test, y_pred=y_pred_without_champ)

print(f"Model without champ - Mean Squared Error: {mse_without_champ}")
print(f"Model without champ - R^2 Score: {r2_without_champ}")

# Persist the trained model next to the other notebook outputs
model_without_champ.save(os.path.join(output_dir, "model_without_champ.h5"))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 691us/step - loss: 1.1605 - mean_absolute_error: 0.6768 - mean_squared_error: 1.1605 - val_loss: 0.3398 - val_mean_absolute_error: 0.4575 - val_mean_squared_error: 0.3398
Epoch 2/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 660us/step - loss: 0.3328 - mean_absolute_error: 0.4543 - mean_squared_error: 0.3328 - val_loss: 0.3123 - val_mean_absolute_error: 0.4386 - val_mean_squared_error: 0.3123
Epoch 3/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 662us/step - loss: 0.3193 - mean_absolute_error: 0.4441 - mean_squared_error: 0.3193 - val_loss: 0.3121 - val_mean_absolute_error: 0.4386 - val_mean_squared_error: 0.3121
Epoch 4/50
[1m4088/4088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 672us/step - loss: 0.3133 - mean_absolute_error: 0.4395 - mean_squared_error: 0.3133 - val_loss: 0.3127 - val_mean_absolute_error: 0.4381 - val_mean_squared_error: 0.3127
Epoch 5/50




Model without champ - Mean Squared Error: 0.2969577344685292
Model without champ - R^2 Score: 0.9389033268131923
