In [49]:
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, precision_score
import torch

In [50]:
torch.set_printoptions(precision=6, sci_mode=False)

In [51]:
df = pd.read_csv("data_win_prediction.csv",sep=";")
df = df.drop(columns=["map","\tMatch ID"])

In [52]:
df.Team_A_avg_win_percentage = df.Team_A_avg_win_percentage.str.replace(",",".").astype(float)
df.Team_B_avg_win_percentage = df.Team_B_avg_win_percentage.str.replace(",",".").astype(float)
df.Team_A_avg_KR = df.Team_A_avg_KR.str.replace(",",".").astype(float)
df.Team_A_avg_elo = df.Team_A_avg_elo.str.replace(",",".").astype(float)
df.Team_B_avg_KR = df.Team_B_avg_KR.str.replace(",",".").astype(float)

In [53]:
df.win = df.win.map({"team a":0, "team b":1})

In [54]:
X, y = df.drop(columns=["win"]), df.win

In [55]:
columns = X.columns.to_list()

In [56]:
#X_train, X_test, y_train, y_test = train_test_split(pd.DataFrame(X_scaled, columns=columns), y, test_size = 0.25)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

In [57]:
# creating tensors
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

# intializing data into loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
val_loader = DataLoader(test_dataset, batch_size = 32, shuffle = False)

In [58]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [59]:
class CS_NETWORK(nn.Module):
    def __init__(self, input_size, layer_1_size, layer_2_size, output_size):
        super().__init__()
        self.features = nn.Sequential(
            nn.Linear(input_size, layer_1_size),
            nn.BatchNorm1d(layer_1_size),
            nn.ReLU(),
            nn.Linear(layer_1_size, layer_2_size),
            nn.BatchNorm1d(layer_2_size),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(layer_2_size, output_size)
        )

    def forward(self, x):
        x = self.features(x)
        return x

In [60]:
model = CS_NETWORK(len(columns), 64, 64, 1)

In [61]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.01)

In [62]:
epochs = 100
threshold = 0.5

In [67]:
def train_model(model, criterion, optimizer, train_loader, epochs, threshold = 0.5) -> None:
    model.train()
    train_loss_list = []
    train_acc_list = []
    
    for epoch in range(epochs):
        running_loss = 0.0
        total = 0
        correct = 0
    
        for x_train, y_train in train_loader:
            # Forward pass
            optimizer.zero_grad()
            outputs = model(x_train).squeeze()
            loss = criterion(outputs, y_train.float())
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            # Accumulate loss (multiply by batch size since loss is usually mean)
            running_loss += loss.item() * x_train.size(0)
            
            # Calculate accuracy
            predict = (outputs > 0.5).float()
            total += y_train.size(0)
            correct += (predict == y_train).sum().item()
        
        # Calculate average loss and accuracy for the epoch
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct / total
        
        train_loss_list.append(epoch_loss)
        train_acc_list.append(epoch_acc)
    
        if (epoch + 1) % 5 == 0:
            print(
                f"Epoch [{epoch+1}/{epochs}], ",
                f"Loss: {epoch_loss:.4f}, ",
                f"Acc: {epoch_acc:.3f}"
            )

In [68]:
train_model(model, criterion, optimizer, train_loader, epochs, threshold)

Epoch [5/100],  Loss: 0.4370,  Acc: 0.775
Epoch [10/100],  Loss: 0.4306,  Acc: 0.784
Epoch [15/100],  Loss: 0.4419,  Acc: 0.784
Epoch [20/100],  Loss: 0.4305,  Acc: 0.783
Epoch [25/100],  Loss: 0.4360,  Acc: 0.782
Epoch [30/100],  Loss: 0.4385,  Acc: 0.787
Epoch [35/100],  Loss: 0.4410,  Acc: 0.775
Epoch [40/100],  Loss: 0.4351,  Acc: 0.789
Epoch [45/100],  Loss: 0.4383,  Acc: 0.788
Epoch [50/100],  Loss: 0.4381,  Acc: 0.788
Epoch [55/100],  Loss: 0.4432,  Acc: 0.790
Epoch [60/100],  Loss: 0.4399,  Acc: 0.778
Epoch [65/100],  Loss: 0.4331,  Acc: 0.785
Epoch [70/100],  Loss: 0.4356,  Acc: 0.779
Epoch [75/100],  Loss: 0.4353,  Acc: 0.784
Epoch [80/100],  Loss: 0.4409,  Acc: 0.781
Epoch [85/100],  Loss: 0.4287,  Acc: 0.785
Epoch [90/100],  Loss: 0.4324,  Acc: 0.794
Epoch [95/100],  Loss: 0.4295,  Acc: 0.794
Epoch [100/100],  Loss: 0.4351,  Acc: 0.791


In [70]:
def testing_model(model, val_loader, y_test, threshold = 0.5) -> None:
    model.eval()
    total = 0
    correct = 0
    val_accuracy_list = []
    with torch.no_grad():
        for x_val, y_val in val_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            outputs = model(x_val).squeeze()
            predict = (outputs > threshold).float()

            predict_np = predict.cpu().detach().numpy()
            y_test_np = y_test_tensor.cpu().numpy()
       
            total += y_val.size(0)
            correct += (predict == y_val).sum().item()

            val_accuracy = (correct/total) * 100
            val_accuracy_list.append(val_accuracy)

            
    print(sum(val_accuracy_list)/len(val_accuracy_list))
    print(recall_score(predict, y_test_tensor))

In [71]:
testing_model(model, val_loader, threshold)

74.53588854804369


ValueError: Found input variables with inconsistent numbers of samples: [15, 367]