In [None]:
import pandas as pd
import torch as th
import torch.nn as nn
import glob
import pickle

import sys
sys.path.append('../models/')
from models.lstm import LSTM, train_model, RPS_loss

In [None]:

# Training split: home/away team indices, per-team match features, and match targets.
# The generator loads the files lazily, in the listed order, as the names are unpacked.
(
    training_home_teams_matches,
    training_away_teams_matches,
    training_matches_features_home,
    training_matches_features_away,
    training_targets,
) = (
    th.load(tensor_path)
    for tensor_path in (
        "dataset/tensors/training_home_teams_matches.pt",
        "dataset/tensors/training_away_teams_matches.pt",
        "dataset/tensors/training_matches_features_home.pt",
        "dataset/tensors/training_matches_features_away.pt",
        "dataset/tensors/training_targets.pt",
    )
)

# Test split: same five tensors, same order.
(
    test_home_teams_matches,
    test_away_teams_matches,
    test_matches_features_home,
    test_matches_features_away,
    test_targets,
) = (
    th.load(tensor_path)
    for tensor_path in (
        "dataset/tensors/test_home_teams_matches.pt",
        "dataset/tensors/test_away_teams_matches.pt",
        "dataset/tensors/test_matches_features_home.pt",
        "dataset/tensors/test_matches_features_away.pt",
        "dataset/tensors/test_targets.pt",
    )
)



In [None]:
# `pickle` is imported in the notebook's first cell, so it is not re-imported here.
# SECURITY: pickle.load can execute arbitrary code while deserialising; only point
# these paths at files produced by this project's own preprocessing.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as file:
        return pickle.load(file)


file1 = 'dataset/tensors/idx_to_teams.pkl'
file2 = 'dataset/tensors/teams_to_idx.pkl'
file3 = 'dataset/tensors/result_map.pkl'

idx_to_teams = _load_pickle(file1)
teams_to_idx = _load_pickle(file2)
result_map = _load_pickle(file3)

print(result_map)

# Invert the loaded result -> index mapping instead of hard-coding its inverse,
# so the two dictionaries cannot drift apart if the stored mapping ever changes.
idx_to_result = {idx: result for result, idx in result_map.items()}



{'H': 0, 'D': 1, 'A': 2}


In [None]:
# Sanity check: print the shape of every loaded tensor, split by split.
print("Training data")
for training_tensor in (
    training_home_teams_matches,     # index of the home team for each match
    training_away_teams_matches,     # index of the away team for each match
    training_matches_features_home,  # features of the home team for each match
    training_matches_features_away,  # features of the away team for each match
    training_targets,                # target of each match: H, D or A
):
    print(training_tensor.shape)

print("Test data")
for test_tensor in (
    test_home_teams_matches,
    test_away_teams_matches,
    test_matches_features_home,
    test_matches_features_away,
    test_targets,
):
    print(test_tensor.shape)




Training data
torch.Size([26361])
torch.Size([26361])
torch.Size([26361, 5, 14])
torch.Size([26361, 5, 14])
torch.Size([26361])
Test data
torch.Size([4054])
torch.Size([4054])
torch.Size([4054, 5, 14])
torch.Size([4054, 5, 14])
torch.Size([4054])


In [None]:
import torch.nn.functional as F

def one_hot_targets(targets, num_classes=None):
    """One-hot encode a tensor of integer class indices.

    Args:
        targets: integer tensor of class indices, passed straight to ``F.one_hot``.
        num_classes: width of the encoding. When ``None`` (the default, which keeps
            the original behaviour) it is inferred as ``max(targets) + 1``. Pass it
            explicitly when several splits must share the same width, since a split
            that happens to lack the highest class would otherwise be encoded narrower.

    Returns:
        The result of ``F.one_hot``: ``targets`` with one extra trailing dimension
        of size ``num_classes``.
    """
    if num_classes is None:
        # Inferred from the data: only right if the largest class index occurs in `targets`.
        num_classes = int(th.max(targets).item()) + 1
    return F.one_hot(targets, num_classes=num_classes)

# Replace the raw class indices with their one-hot encoding. The rank check keeps
# this cell safe to re-run: encoding an already one-hot (2-D) tensor a second time
# would add a spurious extra dimension.
if training_targets.dim() == 1:
    training_targets = one_hot_targets(training_targets)
if test_targets.dim() == 1:
    test_targets = one_hot_targets(test_targets)

# The encoding width is inferred per split, so check that both splits agree.
assert training_targets.shape[1] == test_targets.shape[1], \
    "training and test targets were one-hot encoded with different numbers of classes"

print(training_targets.shape)
print(test_targets.shape)

# Preview the first three encoded targets of each split.
for i in range(3):
    print(training_targets[i])
    print(test_targets[i])

torch.Size([26361, 3])
torch.Size([4054, 3])
tensor([0, 0, 1])
tensor([1, 0, 0])
tensor([0, 0, 1])
tensor([0, 0, 1])
tensor([0, 1, 0])
tensor([1, 0, 0])


In [None]:
# Inspect a single training match: team names, both feature sequences and the result.
index = 0
print("First match")
print("Home team: ", idx_to_teams[training_home_teams_matches[index].item()])
print("Away team: ", idx_to_teams[training_away_teams_matches[index].item()])
print("Features home: ", training_matches_features_home[index])
print("Features away: ", training_matches_features_away[index])
# The targets are one-hot encoded by this point, so `.item()` on the row no longer
# works; recover the class index with argmax before mapping it back to H / D / A.
print("Target: ", idx_to_result[training_targets[index].argmax().item()])


First match
Home team:  Troyes
Away team:  Sochaux
Features home:  tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 2.7000, 3.0000, 2.8000]])
Features away:  tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000],

In [None]:
# Split the training data into training and validation sets, then wrap every split (including test) in a DataLoader
from torch.utils.data import DataLoader, TensorDataset

split = 0.8
split_idx = int(len(training_home_teams_matches) * split)

# Fixed tensor order: home index, away index, home features, away features, targets.
_training_tensors = (
    training_home_teams_matches,
    training_away_teams_matches,
    training_matches_features_home,
    training_matches_features_away,
    training_targets,
)
_test_tensors = (
    test_home_teams_matches,
    test_away_teams_matches,
    test_matches_features_home,
    test_matches_features_away,
    test_targets,
)

# The first `split` fraction of the rows is used for training, the remainder for validation.
train_dataset = TensorDataset(*(tensor[:split_idx] for tensor in _training_tensors))
val_dataset = TensorDataset(*(tensor[split_idx:] for tensor in _training_tensors))
test_dataset = TensorDataset(*_test_tensors)

# shuffle stays False everywhere; per the original note, shuffling the training
# data could cause lookahead (presumably the rows are ordered in time - TODO confirm).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Report the number of samples and the number of batches for each split.
for _label, _dataset, _loader in (
    ("Training data", train_dataset, train_loader),
    ("Validation data", val_dataset, val_loader),
    ("Test data", test_dataset, test_loader),
):
    print(_label)
    print(len(_dataset))
    print(len(_loader))

Training data
21088
659
Validation data
5273
165
Test data
4054
127


In [None]:
# Peek at the first training batch only and print the shape of each of its tensors.
for first_batch in train_loader:
    home_teams, away_teams, home_features, away_features, targets = first_batch
    for batch_tensor in (home_teams, away_teams, home_features, away_features, targets):
        print(batch_tensor.shape)
    break

torch.Size([32])
torch.Size([32])
torch.Size([32, 5, 14])
torch.Size([32, 5, 14])
torch.Size([32, 3])


In [11]:
# Seed PyTorch's random number generator before the model is built, so that weight
# initialisation (and anything else drawing from it) is repeatable across runs.
SEED = 42
th.manual_seed(SEED)

model = LSTM(
    num_features=14,   # matches the last dimension of the feature tensors ([N, 5, 14])
    out_channels=7,
    hidden_size=32,
    num_heads=2,
    num_classes=3,     # matches the width of the one-hot targets (H / D / A)
    bottleneck_dim=None,
)

train_model(
    model,
    criterion=RPS_loss,  # alternative criterion left by the author: nn.CrossEntropyLoss()
    train_loader=train_loader,
    val_loader=val_loader,
    learning_rate=0.0005,
    num_epochs=10,
)

50 Loss: 0.40241381525993347 Train Acc: 13.971168437025797 Val Acc: 13.757575757575758
