In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:

data = pd.read_csv("final_output.csv")

In [3]:
data.head(5)

Unnamed: 0,FTHG,FTAG,HST,AST,B365H,B365D,B365A,FTR
0,6,0,14.0,2.0,1.1,10.0,23.0,H
1,0,2,2.0,6.0,2.8,3.25,2.6,A
2,2,1,8.0,4.0,1.36,5.0,8.5,H
3,1,0,4.0,3.0,3.4,3.3,2.2,H
4,2,0,8.0,3.0,1.8,3.5,4.75,H


In [4]:
# Import the module
from sklearn.preprocessing import LabelEncoder

# Create an instance of the class
le = LabelEncoder()

# Fit the encoder on the column
le.fit(['A', 'H', 'D'])

# Check the classes and their corresponding labels
print(le.classes_)
# Output: array(['A', 'D', 'H'], dtype='<U1')

# Transform the column into numerical labels
data['FTR'] = le.transform(data['FTR'])
print(data['FTR'])
# Output: array([2, 0, 1, 1, 2, ...])



['A' 'D' 'H']
0        2
1        0
2        2
3        2
4        2
        ..
38753    2
38754    0
38755    2
38756    1
38757    2
Name: FTR, Length: 38758, dtype: int32


In [5]:
#Splitting the data into independent and dependent variables
X = data.iloc[:,0:7].values
y = data.iloc[:,7].values
print('The independent features set: ')
print(X[:7,:])
print('The dependent variable: ')
print(y[:7])

The independent features set: 
[[ 6.    0.   14.    2.    1.1  10.   23.  ]
 [ 0.    2.    2.    6.    2.8   3.25  2.6 ]
 [ 2.    1.    8.    4.    1.36  5.    8.5 ]
 [ 1.    0.    4.    3.    3.4   3.3   2.2 ]
 [ 2.    0.    8.    3.    1.8   3.5   4.75]
 [ 1.    1.    1.    5.    2.2   3.2   3.5 ]
 [ 2.    1.    9.    6.    2.3   3.4   3.1 ]]
The dependent variable: 
[2 0 2 2 2 1 2]


In [6]:
X = X.astype('float32')
X.dtype

dtype('float32')

In [7]:
X.shape

(38758, 7)

In [8]:
import torch


y = torch.tensor(y,dtype=torch.long)

In [9]:
y.shape

torch.Size([38758])

In [10]:
# Split the data to train and test dataset.
from sklearn.model_selection import train_test_split
sample_weight = np.random.RandomState(42).rand(y.shape[0])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,shuffle=True,random_state=42)

In [11]:
X_train.shape

(27130, 7)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence


The Transformer Model

In [13]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers,num_heads):
        super(TransformerModel,self).__init__()

        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(hidden_dim, num_heads), num_layers)
        
        self.fc = nn.Linear(hidden_dim, output_dim)

    
    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        x = x.mean(dim=1)
        x = self.fc(x)

        return x



In [14]:
#set random seeds
torch.manual_seed(42)

#creating a data loader
# set up DataLoader for training set
train_loader = DataLoader(list(zip(X_train, y_train)), shuffle=True, batch_size=16)

# set up DataLoader for testing set
test_loader = DataLoader(list(zip(X_test, y_test)), shuffle=True, batch_size=1)

#define hyperparameters
input_dim = 7
hidden_dim = 32
output_dim = 3
num_layers = 6
num_heads = 8
batch_size = 32
num_epochs = 25
learning_rate = 0.001

#create models and optimizers
model = TransformerModel(input_dim,hidden_dim, output_dim, num_layers, num_heads)

optimizer =  optim.Adam(model.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss()


  from .autonotebook import tqdm as notebook_tqdm


In [15]:
#define hyperparameters
for epoch in range(num_epochs):
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        #adjusting the dimensionality of input data
        inputs = inputs.unsqueeze(1)

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        total_loss += loss.item() 
        _,predicted = torch.max(outputs, 1)
        total_correct += (predicted == targets).sum().item()
        total_samples += targets.size(0)

        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * total_correct / total_samples

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")


Epoch 1/25, Loss: 0.6651, Accuracy: 69.86%
Epoch 2/25, Loss: 0.0420, Accuracy: 98.97%
Epoch 3/25, Loss: 0.0226, Accuracy: 99.47%
Epoch 4/25, Loss: 0.3342, Accuracy: 85.71%
Epoch 5/25, Loss: 0.0247, Accuracy: 99.57%
Epoch 6/25, Loss: 0.1036, Accuracy: 96.87%
Epoch 7/25, Loss: 0.3673, Accuracy: 77.97%
Epoch 8/25, Loss: 0.5640, Accuracy: 69.94%
Epoch 9/25, Loss: 0.2234, Accuracy: 92.85%
Epoch 10/25, Loss: 0.0534, Accuracy: 98.82%
Epoch 11/25, Loss: 0.0711, Accuracy: 97.01%
Epoch 12/25, Loss: 0.1084, Accuracy: 95.60%
Epoch 13/25, Loss: 0.0228, Accuracy: 99.45%
Epoch 14/25, Loss: 0.0024, Accuracy: 99.96%
Epoch 15/25, Loss: 0.0193, Accuracy: 99.64%
Epoch 16/25, Loss: 0.0058, Accuracy: 99.90%
Epoch 17/25, Loss: 0.0536, Accuracy: 98.10%
Epoch 18/25, Loss: 0.0022, Accuracy: 99.97%
Epoch 19/25, Loss: 0.0008, Accuracy: 99.99%
Epoch 20/25, Loss: 0.0162, Accuracy: 99.68%
Epoch 21/25, Loss: 0.0080, Accuracy: 99.87%
Epoch 22/25, Loss: 0.0064, Accuracy: 99.90%
Epoch 23/25, Loss: 0.0017, Accuracy: 99.9

In [16]:
#evaluating the model on a test set
correct = 0
total = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        #adjusting the dimensionality of input data
        inputs = inputs.unsqueeze(1)

        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100* correct/ total
print(f"Test Accuracy: {accuracy:.2f}")

Test Accuracy: 87.95


In [17]:
#["FTHG","FTAG","HTHG","HTAG","Form","B365H","B365D","B365A"]
X_new = np.array([[1.08,1.08,1.00,2.90,3.60,2.30]])
#X_net = torch.FloatTensor(X_new)
#X_new.view(-1,1)
X_new = torch.IntTensor(X_new)
X_new.shape

torch.Size([1, 6])

In [18]:
import torch

# Assuming your model, optimizer, and criterion are already defined
# model = TransformerModel(input_dim, hidden_dim, output_dim, num_layers, num_heads)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# criterion = nn.CrossEntropyLoss()

# Load the trained model checkpoint
#checkpoint = torch.load('path_to_your_model_checkpoint.pth')
#model.load_state_dict(checkpoint['model_state_dict'])
#optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#epoch = checkpoint['epoch']
#loss = checkpoint['loss']

# Set the model to evaluation mode
model.eval()

# Create a new input tensor with 7 elements
new_input = torch.tensor([0.83,1.39,4.35,4.91,4.17,2.78,2.18])

# Adjust the dimensionality of the input data
new_input = new_input.unsqueeze(0).unsqueeze(1)  # Adding batch and sequence dimensions

# Make the prediction
with torch.no_grad():
    prediction = model(new_input)

# Convert the prediction to a probability distribution using softmax
predicted_probs = torch.softmax(prediction, dim=1)

# Get the predicted class
predicted_class = torch.argmax(predicted_probs, dim=1).item()

print("Predicted Class:", predicted_class)
#{0:A, 1:D, 2:H}


Predicted Class: 1


In [19]:
import pickle

#Create Pickle file from the Neural network Model
with open('transformer.pickle', 'wb') as dump_var:
   pickle.dump(data, dump_var)

In [20]:

#[1.00,1.26,4.35,4.35,2.62,3.00,3.00]
#[1.30,0.65,4.90,4.90,2.10,3.60,3.40]
#[1.15,4.50,4.35,6.10,3.60,3.30,2.15]
#[1.26,1.04,5.17,5.13,2.20,3.10,3.60]
#[0.96,1.25,4.50,4.75,2.55,3.30,2.80]
#[0.75,1.13,4.46,4.50,3.00,3.10,2.55]
#[1.35,1.00,5.15,5.20,2.20,3.30,3.40]
#[1.15,1.65,5.40,5.10,2.75,2.90,2.70]
#[0.83,0.87,4.35,5.48,2.30,3.00,3.40]
#10[0.80,0.84,4.40,4.92,2.55,3.10,3.10]
#[1.00,1.32,4.72,4.68,2.20.3.00,3.75]
#[1.17,0.87,4.74,5.13,2.45,3.25,3.00]
#[1.00,0.95,3.90,4.15,2.00,3.30,4.00]
#[1.00,1.00,6.30,6.05,3.10,3.40,2.37]
#[2.13,1.35,6.61,5.65,2.15,3.75,3.10]
#[1.04,1.82,4.22,6.36,3.75,3.30,2.05]
#[1.00,1.85,5.60,7.10,2.30,3.40,3.10]