In [2]:
pip install --upgrade --no-cache-dir sports_prediction_framework

Collecting sports_prediction_framework
  Downloading sports_prediction_framework-0.1.2-py3-none-any.whl.metadata (607 bytes)
Collecting mlflow==2.20.2 (from sports_prediction_framework)
  Downloading mlflow-2.20.2-py3-none-any.whl.metadata (30 kB)
Collecting mlflow_skinny==2.20.2 (from sports_prediction_framework)
  Downloading mlflow_skinny-2.20.2-py3-none-any.whl.metadata (31 kB)
Collecting numpy<2.0,>=1.18.5 (from sports_prediction_framework)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m142.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting optuna==4.3.0 (from sports_prediction_framework)
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting paramiko==3.5.1 (from sports_prediction_framework)
  Downloading paramiko-3.5.1-py3-none-any.whl.metadata (4.6 kB)
Collecting psycopg2_binary==2.9.10 (from sports_prediction_framework)
  D

In [1]:
from sports_prediction_framework.datawrapper.SportType import SportType
from sports_prediction_framework.dataloader.DataLoader import DataLoader
from sports_prediction_framework.datawrapper.DataHandler import DataHandler
import pandas as pd


import io

from google.colab import files

# Colab-only step: opens the browser upload dialog. The returned dict maps
# each uploaded file's name to its raw bytes.
uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded instead of re-opening a
    # hard-coded path: the file may have a different name, and Colab stores a
    # re-upload of an existing name under a new one (e.g. 'data (1).parquet'),
    # in which case 'data.parquet' on disk would be the stale copy.
    # Prefer the entry named 'data.parquet'; otherwise take the first upload.
    parquet_bytes = uploaded.get('data.parquet', next(iter(uploaded.values())))
    df = pd.read_parquet(io.BytesIO(parquet_bytes))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a file that is
    # already present in the working directory, as the cell did originally.
    df = pd.read_parquet('data.parquet')

# Wrap the raw frame in a DataHandler, then in the wrapper class that
# SportType.FOOTBALL.get_wrapper() returns.
handler = DataHandler(df)
dw = SportType.FOOTBALL.get_wrapper()(handler)
print(dw)

Saving data.parquet to data.parquet
            Date           Home            Away  HS  AS  WDL  odds_1  odds_X  \
0     2004-01-21  Bayern Munich    Hamburger SV   3   0    1    1.39    4.00   
1     2004-01-22      Wolfsburg        Dortmund   1   2    2    1.83    3.25   
2     2004-01-22       Nurnberg  Kaiserslautern   1   3    2    2.10    3.25   
3     2004-01-22       Nurnberg  Kaiserslautern   1   3    2    2.00     NaN   
4     2004-01-22          Mainz       Stuttgart   2   3    2    2.79    3.25   
...          ...            ...             ...  ..  ..  ...     ...     ...   
10301 2022-02-13   Union Berlin        Dortmund   0   3    2    3.80    3.60   
10302 2022-02-13   Union Berlin        Dortmund   0   3    2    3.50    3.50   
10303        NaT  Hertha Berlin        Duisburg   2   0    1    1.62    3.60   
10304        NaT  Hertha Berlin        Duisburg   2   0    1    1.05   10.00   
10305        NaT  Hertha Berlin        Duisburg   2   0    1     NaN     NaN   

   

In [2]:
from sports_prediction_framework.transformer.Transformer import *
# Run a Transformer built with no arguments over the wrapped data and rebind
# `dw` to the result. Comparing the printed frames before and after this cell,
# the visible change is the added HID and AID columns next to Home and Away
# (presumably numeric team ids — confirm against the framework docs).
# NOTE(review): `dw` is overwritten, so re-running this cell feeds already
# transformed data back into transform() — confirm that is harmless, or
# restart the kernel and run the cells in order.
t = Transformer()
dw = t.transform(dw)
print(dw)

            Date           Home            Away  HID  AID  HS  AS  WDL  \
0     2004-01-21  Bayern Munich    Hamburger SV    5   17   3   0    1   
1     2004-01-22      Wolfsburg        Dortmund   35    9   1   2    2   
2     2004-01-22       Nurnberg  Kaiserslautern   27   24   1   3    2   
3     2004-01-22       Nurnberg  Kaiserslautern   27   24   1   3    2   
4     2004-01-22          Mainz       Stuttgart   26   32   2   3    2   
...          ...            ...             ...  ...  ...  ..  ..  ...   
10301 2022-02-13   Union Berlin        Dortmund   33    9   0   3    2   
10302 2022-02-13   Union Berlin        Dortmund   33    9   0   3    2   
10303        NaT  Hertha Berlin        Duisburg   20   10   2   0    1   
10304        NaT  Hertha Berlin        Duisburg   20   10   2   0    1   
10305        NaT  Hertha Berlin        Duisburg   20   10   2   0    1   

       odds_1  odds_X  odds_2  
0        1.39    4.00    6.50  
1        1.83    3.25    3.75  
2        2.10  

In [3]:
from sports_prediction_framework.transformer.Scope import *
from sports_prediction_framework.transformer.DataSelector import *

# Window settings used by both scopes below, keyed on the 'Season' column.
season_window = {
    'col': 'Season',
    'start': 2004,
    'max': 2008,
    'size': 1,
    'stride': 2,
}

# Each ScopeExpander gets its own copy of the settings, exactly as when the
# literal was written out twice.
# NOTE(review): the relevant and prediction scopes use identical windows —
# confirm that learning and predicting on the same seasons is intended.
relevant_scope = [WindowSelector(ScopeExpander(dw, dict(season_window)))]
prediction_scope = [WindowSelector(ScopeExpander(dw, dict(season_window)))]

scope = DataSelector(relevant_scope, prediction_scope)

In [4]:
from sports_prediction_framework.model.FlatModel import FlatModel
from sports_prediction_framework.learner.Learner import Learner, UpdatingLearner, Tester, Trainer

# Hyper-parameters handed to FlatModel as a plain dict.
params = dict(
    embed_dim=32,
    out_dim=3,
    n_dense=4,
    dense_dim=64,
    architecture_type='rectangle',
    batch_size=64,
)
flat = FlatModel(params)

# Both learners wrap the same `flat` instance in a Trainer/Tester pair and
# share `scope`; `l1` is passed to the UpdatingLearner `l` in a one-element list.
l1 = Learner(Trainer(flat), Tester(flat), scope)
l = UpdatingLearner(Trainer(flat), Tester(flat), scope, [l1])

In [5]:
from sports_prediction_framework.optimizer.Optimizer import Optimizer
from sports_prediction_framework.utils.Evaluation import Metric

# Search space for the optimizer: a single entry, 'n_dense', described by the
# tuple ('int', 2, 5).
search_space = dict(n_dense=('int', 2, 5))

# Build the optimizer around the wrapped data and the updating learner `l`,
# scored with Metric.ACCURACY over 4 trials, then start the search.
# NOTE(review): in the recorded run, two trials with n_dense=4 scored
# differently (0.7683 vs 0.7762), so training appears to be unseeded — confirm.
opt = Optimizer(
    dw,
    l,
    Metric.ACCURACY,
    search_space,
    n_trials=4,
)
opt.run()

[I 2025-06-01 14:29:52,872] A new study created in memory with name: no-name-1f1efd0a-fd33-4d82-af08-5cc477aad159
[I 2025-06-01 14:30:07,590] Trial 0 finished with value: 0.7726285300506879 and parameters: {'n_dense': 5}. Best is trial 0 with value: 0.7726285300506879.


   Accuracy  Precision    Recall  F1 Score  Brier Score       RPS
0  0.772629   0.757039  0.758702  0.757614     0.315257  0.009161
Trial 0: {'n_dense': 5}, Accuracy = 0.7726


[I 2025-06-01 14:30:15,673] Trial 1 finished with value: 0.7682838522809559 and parameters: {'n_dense': 4}. Best is trial 0 with value: 0.7726285300506879.


   Accuracy  Precision    Recall  F1 Score  Brier Score       RPS
0  0.768284   0.750562  0.755167  0.752614      0.30476  0.008795
Trial 1: {'n_dense': 4}, Accuracy = 0.7683


[I 2025-06-01 14:30:24,270] Trial 2 finished with value: 0.776249094858798 and parameters: {'n_dense': 4}. Best is trial 2 with value: 0.776249094858798.


   Accuracy  Precision    Recall  F1 Score  Brier Score       RPS
0  0.776249    0.76075  0.761485  0.760343     0.311848  0.008989
Trial 2: {'n_dense': 4}, Accuracy = 0.7762


[I 2025-06-01 14:30:33,007] Trial 3 finished with value: 0.7429398986241854 and parameters: {'n_dense': 3}. Best is trial 2 with value: 0.776249094858798.


   Accuracy  Precision    Recall  F1 Score  Brier Score       RPS
0   0.74294    0.72302  0.718382  0.719566     0.380356  0.011811
Trial 3: {'n_dense': 3}, Accuracy = 0.7429
