In [1]:
import sys, os
sys.path.append(os.path.abspath('..'))
from mads_datasets.base import BaseDatastreamer
from mltrainer.preprocessors import BasePreprocessor
from pathlib import Path
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from torch import nn
import torch

from src import datasets, metrics
from src.models import ConvBlocks, CNNSettings

In [2]:
import tomllib

dataset = 'ptb'

datadir = Path('../data')
configfile = Path("config.toml")
with configfile.open('rb') as f:
    config = tomllib.load(f)
print(config)
trainfile = datadir / (config[dataset] + '_train.parq')
testfile = datadir / (config[dataset] + '_test.parq')
trainfile, testfile
shape = (16, 12)
traindataset = datasets.HeartDataset2D(trainfile, target="target", shape=shape)
testdataset = datasets.HeartDataset2D(testfile, target="target", shape=shape)
traindataset, testdataset
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
else:
    device = "cpu"
trainstreamer = BaseDatastreamer(traindataset, preprocessor = BasePreprocessor(), batchsize=32)
teststreamer = BaseDatastreamer(testdataset, preprocessor = BasePreprocessor(), batchsize=32)
len(trainstreamer), len(teststreamer)

{'ptb': 'heart', 'arrhythmia': 'heart_big'}


(363, 90)

In [3]:
model_settings = CNNSettings(
    matrix_shape = (16, 12), #  Shape of the insert matrix
    in_channels = 1,
    hidden_size = 32, 
    num_layers = 3, #  Amount of convolutional layers to add
    num_classes = 5, #  Amount of end classes to be determined 
    attention= True,
    dense_activation='gelu'
    )

model = ConvBlocks(model_settings)

Calculated matrix size: 12
Calculated flatten size: 384


In [4]:
x, y = next(trainstreamer.stream())

# Testing CNN models

In [5]:
precision = metrics.Precision('micro')
recall = metrics.Recall('macro')
accuracy = metrics.Accuracy()

from mltrainer import Trainer, TrainerSettings, ReportTypes
import mlflow
mlflow.set_tracking_uri("sqlite:///mads_exam.db")
mlflow.set_experiment("1D world")

loss_fn = torch.nn.CrossEntropyLoss()

with mlflow.start_run():
    optimizer = torch.optim.Adam

    settings = TrainerSettings(
        epochs=10,
        metrics=[accuracy, recall],
        logdir="logs/heart2D",
        train_steps=len(trainstreamer) // 5,
        valid_steps=len(teststreamer) // 5,
        reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
        scheduler_kwargs=None,
        earlystop_kwargs=None
    )

    # modify the tags when you change them!
    mlflow.set_tag("model", "Conv2D")
    mlflow.set_tag("dataset", "heart_small_binary")
    mlflow.log_param("scheduler", "None")
    mlflow.log_param("earlystop", "None")

    mlflow.log_params(model_settings.__dict__)
    mlflow.log_param("epochs", settings.epochs)
    mlflow.log_param("modeltype", "CNN")
    mlflow.log_param("shape0", shape[0])
    mlflow.log_param("optimizer", str(optimizer))
    mlflow.log_params(settings.optimizer_kwargs)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optimizer,
        traindataloader=trainstreamer.stream(),
        validdataloader=teststreamer.stream(),
        scheduler=None,
        )
    trainer.loop()


# No SelfAttention
"""
With no self attention
----------------------

2025-06-12 12:40:29.095 | INFO     | mltrainer.trainer:dir_add_timestamp:23 - Logging to logs\heart2D\20250612-124029
2025-06-12 12:40:40.574 | INFO     | mltrainer.trainer:report:198 - Epoch 0 train 0.6231 test 0.4953 metric ['0.8660', '0.8660', '0.4331']
2025-06-12 12:40:49.830 | INFO     | mltrainer.trainer:report:198 - Epoch 1 train 0.3396 test 0.2720 metric ['0.9345', '0.9345', '0.6792']
2025-06-12 12:40:58.755 | INFO     | mltrainer.trainer:report:198 - Epoch 2 train 0.2255 test 0.1732 metric ['0.9524', '0.9524', '0.7947']
2025-06-12 12:41:07.598 | INFO     | mltrainer.trainer:report:198 - Epoch 3 train 0.1747 test 0.1747 metric ['0.9540', '0.9540', '0.8135']
2025-06-12 12:41:16.644 | INFO     | mltrainer.trainer:report:198 - Epoch 4 train 0.1551 test 0.1417 metric ['0.9619', '0.9619', '0.8665']
2025-06-12 12:41:25.437 | INFO     | mltrainer.trainer:report:198 - Epoch 5 train 0.1408 test 0.1358 metric ['0.9665', '0.9665', '0.8527']
2025-06-12 12:41:34.270 | INFO     | mltrainer.trainer:report:198 - Epoch 6 train 0.1359 test 0.1339 metric ['0.9632', '0.9632', '0.8316']
2025-06-12 12:41:43.206 | INFO     | mltrainer.trainer:report:198 - Epoch 7 train 0.1280 test 0.1252 metric ['0.9701', '0.9701', '0.8852']
2025-06-12 12:41:51.908 | INFO     | mltrainer.trainer:report:198 - Epoch 8 train 0.1146 test 0.1115 metric ['0.9715', '0.9715', '0.8671']
2025-06-12 12:42:00.693 | INFO     | mltrainer.trainer:report:198 - Epoch 9 train 0.1091 test 0.1228 metric ['0.9674', '0.9674', '0.8505']

With Selfattention
------------------
2025-06-11 17:28:34.844 | INFO     | mltrainer.trainer:dir_add_timestamp:23 - Logging to logs\heart2D\20250611-172834
2025-06-11 17:28:47.577 | INFO     | mltrainer.trainer:report:198 - Epoch 0 train 0.5928 test 0.3834 metric ['0.8879', '0.8879', '0.5407']
2025-06-11 17:28:58.848 | INFO     | mltrainer.trainer:report:198 - Epoch 1 train 0.3588 test 0.3643 metric ['0.8982', '0.8982', '0.6011']
2025-06-11 17:29:09.007 | INFO     | mltrainer.trainer:report:198 - Epoch 2 train 0.3078 test 0.2556 metric ['0.9409', '0.9409', '0.7152']
2025-06-11 17:29:19.159 | INFO     | mltrainer.trainer:report:198 - Epoch 3 train 0.2567 test 0.2526 metric ['0.9380', '0.9380', '0.7057']
2025-06-11 17:29:29.365 | INFO     | mltrainer.trainer:report:198 - Epoch 4 train 0.2216 test 0.2405 metric ['0.9341', '0.9341', '0.7038']
2025-06-11 17:29:40.296 | INFO     | mltrainer.trainer:report:198 - Epoch 5 train 0.2095 test 0.2105 metric ['0.9455', '0.9455', '0.7568']
2025-06-11 17:29:51.609 | INFO     | mltrainer.trainer:report:198 - Epoch 6 train 0.2077 test 0.2526 metric ['0.9352', '0.9352', '0.7884']
2025-06-11 17:30:04.300 | INFO     | mltrainer.trainer:report:198 - Epoch 7 train 0.1846 test 0.1844 metric ['0.9538', '0.9538', '0.8075']
2025-06-11 17:30:17.407 | INFO     | mltrainer.trainer:report:198 - Epoch 8 train 0.1897 test 0.1698 metric ['0.9614', '0.9614', '0.8498']
2025-06-11 17:30:29.265 | INFO     | mltrainer.trainer:report:198 - Epoch 9 train 0.1696 test 0.1825 metric ['0.9517', '0.9517', '0.7818']

With MultiHead Self attention
------------------
2025-06-11 17:37:27.088 | INFO     | mltrainer.trainer:dir_add_timestamp:23 - Logging to logs\heart2D\20250611-173727
2025-06-11 17:37:42.383 | INFO     | mltrainer.trainer:report:198 - Epoch 0 train 0.5536 test 0.3658 metric ['0.9076', '0.9076', '0.6114']
2025-06-11 17:37:55.318 | INFO     | mltrainer.trainer:report:198 - Epoch 1 train 0.3139 test 0.2612 metric ['0.9352', '0.9352', '0.7102']
2025-06-11 17:38:07.132 | INFO     | mltrainer.trainer:report:198 - Epoch 2 train 0.2405 test 0.2361 metric ['0.9405', '0.9405', '0.7631']
2025-06-11 17:38:18.777 | INFO     | mltrainer.trainer:report:198 - Epoch 3 train 0.1933 test 0.1941 metric ['0.9559', '0.9559', '0.8365']
2025-06-11 17:38:32.020 | INFO     | mltrainer.trainer:report:198 - Epoch 4 train 0.1696 test 0.1644 metric ['0.9561', '0.9561', '0.7823']
2025-06-11 17:38:43.319 | INFO     | mltrainer.trainer:report:198 - Epoch 5 train 0.1572 test 0.1405 metric ['0.9671', '0.9671', '0.8532']
2025-06-11 17:38:54.447 | INFO     | mltrainer.trainer:report:198 - Epoch 6 train 0.1335 test 0.1340 metric ['0.9674', '0.9674', '0.8493']
2025-06-11 17:39:10.569 | INFO     | mltrainer.trainer:report:198 - Epoch 7 train 0.1356 test 0.1371 metric ['0.9644', '0.9644', '0.8481']
2025-06-11 17:39:23.101 | INFO     | mltrainer.trainer:report:198 - Epoch 8 train 0.1268 test 0.1497 metric ['0.9614', '0.9614', '0.8267']
2025-06-11 17:39:36.016 | INFO     | mltrainer.trainer:report:198 - Epoch 9 train 0.1273 test 0.1224 metric ['0.9678', '0.9678', '0.8565']

Epoch 9 train 0.3166 test 0.3350 metric ['0.8524', '0.8524', '0.8636']
"""

  2025-06-12 12:40:29.095 | INFO     | mltrainer.trainer:dir_add_timestamp:23 - Logging to logs\heart2D\20250612-124029
[32m2025-06-27 18:54:47.643[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m23[0m - [1mLogging to logs\heart2D\20250627-185447[0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:01<00:00, 56.07it/s]
[32m2025-06-27 18:54:50.609[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 0 train 0.7325 test 0.5952 metric ['0.7153', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:01<00:00, 49.66it/s]
[32m2025-06-27 18:54:52.315[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 1 train 0.5790 test 0.5665 metric ['0.7101', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:01<00:00, 49.02it/s]
[32m2025-06-27 18:54:53.980[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 2 train 0.5109 test 0.485

"\nWith no self attention\n----------------------\n\n2025-06-12 12:40:29.095 | INFO     | mltrainer.trainer:dir_add_timestamp:23 - Logging to logs\\heart2D\x8250612-124029\n2025-06-12 12:40:40.574 | INFO     | mltrainer.trainer:report:198 - Epoch 0 train 0.6231 test 0.4953 metric ['0.8660', '0.8660', '0.4331']\n2025-06-12 12:40:49.830 | INFO     | mltrainer.trainer:report:198 - Epoch 1 train 0.3396 test 0.2720 metric ['0.9345', '0.9345', '0.6792']\n2025-06-12 12:40:58.755 | INFO     | mltrainer.trainer:report:198 - Epoch 2 train 0.2255 test 0.1732 metric ['0.9524', '0.9524', '0.7947']\n2025-06-12 12:41:07.598 | INFO     | mltrainer.trainer:report:198 - Epoch 3 train 0.1747 test 0.1747 metric ['0.9540', '0.9540', '0.8135']\n2025-06-12 12:41:16.644 | INFO     | mltrainer.trainer:report:198 - Epoch 4 train 0.1551 test 0.1417 metric ['0.9619', '0.9619', '0.8665']\n2025-06-12 12:41:25.437 | INFO     | mltrainer.trainer:report:198 - Epoch 5 train 0.1408 test 0.1358 metric ['0.9665', '0.9665'

In [6]:
from src.models import MultiHeadSelfAtt
for layer in model.convolutions:
    if isinstance(layer, MultiHeadSelfAtt):
        print(layer.weights)

Parameter containing:
tensor([-0.0009], requires_grad=True)
Parameter containing:
tensor([-0.0233], requires_grad=True)


# Testing RNN Models

In [7]:
import tomllib

dataset = 'ptb'

datadir = Path('../data')
configfile = Path("config.toml")

with configfile.open('rb') as f:
    config = tomllib.load(f)
print(config)

trainfile = datadir / (config[dataset] + '_train.parq')
testfile = datadir / (config[dataset] + '_test.parq')
trainfile, testfile

traindataset = datasets.HeartDataset1D(trainfile, target="target")
testdataset = datasets.HeartDataset1D(testfile, target="target")
traindataset, testdataset

if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
else:
    device = "cpu"
    
trainstreamer = BaseDatastreamer(traindataset, preprocessor = BasePreprocessor(), batchsize=32)
teststreamer = BaseDatastreamer(testdataset, preprocessor = BasePreprocessor(), batchsize=32)
len(trainstreamer), len(teststreamer)

{'ptb': 'heart', 'arrhythmia': 'heart_big'}


(363, 90)

## GRU Attention

In [8]:
from src.models import GRUSettings, GRUmodel

model_settings = GRUSettings(
    hidden_size = 32,
    input_size = 1,
    num_layers = 2,
    output_size = 5,
    dropout = 0.2,
    attention_dropout=0.3
)

model = GRUmodel(model_settings)

In [9]:
from mltrainer import Trainer, TrainerSettings, ReportTypes
import mlflow
mlflow.set_tracking_uri("sqlite:///mads_exam.db")
mlflow.set_experiment("1D Escapades")


precision = metrics.Precision('micro')
recall = metrics.Recall('macro')
accuracy = metrics.Accuracy()

loss_fn = torch.nn.CrossEntropyLoss()


with mlflow.start_run():
    optimizer = torch.optim.Adam
    settings = TrainerSettings(
        epochs=10,
        metrics=[accuracy, recall],
        logdir="logs/heart1D",
        train_steps=len(trainstreamer) // 5,
        valid_steps=len(teststreamer) // 5,
        reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
        scheduler_kwargs=None,
        earlystop_kwargs=None
    )

    # modify the tags when you change them!
    mlflow.set_tag("model", "RNN")
    mlflow.set_tag("dataset", "heart_small_binary")
    mlflow.log_param("scheduler", "None")
    mlflow.log_param("earlystop", "None")

    mlflow.log_params(model_settings.__dict__)
    mlflow.log_param("epochs", settings.epochs)
    mlflow.log_param("shape0", shape[0])
    mlflow.log_param("modeltype", "RNN")
    mlflow.log_param("optimizer", str(optimizer))
    mlflow.log_params(settings.optimizer_kwargs)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optimizer,
        traindataloader=trainstreamer.stream(),
        validdataloader=teststreamer.stream(),
        scheduler=None,
        )
    trainer.loop()


# No SelfAttention
"""

"""

[32m2025-06-27 18:55:04.757[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m23[0m - [1mLogging to logs\heart1D\20250627-185504[0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:13<00:00,  5.40it/s]
[32m2025-06-27 18:55:18.709[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 0 train 0.8570 test 0.5848 metric ['0.7483', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:13<00:00,  5.33it/s]
[32m2025-06-27 18:55:32.732[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 1 train 0.6089 test 0.6123 metric ['0.6997', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:13<00:00,  5.49it/s]
[32m2025-06-27 18:55:46.400[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 2 train 0.5833 test 0.6063 metric ['0.7066', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:13<00:00,  5.27it/s]
[32m2025-06-27 18:

'\n\n'

## LSTM Model

In [10]:
from src.models import LSTMSettings, LSTMmodel

model_settings = LSTMSettings(
    hidden_size = 32,
    input_size = 1,
    num_layers = 2,
    output_size = 5,
    dropout = 0.2,
    attention_dropout=0.3
)

model = LSTMmodel(model_settings)

In [11]:
from mltrainer import Trainer, TrainerSettings, ReportTypes
import mlflow
mlflow.set_tracking_uri("sqlite:///mads_exam.db")
mlflow.set_experiment("1D Escapades")


precision = metrics.Precision('micro')
recall = metrics.Recall('macro')
accuracy = metrics.Accuracy()

loss_fn = torch.nn.CrossEntropyLoss()


with mlflow.start_run():
    optimizer = torch.optim.Adam
    settings = TrainerSettings(
        epochs=10,
        metrics=[accuracy, recall],
        logdir="logs/heart1D",
        train_steps=len(trainstreamer) // 5,
        valid_steps=len(teststreamer) // 5,
        reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
        scheduler_kwargs=None,
        earlystop_kwargs=None
    )

    # modify the tags when you change them!
    mlflow.set_tag("model", "RNN")
    mlflow.set_tag("dataset", "heart_small_binary")
    mlflow.log_param("scheduler", "None")
    mlflow.log_param("earlystop", "None")

    mlflow.log_params(model_settings.__dict__)
    mlflow.log_param("epochs", settings.epochs)
    mlflow.log_param("shape0", shape[0])
    mlflow.log_param("modeltype", "RNN")
    mlflow.log_param("optimizer", str(optimizer))
    mlflow.log_params(settings.optimizer_kwargs)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optimizer,
        traindataloader=trainstreamer.stream(),
        validdataloader=teststreamer.stream(),
        scheduler=None,
        )
    trainer.loop()


# No SelfAttention
"""

"""

[32m2025-06-27 18:57:24.581[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m23[0m - [1mLogging to logs\heart1D\20250627-185724[0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:07<00:00,  9.91it/s]
[32m2025-06-27 18:57:32.197[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 0 train 0.9258 test 0.5928 metric ['0.7240', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:07<00:00, 10.28it/s]
[32m2025-06-27 18:57:39.522[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 1 train 0.5925 test 0.6021 metric ['0.7205', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:07<00:00,  9.57it/s]
[32m2025-06-27 18:57:47.356[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m198[0m - [1mEpoch 2 train 0.6042 test 0.5703 metric ['0.7431', '0.5000'][0m
100%|[38;2;30;71;6m██████████[0m| 72/72 [00:07<00:00,  9.61it/s]
[32m2025-06-27 18:

'\n\n'