In [1]:
from read_process_embedder.datasets import MomentsDataset, DataModuleFromDataset
import torch
from read_process_embedder.models import ReadProcessEmbedder
import lightning as pl
from lightning.pytorch.loggers import WandbLogger

In [2]:
dataset = MomentsDataset(
    means=torch.linspace(0, 10, 5).tolist(),
    stds=torch.linspace(0.1, 1, 5).tolist(),
    seq_length=100,
    num_samples=1000,
)
datamodule = DataModuleFromDataset(dataset, batch_size=32)

In [3]:
i = 15
dataset.X[i].mean(), dataset.X[i].std(), dataset.y[i]

(tensor(2.5041), tensor(0.0939), tensor([2.5000, 0.1000]))

In [4]:
set_encoder = ReadProcessEmbedder(
    feature_size=1,
    output_size=2,
    reading_block_cells=[32, 32],
    writing_block_cells=[32, 32],
    memory_size=16,
    processing_steps=5,
)

In [5]:
X = dataset.X[:32]
y = dataset.y[:32]
X.shape, y.shape

(torch.Size([32, 100, 1]), torch.Size([32, 2]))

In [6]:
set_encoder(X).shape

torch.Size([32, 2])

In [7]:
logger = WandbLogger(project="read-process-embedder", save_dir="logs")
trainer = pl.Trainer(
    logger=logger,
    max_epochs=50,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    default_root_dir="logs",
)
trainer.fit(set_encoder, datamodule=datamodule)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 2050') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvalterschutz[0m ([33mvalterschutz-chalmers-university-of-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type    | Params | Mode 
--------------------------------------------------
0 | reading_block | MLP     | 1.6 K  | train
1 | rnn           | GRU     | 3.4 K  | train
2 | proj          | Linear  | 528    | train
3 | write_block   | MLP     | 2.2 K  | train
4 | criterion     | MSELoss | 0      | train
--------------------------------------------------
7.7 K     Trainable params
0         Non-trainable params
7.7 K     Total params
0.031     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/valter/Documents/Projects/read-process-embedder/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=21` in the `DataLoader` to improve performance.


                                                                           

/home/valter/Documents/Projects/read-process-embedder/.venv/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=21` in the `DataLoader` to improve performance.
/home/valter/Documents/Projects/read-process-embedder/.venv/lib/python3.13/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (25) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 49: 100%|██████████| 25/25 [00:00<00:00, 49.12it/s, v_num=334y, train_loss=0.0409, val_loss=0.0517]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 25/25 [00:00<00:00, 48.00it/s, v_num=334y, train_loss=0.0409, val_loss=0.0517]


In [8]:
# Check some random predictions
sample1 = torch.normal(mean=3, std=0.7, size=(1, 100, 1))
sample2 = torch.normal(mean=7, std=0.2, size=(1, 100, 1))
sample3 = torch.normal(mean=1, std=0.5, size=(1, 100, 1))
test_data = torch.cat([sample1, sample2, sample3], dim=0)
with torch.no_grad():
    model_output = set_encoder(test_data)
print(f"Means: {model_output[:, 0]}")
print(f"Stds: {model_output[:, 1]}")

# Permute the data and check if we get the same output
permuted_test_data = test_data[:, torch.randperm(100)]
with torch.no_grad():
    permuted_model_output = set_encoder(permuted_test_data)
print(f"Means: {permuted_model_output[:, 0]}")
print(f"Stds: {permuted_model_output[:, 1]}")

# Check the differenc
print(f"Mean difference: {torch.abs(model_output[:, 0] - permuted_model_output[:, 0])}")
print(f"Std difference: {torch.abs(model_output[:, 1] - permuted_model_output[:, 1])}")

Means: tensor([2.8770, 6.8934, 0.9088])
Stds: tensor([0.5346, 0.5427, 0.5170])
Means: tensor([2.8770, 6.8934, 0.9088])
Stds: tensor([0.5346, 0.5427, 0.5170])
Mean difference: tensor([2.3842e-07, 1.4305e-06, 9.5367e-07])
Std difference: tensor([0.0000e+00, 1.1921e-07, 1.1921e-07])
