In [1]:
import os

from vierlinden.config import model_output_path
from vierlinden.data.loader import VierlindenDataProcessor
from vierlinden.model.model import NHitsTrainingWrapper

# Notebook-level settings: main() reads both globals and passes them, in this
# order, to the NHitsTrainingWrapper constructor.
# NOTE(review): presumably these are the data-loader batch size and the number
# of data-loader worker processes — confirm in vierlinden.model.model.
batch_size = 32
num_workers = 18

def main(arg: str):
    """Train an NHiTS model for a single target and hand back the results.

    The processed data is loaded through ``VierlindenDataProcessor``, prepared
    for the target named by ``arg`` and split into two frames. The first frame
    is given to ``NHitsTrainingWrapper``, whose learning-rate search is run
    before the actual training (seed fixed to 42).

    Args:
        arg: Target name forwarded to ``prepare_for_target`` and to the
            ``NHitsTrainingWrapper`` constructor.

    Returns:
        A ``(trainer, test_df, nhits_wrapper)`` tuple: the object returned by
        ``NHitsTrainingWrapper.train``, the second frame produced by
        ``split_data`` (not used for training here), and the wrapper itself.
    """
    processor = VierlindenDataProcessor()
    prepared = processor.prepare_for_target(processor.load_processed_data(), arg)
    train_split, test_split = processor.split_data(prepared)

    # batch_size and num_workers are the notebook-level globals.
    wrapper = NHitsTrainingWrapper(train_split, arg, batch_size, num_workers)
    optimal_lr = wrapper.find_optimal_learningrate()
    print(f"Optimal learning rate for {arg}: {optimal_lr}")

    return wrapper.train(optimal_lr, seed=42), test_split, wrapper

  from tqdm.autonotebook import tqdm


In [2]:
# Run the whole pipeline for the 'Kaiserstr_outflow [l/s]' target. The three
# results are kept as notebook globals; `trainer` is checkpointed in a later cell.
trainer, test_df, nhits_wrapper = main('Kaiserstr_outflow [l/s]')

INFO:vierlinden.data.loader:Data loaded successfully from /storage/projects/RIWWER/data/Vierlinden
INFO:vierlinden.data.loader:Sensor and target data merged successfully.
INFO:vierlinden.data.loader:NaN values processed successfully.
INFO:vierlinden.data.loader:Data loaded and processed successfully.


INFO:vierlinden.model.model:Training and validation data and data loaders created successfully.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Finding best initial lr:  98%|█████████▊| 98/100 [00:03<00:00, 33.84it/s]`Trainer.fit` stopped: `max_steps=100` reached.
Finding best initial lr: 100%|██████████| 100/100 [00:03<00:00, 30.26it/s]
Learning rate set to 0.0019952623149688802
Restoring states from the checkpoint path at /storage/projects/RIWWER/UT_RIWWER_project/.lr_find_0cbd0f86-f05f-4bdd-aff1-1478c8d1e354.ckpt
Restored all states from the checkpoint at /storage/projects/RIWWER/UT_RIWWER_project/.lr_find_0cbd0f86-f05f-4bdd-aff1-1478c8d1e354.ckpt
INFO:vierlinden.model.model:Start setting up trainer and network.
Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 T

Optimal learning rate for Kaiserstr_outflow [l/s]: 0.0019952623149688802


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type           | Params
---------------------------------------------------
0 | loss            | MASE           | 0     
1 | logging_metrics | ModuleList     | 0     
2 | embeddings      | MultiEmbedding | 0     
3 | model           | NHiTS          | 872 K 
---------------------------------------------------
872 K     Trainable params
0         Non-trainable params
872 K     Total params
3.488     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 193/193 [00:34<00:00,  5.62it/s, v_num=1, train_loss_step=2.140, val_loss=1.060, train_loss_epoch=1.510]

Metric val_loss improved. New best score: 1.061


Epoch 4:   0%|          | 0/193 [00:00<?, ?it/s, v_num=1, train_loss_step=1.190, val_loss=1.130, train_loss_epoch=1.400]          

Swapping scheduler `ReduceLROnPlateau` for `SWALR`


Epoch 10: 100%|██████████| 193/193 [00:37<00:00,  5.21it/s, v_num=1, train_loss_step=0.662, val_loss=1.150, train_loss_epoch=1.250]

Monitored metric val_loss did not improve in the last 10 records. Best score: 1.061. Signaling Trainer to stop.


Epoch 10: 100%|██████████| 193/193 [00:37<00:00,  5.17it/s, v_num=1, train_loss_step=0.662, val_loss=1.150, train_loss_epoch=1.250]


INFO:vierlinden.model.model:Training procedure completed.


In [4]:
# Persist the trained model under the configured output directory.
# os.path.join replaces the manual "/" concatenation: it does not double the
# separator when model_output_path already ends with one, and it does not
# produce a root-level path ("/NHits_...") if model_output_path is empty.
trainer.save_checkpoint(os.path.join(model_output_path, 'NHits_Vierlinden_Kaiserstr.ckpt'))