In [None]:
"""
Notebook for training the embedding model for the Lorenz system.
=====
Distributed by: Notre Dame SCAI Lab (MIT License)
- Associated publication:
url: https://arxiv.org/abs/2010.03957
doi: 
github: https://github.com/zabaras/transformer-physx
=====
"""
# Print the GPU, driver and CUDA version visible to this runtime.
!nvidia-smi

Wed Jul 28 23:58:01 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Environment Setup

Use pip to install from [PyPI](https://pypi.org/project/trphysx/).

In [None]:
!pip install trphysx==0.0.7



Mount Google Drive and create a folder to work in.

In [None]:
from google.colab import drive

# Location in the Colab filesystem where Google Drive is attached.
drive_mount_point = '/content/gdrive'
drive.mount(drive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
%cd /content/gdrive/MyDrive/
% mkdir -p transformer_physx/lorenz
% cd transformer_physx/lorenz

/content/gdrive/MyDrive
/content/gdrive/MyDrive/transformer_physx/lorenz


Now let's download the training and validation data for the Lorenz system. Info on using wget with Google Drive can be found [here](https://stackoverflow.com/questions/37453841/download-a-file-from-google-drive-using-wget). This will eventually be updated to a Zenodo repo.

In [None]:
!mkdir data

mkdir: cannot create directory ‘data’: File exists


In [None]:
# Fetch the Lorenz training and validation HDF5 files from Google Drive into ./data.
# -O sets the output filename; each run re-downloads and overwrites the file.
!wget -O ./data/lorenz_training_rk.hdf5 "https://drive.google.com/uc?export=download&id=1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd"
!wget -O ./data/lorenz_valid_rk.hdf5 "https://drive.google.com/uc?export=download&id=1bxFzKg8tSagE8kXWGm2mtaJ4gPsKJ8sI"

--2021-07-29 00:09:06--  https://drive.google.com/uc?export=download&id=1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd
Resolving drive.google.com (drive.google.com)... 172.217.15.78, 2607:f8b0:4004:80a::200e
Connecting to drive.google.com (drive.google.com)|172.217.15.78|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0o-0o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/rk4l6obvlpuuld5ggjv0pjgefhr94dak/1627517325000/01559412990587423567/*/1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd?e=download [following]
--2021-07-29 00:09:06--  https://doc-0o-0o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/rk4l6obvlpuuld5ggjv0pjgefhr94dak/1627517325000/01559412990587423567/*/1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd?e=download
Resolving doc-0o-0o-docs.googleusercontent.com (doc-0o-0o-docs.googleusercontent.com)... 172.217.15.97, 2607:f8b0:4004:811::2001
Connecting to doc-0o-0o-docs.googleusercontent.com (doc-0o-0o-docs.goo

# Transformer-PhysX Lorenz System

Train the embedding model.
First import necessary modules from trphysx. 

In [None]:
import sys, os
import logging

import torch
from torch.optim.lr_scheduler import ExponentialLR

from trphysx.config.configuration_auto import AutoPhysConfig
from trphysx.embedding.embedding_auto import AutoEmbeddingModel
from trphysx.embedding.training import *

Training arguments.

In [None]:
# Command-line style options handed to the embedding argument parser.
# Each row is a flag followed by its (string) value.
argv = [
    "--exp_name", "lorenz",
    "--training_h5_file", "./data/lorenz_training_rk.hdf5",
    "--eval_h5_file", "./data/lorenz_valid_rk.hdf5",
    "--batch_size", "512",
    "--block_size", "16",
    "--n_train", "2048",
    "--n_eval", "64",
    "--epochs", "100",
]

In [None]:
# Turn the argument list into the namespace consumed by the following cells.
args = EmbeddingParser().parse(args=argv)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Torch device:{}".format(args.device))

# Setup logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO)


Torch device:cuda:0


## Initializing Datasets and Models

Now we can use the auto classes to initialize the predefined configs, dataloaders and models. This may take a bit!

In [None]:
 # Load transformer config file
# Look up the predefined config and data handler registered under the experiment name.
config = AutoPhysConfig.load_config(args.exp_name)
dataloader = AutoDataHandler.load_data_handler(args.exp_name)

# Training batches are shaped by the parsed arguments.
training_loader = dataloader.createTrainingLoader(
    args.training_h5_file,
    batch_size=args.batch_size,
    ndata=args.n_train,
    stride=args.stride,
    block_size=args.block_size,
)

# Evaluation batches use a fixed block size (32) and batch size (8) instead of
# the values parsed from the argument list.
testing_loader = dataloader.createTestingLoader(
    args.eval_h5_file, block_size=32, ndata=args.n_eval, batch_size=8
)

# Build the embedding trainer model and move it to the selected device.
model = AutoEmbeddingModel.init_trainer(args.exp_name, config)
model = model.to(args.device)

# Hand the data handler's normalization parameters to the embedding model.
# NOTE(review): norm_params is read after the loaders are created, as in the
# original ordering — presumably it is populated while loading; confirm.
mu, std = dataloader.norm_params
model.embedding_model.mu = mu.to(args.device)
model.embedding_model.std = std.to(args.device)

# Resume from a saved checkpoint when training does not start at the first epoch.
if args.epoch_start > 1:
    model.load_model(args.ckpt_dir, args.epoch_start)


07/29/2021 00:10:18 - INFO - trphysx.embedding.training.enn_data_handler -   Creating training loader.
07/29/2021 00:10:53 - INFO - trphysx.embedding.training.enn_data_handler -   Creating testing loader
07/29/2021 00:10:57 - INFO - trphysx.embedding.embedding_lorenz -   Number of embedding parameters: 36192


Initialize optimizer and scheduler. Feel free to change if you want to experiment.

In [None]:
# Per-epoch multiplicative learning-rate decay, shared by the starting
# learning rate and the scheduler.
lr_decay = 0.995

# When starting at a later epoch, begin from the learning rate the schedule
# would have reached after (epoch_start - 1) decay steps.
initial_lr = args.lr * lr_decay ** (args.epoch_start - 1)

optimizer = torch.optim.Adam(model.parameters(), lr=initial_lr, weight_decay=1e-8)
scheduler = ExponentialLR(optimizer, gamma=lr_decay)

## Training the Embedding Model

Train the model. No visualization here, just boring numbers. This notebook trains for only 100 epochs for brevity; feel free to train longer. The test loss here is only the recovery loss MSE(x - decode(encode(x))) and does not reflect the quality of the Koopman dynamics.

In [None]:
# The trainer takes the optimizer and scheduler together as a single tuple.
optimizers = (optimizer, scheduler)
trainer = EmbeddingTrainer(model, args, optimizers)

# Run training on the training loader, evaluating on the testing loader.
trainer.train(training_loader, testing_loader)

07/29/2021 00:11:42 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 1: Training loss 40621124.000, Lr 0.00100
07/29/2021 00:11:42 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 1: Test loss: 0.11
07/29/2021 00:11:45 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 2: Training loss 733321.812, Lr 0.00099
07/29/2021 00:11:47 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 3: Training loss 529222.688, Lr 0.00099
07/29/2021 00:11:49 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 4: Training loss 425458.938, Lr 0.00099
07/29/2021 00:11:52 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 5: Training loss 652651.062, Lr 0.00098
07/29/2021 00:11:52 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 5: Test loss: 0.10
07/29/2021 00:11:54 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 6: Training loss 783782.562, Lr 0.00098
07/29/2021 00:11:56 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 7: Training loss 3

Check your Google Drive for checkpoints.