In [None]:
"""
Notebook for training the embedding model for the Lorenz system.
=====
Distributed by: Notre Dame SCAI Lab (MIT License)
- Associated publication:
url: https://arxiv.org/abs/2010.03957
doi: 
github: https://github.com/zabaras/transformer-physx
=====
"""
# Print the NVIDIA driver/GPU status table for the current runtime.
!nvidia-smi

Mon Jun 28 17:04:28 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P0    28W /  70W |   1192MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Environment Setup

In [None]:
!pip install torch==1.8.1
!pip install h5py==2.10.0
!pip install filelock==3.0.12
!pip install scipy==1.6.3
!pip install matplotlib==3.4.2



First, mount Google Drive and clone the Transformer-PhysX repo.

In [None]:
# Colab-only: mount Google Drive at /content/gdrive. The following cells
# change into the mounted drive before cloning the repository.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Make the mounted drive's root folder the working directory.
%cd /content/gdrive/MyDrive/

/content/gdrive/MyDrive


In [None]:
!git clone https://github.com/zabaras/transformer-physx.git
%cd ./transformer-physx/examples/lorenz/

Cloning into 'transformer-physx'...
remote: Enumerating objects: 653, done.[K
remote: Counting objects: 100% (653/653), done.[K
remote: Compressing objects: 100% (455/455), done.[K
remote: Total 653 (delta 274), reused 534 (delta 162), pack-reused 0[K
Receiving objects: 100% (653/653), 14.68 MiB | 20.13 MiB/s, done.
Resolving deltas: 100% (274/274), done.
/content/gdrive/My Drive/transformer-physx/examples/lorenz/transformer-physx/examples/lorenz


Now let's download the training and validation data for the Lorenz system. Info on wget from [Google Drive](https://stackoverflow.com/questions/37453841/download-a-file-from-google-drive-using-wget). This will eventually be updated to a Zenodo repo.

In [None]:
!mkdir data

In [None]:
# Fetch the Lorenz training and validation HDF5 files from Google Drive and
# store them under ./data with the file names the training arguments point to.
!wget -O ./data/lorenz_training_rk.hdf5 "https://drive.google.com/uc?export=download&id=1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd"
!wget -O ./data/lorenz_valid_rk.hdf5 "https://drive.google.com/uc?export=download&id=1bxFzKg8tSagE8kXWGm2mtaJ4gPsKJ8sI"

--2021-06-28 16:43:26--  https://drive.google.com/uc?export=download&id=1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd
Resolving drive.google.com (drive.google.com)... 142.250.73.206, 2607:f8b0:4004:837::200e
Connecting to drive.google.com (drive.google.com)|142.250.73.206|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0o-0o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/74vbh303qf81opqa4fp062e7qth6ub9d/1624898550000/01559412990587423567/*/1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd?e=download [following]
--2021-06-28 16:43:26--  https://doc-0o-0o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/74vbh303qf81opqa4fp062e7qth6ub9d/1624898550000/01559412990587423567/*/1vGTGzaqEZxxuLN9K-PUrYw9SLWttdDYd?e=download
Resolving doc-0o-0o-docs.googleusercontent.com (doc-0o-0o-docs.googleusercontent.com)... 172.217.9.193, 2607:f8b0:4004:806::2001
Connecting to doc-0o-0o-docs.googleusercontent.com (doc-0o-0o-docs.g

# Transformer-PhysX Lorenz System

Train the embedding model.
First import necessary modules from trphysx. 

In [None]:
import sys, os
sys.path.append('../..')
import logging

import torch
from torch.optim.lr_scheduler import ExponentialLR

from trphysx.config.configuration_auto import AutoPhysConfig
from trphysx.embedding.embedding_auto import AutoEmbeddingModel
from trphysx.embedding.training import *

Training arguments.

In [None]:
# Command-line style options for the embedding trainer, one flag/value pair per row.
argv = [
    "--exp_name", "lorenz",
    "--training_h5_file", "./data/lorenz_training_rk.hdf5",
    "--eval_h5_file", "./data/lorenz_valid_rk.hdf5",
    "--batch_size", "512",
    "--block_size", "16",
    "--ntrain", "2048",
    "--epochs", "100",
]

In [None]:
# Turn the argv list into the argument object used by the rest of the notebook.
args = EmbeddingParser().parse(args=argv)

# Select the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    # NOTE(review): `use_cuda` is only defined on this branch and is not
    # referenced by any other visible cell -- confirm whether it is still needed.
    use_cuda = "cuda"
    device_name = "cuda:0"
else:
    device_name = "cpu"
args.device = torch.device(device_name)
print("Torch device:{}".format(args.device))

# Configure root logging at INFO level with a timestamped message format.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
)


Torch device:cuda:0


Now we can use the auto classes to initialize the predefined configs, dataloaders and models. This may take a bit!

## Initializing Datasets and Models

In [None]:
 # Load transformer config file
config = AutoPhysConfig.load_config(args.exp_name)
dataloader = AutoDataHandler.load_data_handler(args.exp_name)

# Build the training and evaluation loaders from the HDF5 files named in `args`.
training_loader = dataloader.createTrainingLoader(
    args.training_h5_file,
    block_size=args.block_size,
    stride=args.stride,
    ndata=args.ntrain,
    batch_size=args.batch_size,
)
# The evaluation loader uses a fixed block size (32) and batch size (8).
testing_loader = dataloader.createTestingLoader(
    args.eval_h5_file,
    block_size=32,
    ndata=args.ntest,
    batch_size=8,
)

# Create the embedding trainer model on the selected device and copy the data
# handler's normalization parameters onto the same device.
model = AutoEmbeddingModel.init_trainer(args.exp_name, config).to(args.device)
mu, std = dataloader.norm_params
model.embedding_model.mu = mu.to(args.device)
model.embedding_model.std = std.to(args.device)

# Load a saved checkpoint when training does not start from the first epoch.
if args.epoch_start > 1:
    model.load_model(args.ckpt_dir, args.epoch_start)


06/28/2021 16:49:01 - INFO - trphysx.embedding.training.enn_data_handler -   Creating training loader.
06/28/2021 16:49:53 - INFO - trphysx.embedding.training.enn_data_handler -   Creating testing loader
06/28/2021 16:49:55 - INFO - trphysx.embedding.embedding_lorenz -   Number of embedding parameters: 36192


Initialize optimizer and scheduler. Feel free to change if you want to experiment.

In [None]:
# Per-epoch multiplicative learning-rate decay shared by the optimizer's
# starting rate and the scheduler.
lr_gamma = 0.995
# Pre-decay the base rate by (epoch_start - 1) steps -- presumably so that a
# resumed run continues the schedule where it left off.
initial_lr = args.lr * lr_gamma ** (args.epoch_start - 1)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=initial_lr,
    weight_decay=1e-8,
)
scheduler = ExponentialLR(optimizer, gamma=lr_gamma)

Train the model. No visualization here, just boring numbers.

## Training the Embedding Model

In [None]:
# Bundle the optimizer with its scheduler, build the trainer and run training
# on the training loader with the testing loader passed alongside.
optimizers = (optimizer, scheduler)
trainer = EmbeddingTrainer(model, args, optimizers)
trainer.trainKoopman(training_loader, testing_loader)

06/28/2021 16:50:12 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 1: Training loss 40638908.000, Lr 0.00100
06/28/2021 16:50:12 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 1: Test loss: 0.49
06/28/2021 16:50:15 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 2: Training loss 811423.938, Lr 0.00099
06/28/2021 16:50:17 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 3: Training loss 810006.938, Lr 0.00099
06/28/2021 16:50:20 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 4: Training loss 681831.188, Lr 0.00099
06/28/2021 16:50:23 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 5: Training loss 704575.250, Lr 0.00098
06/28/2021 16:50:23 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 5: Test loss: 0.43
06/28/2021 16:50:25 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 6: Training loss 391986.812, Lr 0.00098
06/28/2021 16:50:28 - INFO - trphysx.embedding.training.enn_trainer -   Epoch 7: Training loss 3

Check your Google Drive for checkpoints.