# Initial Structure to Relaxed Energy (IS2RE) <a name="is2re"></a>
The IS2RE task predicts the relaxed energy (energy of the relaxed state) given the initial state of a system. One approach is to train a regression model that maps the initial structure directly to the relaxed energy. We call this the *direct* approach to the IS2RE task.

An alternative is to perform a structure relaxation using an S2EF model to obtain the relaxed state and compute the energy of that state (see the IS2RS task below for details about relaxation).

### Steps for training an IS2RE model
1) Define or load a configuration (config), which includes the following
* task
* model
* optimizer
* dataset
* trainer

2) Create an EnergyTrainer object

3) Train the model

4) Validate the model

### Imports

In [None]:
from ocpmodels.trainers import EnergyTrainer
from ocpmodels.datasets import SinglePointLmdbDataset
from ocpmodels import models
from ocpmodels.common import logger
from ocpmodels.common.utils import setup_logging
setup_logging()

import numpy as np
import copy
import os

### Dataset

In [None]:
# Locations of the LMDB files used as the training and validation splits.
train_src = "data/is2re/train_100/data.lmdb"
val_src = "data/is2re/val_20/data.lmdb"

### Normalize data

If you wish to normalize the targets, you must first compute the mean and standard deviation of the energy values.

In [None]:
# Open the training split and collect the relaxed-energy target of every
# sample, then reduce those targets to their mean and standard deviation.
train_dataset = SinglePointLmdbDataset({"src": train_src})

energies = []
for data in train_dataset:
    energies.append(data.y_relaxed)

mean, stdev = np.mean(energies), np.std(energies)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Define the Config

For this example, we will explicitly define the config; however, a set of default configs can be found [here](https://github.com/Open-Catalyst-Project/ocp/tree/master/configs). Default config yaml files can easily be loaded with the following [utility](https://github.com/Open-Catalyst-Project/ocp/blob/aa8e44d50229fce887b3a94a5661c4f85cd73eed/ocpmodels/common/utils.py#L361-L400). Loading a yaml config is preferable when launching jobs from the command line. We have included our best models' config files here for reference.

**Note** - we only train for a single epoch with a reduced batch size (GPU memory constraints) for demonstration purposes; modify accordingly for full convergence.

In [None]:
# Task: regression of the relaxed-state energy, with "mae" as the metric.
task = {
    "dataset": "single_point_lmdb",
    "description": "Relaxed state energy prediction from initial structure.",
    "type": "regression",
    "metric": "mae",
    "labels": ["relaxed energy"],
}
# Model: hyperparameters for the "gemnet_t" model, kept as a plain dict so it
# can be handed to the trainer (and reused for a second trainer later on).
model = {
    "name": "gemnet_t",
    "num_spherical": 7,
    "num_radial": 64,
    "num_blocks": 5,
    # Embedding sizes.
    "emb_size_atom": 256,
    "emb_size_edge": 512,
    "emb_size_trip": 64,
    "emb_size_rbf": 16,
    "emb_size_cbf": 16,
    "emb_size_bil_trip": 64,
    "num_before_skip": 1,
    "num_after_skip": 2,
    "num_concat": 1,
    "num_atom": 3,
    "cutoff": 6.0,
    "max_neighbors": 50,
    # Basis and envelope selections, each given as a nested config dict.
    "rbf": {"name": "gaussian"},
    "envelope": {"name": "polynomial", "exponent": 5},
    "cbf": {"name": "spherical_harmonics"},
    "extensive": True,
    "otf_graph": False,
    "output_init": "HeOrthogonal",
    "activation": "silu",
    "scale_file": "configs/s2ef/all/gemnet/scaling_factors/gemnet-dT.json",
    # Both force-related switches are turned off for this energy-only example.
    "regress_forces": False,
    "direct_forces": False,
}
# Optimizer: batch sizes and epoch count are reduced for the demonstration.
optimizer = {
    "batch_size": 1,  # originally 32
    "eval_batch_size": 1,  # originally 32
    "num_workers": 2,
    "lr_initial": 1e-4,
    "optimizer": "AdamW",
    "optimizer_params": {"amsgrad": True},
    # Learning-rate scheduler selection and its settings.
    "scheduler": "ReduceLROnPlateau",
    "mode": "min",
    "factor": 0.8,
    "patience": 3,
    "max_epochs": 1,  # used for demonstration purposes
    "ema_decay": 0.999,
    "clip_grad_norm": 10,
    "loss_energy": "mae",
}
# Dataset: a list of per-split configs. Entry 0 is the train set and carries
# the label-normalization statistics; entry 1 is the (optional) val set.
dataset = [
    {
        "src": train_src,
        "normalize_labels": True,
        "target_mean": mean,
        "target_std": stdev,
    },
    {"src": val_src},
]

### Create EnergyTrainer

In [None]:
# Assemble the trainer from the task, model, dataset and optimizer configs.
energy_trainer = EnergyTrainer(
    task=task,
    # Deep-copied so `model` can be reused later; not necessary in practice.
    model=copy.deepcopy(model),
    dataset=dataset,
    optimizer=optimizer,
    identifier="IS2RE-example",
    # Directory to save results if is_debug=False. Prediction files are saved
    # here, so be careful not to overwrite existing ones!
    run_dir="./",
    # If True, do not save checkpoint, logs, or results.
    is_debug=False,
    print_every=5,
    # Random seed to use.
    seed=0,
    # Logger of choice (tensorboard and wandb supported).
    logger="tensorboard",
    local_rank=0,
    # Use PyTorch Automatic Mixed Precision (faster training and less memory
    # usage).
    amp=True,
)

amp: true
cmd:
  checkpoint_dir: ./checkpoints/2022-10-28-20-09-36-IS2RE-example
  commit: 6e750b2
  identifier: IS2RE-example
  logs_dir: ./logs/tensorboard/2022-10-28-20-09-36-IS2RE-example
  print_every: 5
  results_dir: ./results/2022-10-28-20-09-36-IS2RE-example
  seed: 0
  timestamp_id: 2022-10-28-20-09-36-IS2RE-example
dataset:
  normalize_labels: true
  src: data/is2re/train_100/data.lmdb
  target_mean: !!python/object/apply:numpy.core.multiarray.scalar
  - &id001 !!python/object/apply:numpy.dtype
    args:
    - f8
    - false
    - true
    state: !!python/tuple
    - 3
    - <
    - null
    - null
    - null
    - -1
    - -1
    - 0
  - !!binary |
    MjyJzgpQ978=
  target_std: !!python/object/apply:numpy.core.multiarray.scalar
  - *id001
  - !!binary |
    PnyyzMtk/T8=
gpus: 1
logger: tensorboard
model: gemnet_t
model_attributes:
  activation: silu
  cbf:
    name: spherical_harmonics
  cutoff: 6.0
  direct_forces: false
  emb_size_atom: 256
  emb_size_bil_trip: 64
  emb_



In [None]:
# Echo the model held by the trainer so the notebook displays its layout.
energy_trainer.model

OCPDataParallel(
  (module): GemNetT(
    (radial_basis): RadialBasis(
      (envelope): PolynomialEnvelope()
      (rbf): GaussianSmearing()
    )
    (cbf_basis3): CircularBasisLayer(
      (radial_basis): RadialBasis(
        (envelope): PolynomialEnvelope()
        (rbf): GaussianSmearing()
      )
    )
    (mlp_rbf3): Dense(
      (linear): Linear(in_features=64, out_features=16, bias=False)
      (_activation): Identity()
    )
    (mlp_cbf3): EfficientInteractionDownProjection()
    (mlp_rbf_h): Dense(
      (linear): Linear(in_features=64, out_features=16, bias=False)
      (_activation): Identity()
    )
    (mlp_rbf_out): Dense(
      (linear): Linear(in_features=64, out_features=16, bias=False)
      (_activation): Identity()
    )
    (atom_emb): AtomEmbedding(
      (embeddings): Embedding(83, 256)
    )
    (edge_emb): EdgeEmbedding(
      (dense): Dense(
        (linear): Linear(in_features=576, out_features=512, bias=False)
        (_activation): ScaledSiLU(
          

### Train the Model

In [None]:
# Start training with the trainer created earlier in this section.
energy_trainer.train()

  neighbors_new // 2,
  block_sizes = neighbors // 2


energy_mae: 6.19e+01, energy_mse: 3.84e+03, energy_within_threshold: 0.00e+00, loss: 6.75e+01, lr: 1.00e-04, epoch: 5.00e-02, step: 5.00e+00
energy_mae: 1.86e+02, energy_mse: 3.46e+04, energy_within_threshold: 0.00e+00, loss: 2.03e+02, lr: 1.00e-04, epoch: 1.00e-01, step: 1.00e+01
energy_mae: 2.88e+03, energy_mse: 8.30e+06, energy_within_threshold: 0.00e+00, loss: 3.14e+03, lr: 1.00e-04, epoch: 1.50e-01, step: 1.50e+01
energy_mae: 5.87e+02, energy_mse: 3.45e+05, energy_within_threshold: 0.00e+00, loss: 3.20e+02, lr: 1.00e-04, epoch: 2.00e-01, step: 2.00e+01
energy_mae: 4.46e+03, energy_mse: 1.99e+07, energy_within_threshold: 0.00e+00, loss: 2.43e+03, lr: 1.00e-04, epoch: 2.50e-01, step: 2.50e+01
energy_mae: 4.09e+01, energy_mse: 1.67e+03, energy_within_threshold: 0.00e+00, loss: 2.22e+01, lr: 1.00e-04, epoch: 3.00e-01, step: 3.00e+01
energy_mae: 1.22e+02, energy_mse: 1.49e+04, energy_within_threshold: 0.00e+00, loss: 6.64e+01, lr: 1.00e-04, epoch: 3.50e-01, step: 3.50e+01
energy_mae: 3

ValueError: ignored

### Validate the Model

#### Load the best checkpoint

In [None]:
# The `best_checkpoint.pt` file contains the checkpoint with the best val
# performance; it sits inside the trainer's checkpoint directory.
checkpoint_path = os.path.join(
    energy_trainer.config["cmd"]["checkpoint_dir"],
    "best_checkpoint.pt",
)
checkpoint_path

'./checkpoints/2022-10-28-20-09-36-IS2RE-example/best_checkpoint.pt'

In [None]:
# Extend the dataset config list with a test-set entry (optional). The val set
# is reused as the test set here, for demonstration only.
dataset.append({"src": val_src})
dataset

[{'src': 'data/is2re/train_100/data.lmdb',
  'normalize_labels': True,
  'target_mean': -1.4570415561499996,
  'target_std': 1.8371084209427546},
 {'src': 'data/is2re/val_20/data.lmdb'},
 {'src': 'data/is2re/val_20/data.lmdb'}]

In [None]:
# Build a second trainer from the same configs (now including the test-set
# entry in `dataset`), then restore the weights saved at `checkpoint_path`.
pretrained_energy_trainer = EnergyTrainer(
    task=task,
    model=model,
    dataset=dataset,
    optimizer=optimizer,
    identifier="IS2RE-val-example",
    # Directory to save results if is_debug=False. Prediction files are saved
    # here, so be careful not to overwrite existing ones!
    run_dir="./",
    # If True, do not save checkpoint, logs, or results.
    is_debug=False,
    print_every=10,
    # Random seed to use.
    seed=0,
    # Logger of choice (tensorboard and wandb supported).
    logger="tensorboard",
    local_rank=0,
    # Use PyTorch Automatic Mixed Precision (faster training and less memory
    # usage).
    amp=True,
)

pretrained_energy_trainer.load_checkpoint(checkpoint_path=checkpoint_path)

amp: true
cmd:
  checkpoint_dir: ./checkpoints/2022-10-28-20-09-36-IS2RE-val-example
  commit: 6e750b2
  identifier: IS2RE-val-example
  logs_dir: ./logs/tensorboard/2022-10-28-20-09-36-IS2RE-val-example
  print_every: 10
  results_dir: ./results/2022-10-28-20-09-36-IS2RE-val-example
  seed: 0
  timestamp_id: 2022-10-28-20-09-36-IS2RE-val-example
dataset:
  normalize_labels: true
  src: data/is2re/train_100/data.lmdb
  target_mean: !!python/object/apply:numpy.core.multiarray.scalar
  - &id001 !!python/object/apply:numpy.dtype
    args:
    - f8
    - false
    - true
    state: !!python/tuple
    - 3
    - <
    - null
    - null
    - null
    - -1
    - -1
    - 0
  - !!binary |
    MjyJzgpQ978=
  target_std: !!python/object/apply:numpy.core.multiarray.scalar
  - *id001
  - !!binary |
    PnyyzMtk/T8=
gpus: 1
logger: tensorboard
model: gemnet_t
model_attributes:
  activation: silu
  cbf:
    name: spherical_harmonics
  cutoff: 6.0
  direct_forces: false
  emb_size_atom: 256
  emb_siz



FileNotFoundError: ignored

#### Test the model

In [None]:
# Make predictions on the existing test loader. The loader must come from the
# same trainer that runs the prediction (`pretrained_energy_trainer`); the
# previous `pretrained_trainer` name is not defined anywhere in this section.
predictions = pretrained_energy_trainer.predict(
    pretrained_energy_trainer.test_loader,
    results_file="is2re_results",
    disable_tqdm=False,
)

ValueError: ignored

In [None]:
# Pull the "energy" entry out of the predictions returned by `predict`.
# Note: this rebinds `energies`, which earlier held the training targets.
energies = predictions["energy"]