In [2]:
import os
import copy
import numpy as np

import m2models
from m2models.trainers import EnergyTrainer
from m2models import models     
from m2models.common import logger
from m2models.common.utils import setup_logging
# Initialise logging through the project's helper (m2models.common.utils)
# before any other m2models code runs in this notebook.
setup_logging()

# Download data and process it into LMDB

**Scripts for downloading and preprocessing data into LMDB are under the `scripts` directory.**

In [None]:
from scripts.download_data import get_data

# Seed NumPy's global RNG before the download / split step.
np.random.seed(42)

# Download the data and preprocess it into LMDB (see the section heading).
# Output goes to the `data` directory that sits next to the `m2models` package.
#
# `property`, `task` and `split` must be string literals: written bare,
# `qmof:bandgap` is a SyntaxError and `jarvis` / `random` are undefined names.
# The values match the dataset paths used in the configuration cell
# (data/jarvis/qmof:bandgap/random_*).
get_data(
    datadir=os.path.join(os.path.dirname(m2models.__path__[0]), "data"),
    property="qmof:bandgap",
    task="jarvis",
    split="random",
    seed=42,
    # Presumably the train / valid / test fractions -- confirm against get_data.
    data_frac=[0.6, 0.2, 0.2],
    del_intmd_files=True,
)

# Specify Configurations

**Before training, create a config that specifies the dataset to use, the task to perform, the model to train, and the optimizer. Below is an example config for training. More examples are under the `configs` directory.**

In [3]:
# One entry per data split, in the order train, valid, test. Each entry
# points at the directory for that split of the random-split qmof:bandgap data.
dataset = [
    {"src": f"data/jarvis/qmof:bandgap/random_{split_name}/"}
    for split_name in ("train", "valid", "test")
]

# Task settings: dataset format, free-text description, task type and the
# metric name.
task = dict(
    dataset="lmdb",
    description="Regressing the energies",
    type="regression",
    metric="mae",
)

# Model settings for the "cgcnn" architecture.
model = dict(
    name="cgcnn",
    # Layer sizes and counts.
    atom_embedding_size=128,
    fc_feat_size=256,
    num_fc_layers=4,
    num_graph_conv_layers=5,
    # Distance handling (units of `cutoff` not shown here -- confirm in the model code).
    cutoff=6.0,
    num_gaussians=100,
    # Feature switches.
    regress_forces=False,
    use_pbc=True,
    otf_graph=False,
)

# Optimizer / training-loop settings.
optim = dict(
    # Batching and data loading.
    batch_size=8,
    eval_batch_size=8,
    num_workers=4,
    # Learning-rate settings.
    lr_initial=1e-4,
    lr_gamma=0.1,
    lr_milestones=[5_000_000],
    warmup_steps=-1,
    warmup_factor=1.0,
    # Run length and evaluation cadence.
    max_epochs=10,
    eval_every=500,
)

**Create a trainer that matches the task being performed. In this example, we use the energy trainer. More trainers for other tasks can be found in `m2models/trainers`.**

In [None]:
# Build the trainer from the configuration objects defined in the cell above.
trainer = EnergyTrainer(
    # Run bookkeeping.
    identifier="qmof-bandgap-03",
    logger="wandb",
    # What to train on.
    task=task,
    dataset=dataset,
    # The trainer receives its own deep copy of the model config, so the
    # `model` dict in this notebook is not shared with it.
    model=copy.deepcopy(model),
    optimizer=optim,
)

In [5]:
# Evaluate the trainer's `model` attribute as the last expression of the cell
# so the notebook displays its repr (the module tree in the cell output).
trainer.model

OCPDataParallel(
  (module): CGCNN(
    (embedding_fc): Linear(in_features=92, out_features=128, bias=True)
    (convs): ModuleList(
      (0): CGCNNConv()
      (1): CGCNNConv()
      (2): CGCNNConv()
      (3): CGCNNConv()
      (4): CGCNNConv()
    )
    (conv_to_fc): Sequential(
      (0): Linear(in_features=128, out_features=256, bias=True)
      (1): Softplus(beta=1, threshold=20)
    )
    (fcs): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): Softplus(beta=1, threshold=20)
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): Softplus(beta=1, threshold=20)
      (4): Linear(in_features=256, out_features=256, bias=True)
      (5): Softplus(beta=1, threshold=20)
    )
    (fc_out): Linear(in_features=256, out_features=1, bias=True)
    (distance_expansion): GaussianSmearing()
  )
)

**Train the model**

In [None]:
# Start training by calling train() on the trainer created earlier.
trainer.train()