In [1]:
# Silence every Python warning up front; the filter is installed before the
# heavy imports below, so warnings raised while importing are hidden as well.
import warnings
warnings.filterwarnings("ignore")
import torch
from lightning import seed_everything
from e3nn.io import CartesianTensor
import pandas as pd

# Global runtime configuration: fixed seed, file-system based tensor sharing
# between processes, and float64 as the default floating-point dtype.
seed_everything(1234)
torch.multiprocessing.set_sharing_strategy('file_system')
default_dtype = torch.float64
torch.set_default_dtype(default_dtype)

# NOTE(review): this import intentionally stays below set_default_dtype, in
# case utils_data relies on the default dtype at import time -- confirm.
from utils_data import load_data

# Cartesian tensor helper built from the index formula "ij=ji".
ct = CartesianTensor("ij=ji")
file_path = '../our-dielectric-dataset.csv'

# Load the table, keep only rows whose dielectric_scalar is below 15,
# renumber the index from zero, and expose the irreps column as "target".
df = (
    load_data(file_path, ct)
    .loc[lambda frame: frame["dielectric_scalar"] < 15]
    .reset_index(drop=True)
    .rename(columns={"dielectric_irreps": "target"})
)

Seed set to 1234


In [2]:
from utils_data import BaseDataset
from utils_model import E3nnModel
from utils_train import BaseLightning
from lightning.pytorch import Trainer
# Take the logger from the same `lightning.pytorch` namespace as Trainer.
# The standalone `pytorch_lightning` package is a separate distribution, and
# its objects should not be mixed with ones from `lightning.pytorch`.
from lightning.pytorch.loggers import CSVLogger

# Build the graph dataset from the first 200 rows only, with cutoff=5.
dataset = BaseDataset(df[:200], cutoff=5)

100%|██████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 2298.82it/s]


In [3]:
# Instantiate the network; keyword arguments are grouped by role.
net = E3nnModel(
    # --- input encodings and their embeddings ---
    in_dim=118,                       # size of the one-hot node feature
    in_attr_dim=118,                  # size of the one-hot node attribute
    em_dim=48,                        # embedding width for the node feature
    em_attr_dim=48,                   # embedding width for the node attribute
    same_em_layer=True,               # share one embedding layer between the one-hot atom type and one-hot atomic mass
    # --- network body ---
    layers=2,                         # number of gate layers
    mul=48,                           # multiplicity of features after each layer
    lmax=3,                           # highest l for spherical harmonics
    # --- output ---
    irreps_out=str(ct),               # output irrep shape, taken from the Cartesian tensor helper
    reduce_output=True,
    # --- radial / neighbourhood settings, read from the dataset ---
    max_radius=dataset.cutoff,
    number_of_basis=15,               # basis size for the radial embedding
    # NOTE(review): `num_neigbours` (sic) presumably matches the attribute name
    # defined on BaseDataset -- confirm before "fixing" the spelling here.
    num_neighbors=dataset.num_neigbours,
)

In [4]:
# AdamW over all parameters of the network.
optimizer = torch.optim.AdamW(
    params=net.parameters(),
    lr=0.003,
    weight_decay=0.03,
)

# Exponential decay: every scheduler step multiplies the learning rate by 0.98.
# NOTE(review): how often the scheduler is stepped is decided by the training
# wrapper that receives it -- confirm there.
scheduler = torch.optim.lr_scheduler.ExponentialLR(
    optimizer=optimizer,
    gamma=0.98,
)

In [5]:
# Wrap dataset, network, optimizer and scheduler in the Lightning module.
# NOTE(review): trainer.fit() below receives no dataloaders, so BaseLightning
# presumably builds them from `dataset` and `batch_size` -- confirm.
model = BaseLightning(
    dataset,
    net,
    batch_size=12,
    optimizer=optimizer,
    scheduler=scheduler
)

trainer = Trainer(
    max_epochs=120,
    # Use the GPU when CUDA is present, otherwise fall back to the CPU, so the
    # notebook still runs on machines without a CUDA device instead of failing
    # at Trainer construction.
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    # Metrics are written as CSV under ./stock_models/.
    logger=CSVLogger(".", name="stock_models"),
    enable_progress_bar=True,
    # strategy="ddp_spawn",
)
trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
The following callbacks returned in `LightningModule.configure_callbacks` will override existing callbacks passed to Trainer: ModelCheckpoint
You are using a CUDA device ('NVIDIA GeForce RTX 3080 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | E3nnModel | 10.4 M
------------------------------------
10.4 M    Trainable params
0         Non-trainable params
10.4 M    Total params
41.471    Total estimated model params size (MB)
