In [1]:
from pathlib import PurePath

from code.config import cfg, update_cfg
from code.data_utils.dataset import DatasetLoader
from code.utils import project_root_path
from code.generate_description import (generate_structure_description, generate_atom_feature_description, 
                                       generate_bond_feature_description, generate_full_description, generate_all_atom_feature_description)

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Directory holding the project's data, expressed relative to the project root.
data_path = PurePath(project_root_path) / "data"

# Dataset to describe in this notebook; read back by the loading cell below.
cfg.dataset = "ogbg-molbace"

In [3]:
# Load the dataset named in cfg.dataset, requesting the loader's 'raw' text.
dataloader = DatasetLoader(text='raw', name=cfg.dataset)

# Pull out the two loader attributes the rest of the notebook works with:
# the graph dataset and its accompanying text (indexed per molecule below and
# passed on as `smiles_string`).
dataset = dataloader.dataset
smiles = dataloader.text

In [4]:
# Choose the molecule to describe and, separately, which atom and bond inside
# that molecule to use for the single-atom / single-bond examples.
#
# The earlier version reused the one `index` for all three roles and read the
# atom/bond features from the dataset-level `dataset.x` / `dataset.edge_attr`,
# while the whole-molecule calls used `dataset[index].x`. Those only agree for
# index == 0; for any other molecule the atom and bond examples would have been
# taken from rows unrelated to `smiles[index]`.
index = 0       # molecule index into `dataset` and `smiles`
atom_index = 0  # atom position within the selected molecule
bond_index = 0  # row of the selected molecule's edge_attr

# Fetch the per-molecule record once so every description below is built from
# the same molecule.
# NOTE(review): assumes dataset[index] exposes `edge_attr` alongside `x`
# (the original code only used `dataset.edge_attr` at dataset level) -- confirm.
molecule = dataset[index]
molecule_smiles = smiles[index]

# Connectivity-only description of the molecule graph.
structure_description = generate_structure_description(
    index=index, smiles_string=molecule_smiles
)

# Feature description for one atom of the selected molecule.
atom_description = generate_atom_feature_description(
    atom_x=molecule.x[atom_index],
    atom_index=atom_index,
    smiles_string=molecule_smiles,
)

# Feature description for one bond of the selected molecule.
bond_description = generate_bond_feature_description(
    bond_x=molecule.edge_attr[bond_index]
)

# Feature description for every atom of the molecule.
full_atom_description = generate_all_atom_feature_description(
    index=index, smiles_string=molecule_smiles, mol_x=molecule.x
)

# Atom features interleaved with connectivity for the whole molecule.
full_description = generate_full_description(
    index=index, smiles_string=molecule_smiles, atom_x=molecule.x
)

In [5]:
# Echo the connectivity description generated above; as the cell's last
# expression its repr is shown, so newlines appear as literal "\n".
structure_description

'This molecule can be represented as a graph among atoms 0 (O), 1 (C), 2 (C), 3 (C), 4 (N), 5 (C), 6 (O), 7 (C), 8 (C), 9 (C), 10 (C), 11 (C), 12 (C), 13 (C), 14 (C), 15 (C), 16 (C), 17 (N), 18 (C), 19 (N), 20 (C), 21 (C), 22 (C), 23 (C), 24 (C), 25 (C), 26 (C), 27 (C), 28 (C), 29 (C), 30 (C), 31 (C). In this graph:\nAtom 0 (O) is connected to atom 1 (C) and atom 29 (C).\nAtom 1 (C) is connected to atom 0 (O) and atom 2 (C).\nAtom 2 (C) is connected to atom 1 (C) and atom 3 (C).\nAtom 3 (C) is connected to atom 2 (C) and atom 4 (N) and atom 28 (C).\nAtom 4 (N) is connected to atom 3 (C) and atom 5 (C).\nAtom 5 (C) is connected to atom 4 (N) and atom 6 (O) and atom 7 (C).\nAtom 6 (O) is connected to atom 5 (C).\nAtom 7 (C) is connected to atom 5 (C) and atom 8 (C) and atom 27 (C).\nAtom 8 (C) is connected to atom 7 (C) and atom 9 (C).\nAtom 9 (C) is connected to atom 8 (C) and atom 10 (C) and atom 18 (C).\nAtom 10 (C) is connected to atom 9 (C) and atom 11 (C).\nAtom 11 (C) is connected

In [6]:
# Echo the single-atom feature description generated above.
# NOTE(review): the recorded output reads "has 7 atomics" and "positive charge
# of 5" for an oxygen atom -- this looks like raw feature indices being printed
# rather than decoded values; verify in generate_atom_feature_description.
atom_description

'Atom 0 (O) has 7 atomics, has no specified chirality, has 2 bonds with other atoms, has a positive charge of 5, has 0 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not aromatic, is part of a ring.'

In [9]:
# Echo the single-bond feature description generated above.
# NOTE(review): this cell's execution count (9) is out of sequence with its
# neighbours; re-run with Restart & Run All before sharing. The recorded output
# also says "bound type" -- presumably a typo for "bond type" in the generator.
bond_description

'The bound type is SINGLE. The bond Stereo is STEREONONE. The bond is not Conjugated.'

In [7]:
# Echo the all-atoms feature description generated above (one sentence per
# atom, per the recorded output).
full_atom_description

'This molecule can be represented as a graph among atoms 0 (O), 1 (C), 2 (C), 3 (C), 4 (N), 5 (C), 6 (O), 7 (C), 8 (C), 9 (C), 10 (C), 11 (C), 12 (C), 13 (C), 14 (C), 15 (C), 16 (C), 17 (N), 18 (C), 19 (N), 20 (C), 21 (C), 22 (C), 23 (C), 24 (C), 25 (C), 26 (C), 27 (C), 28 (C), 29 (C), 30 (C), 31 (C). In this graph:\nAtom 0 (O) has 7 atomics, has no specified chirality, has 2 bonds with other atoms, has a positive charge of 5, has 0 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not aromatic, is part of a ring.\nAtom 1 (C) has 5 atomics, has no specified chirality, has 4 bonds with other atoms, has a positive charge of 5, has 2 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not aromatic, is part of a ring.\nAtom 2 (C) has 5 atomics, has no specified chirality, has 4 bonds with other atoms, has a positive charge of 5, has 2 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not 

In [8]:
# Echo the full description generated above; the recorded output interleaves
# each atom's feature sentence with its connectivity sentence.
full_description

'This molecule can be represented as a graph among atoms 0 (O), 1 (C), 2 (C), 3 (C), 4 (N), 5 (C), 6 (O), 7 (C), 8 (C), 9 (C), 10 (C), 11 (C), 12 (C), 13 (C), 14 (C), 15 (C), 16 (C), 17 (N), 18 (C), 19 (N), 20 (C), 21 (C), 22 (C), 23 (C), 24 (C), 25 (C), 26 (C), 27 (C), 28 (C), 29 (C), 30 (C), 31 (C). In this graph:\nAtom 0 (O) has 7 atomics, has no specified chirality, has 2 bonds with other atoms, has a positive charge of 5, has 0 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not aromatic, is part of a ring.\nAtom 0 (O) is connected to atom 1 (C) and atom 29 (C).\nAtom 1 (C) has 5 atomics, has no specified chirality, has 4 bonds with other atoms, has a positive charge of 5, has 2 hydrogen atoms attached to it, has 0 unpaired electrons, has a SP3 hybridization, is not aromatic, is part of a ring.\nAtom 1 (C) is connected to atom 0 (O) and atom 2 (C).\nAtom 2 (C) has 5 atomics, has no specified chirality, has 4 bonds with other atoms, has a positi