## Novel Molecule Generation using Bidirectional-Recurrent Neural Networks with Attention Applied to Simplified Molecular Input Line Entry Sysem (SMILES)

## Hyperparameter tuning using Ray-Tune

author: anthony j. vasquez
email: vanthony715@gmail.com / avasque1@jh.edu

In [1]:
import time
from datetime import datetime
t0 = time.time()

import sys
sys.path.append('./data/')

import gc
gc.collect()

import os
##had a hard time with this setting on windows os using spyder and jypyter
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import warnings
warnings.filterwarnings("ignore")

import platform
print("Operating System: ", platform.system())
print("Machine Type:: ", platform.machine())

import re
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm import tqdm
from scipy.ndimage import gaussian_filter1d
from sklearn.model_selection import train_test_split

from rdkit import Chem
from rdkit.Chem import BRICS
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem.Descriptors import CalcMolDescriptors
from rdkit.Chem.rdMolDescriptors import GetHashedMorganFingerprint

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
import torch.distributed as dist

# !pip install ray[tune]
# !pip install -U ray
# !pip install tensorboard tensorboardx
import ray
from ray import tune
from ray.tune import Stopper
from ray.tune.schedulers import TrialScheduler, ASHAScheduler

from utils import make_dir

import multiprocessing as mp
max_processors = mp.cpu_count()
print('Processor Count: ', max_processors, '\n')

if torch.cuda.is_available():
    torch.cuda.empty_cache() #empty cache
    torch.cuda.reset_max_memory_allocated()
    torch.cuda.reset_max_memory_cached()
    print('CUDA Version')
    print(f'CUDNN Version: {torch.backends.cudnn.version()}')
    print(f'Number of CUDA Devices: {torch.cuda.device_count()}')
    print(f'Active CUDA Device: {torch.cuda.current_device()}')
    print(f'Available devices: {torch.cuda.device_count()}, Name: {torch.cuda.get_device_name(0)}')
    print(f'Current CUDA device: {torch.cuda.current_device()}')
    print('\n')

##hardware params
# DEVICE = torch.device('cpu') #DEBUG
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using Device: \n', DEVICE)


Operating System:  Linux
Machine Type::  x86_64
Processor Count:  24 

CUDA Version
CUDNN Version: 90100
Number of CUDA Devices: 2
Active CUDA Device: 0
Available devices: 2, Name: NVIDIA RTX A4000
Current CUDA device: 0


Using Device: 
 cuda


### Hyperparameters

In [2]:
##OS
LINUX = True
MULTI_GPU = False #trains using multiple gpus
N_PROCESSORS = max_processors - 2

##data
HOLDOUT_PERC = 0.95
TRAIN_BS = 512 #train batch size
PREFETCH_FACTOR = 4 #effectively reduces gpu load time 
NUM_WORKERS = 8

##training
BETAS = (0.5, 0.999) #momentum moving average
WEIGHT_DECAY = 1e-5 #L2 Regularization

##Tuning
MAX_CONCURRENT_TRIALS = 4 #how many experiments to run at one time
NUM_SAMPLES = 200 ##Number of configurations to try
MAX_T = 2500 #time in seconds 
PATIENCE = 5 #num epochs to wait without improvement before stopping
REDUCTION_FACTOR = 2 #reduce experiments by 2, with only best proceeding

##logging
RESULTS_PATH = './results/tune/'

### Get Data

In [3]:
%%time

def build_vocabulary(smiles_list: list) -> dict:
    '''
    Build reference vocab
    '''
    vocab = defaultdict(int)
    for smiles in smiles_list:
        tokens = tokenize_smiles(smiles)
        for token in tokens:
            if token not in vocab:
                vocab[token] = len(vocab) + 1
    return vocab

def tokenize_smiles(smiles: str) -> list:
    '''
    Generate tokens
    '''
    pattern = r"(\[[^\[\]]*\])"
    tokens = re.split(pattern, smiles)
    return [token for token in tokens if token]

### Import the data
data_file = pd.read_csv('./data/Zinc_all_smiles_data.txt', header = None)
data_file.columns = ['smiles']
smilesList = data_file['smiles'].tolist()

##split dset
train_smiles, holdout_smiles, _, _ = train_test_split(smilesList, smilesList, test_size=HOLDOUT_PERC, random_state=42)
print('Len all smiles: ', len(smilesList))
print('Len train smiles: ', len(train_smiles))
print('Len holdout smiles: ', len(holdout_smiles))

# Example usage:
vocab = build_vocabulary(smilesList)
max_length = max(len(tokenize_smiles(smiles)) for smiles in smilesList)

print('\n')

Len all smiles:  249456
Len train smiles:  12472
Len holdout smiles:  236984


CPU times: user 903 ms, sys: 9.6 ms, total: 913 ms
Wall time: 920 ms


In [4]:
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y_%H-%M-%S")

WRITEPATH = RESULTS_PATH + '_tune_' + dt_string
make_dir(WRITEPATH)

Created Folder at:  ./results/tune/_tune_20-08-2024_08-11-46


### Create Custom Smiles Dset

In [5]:
%%time

def encode_smiles(smiles: str, vocab: dict) -> list:
    '''
    Encodes (tokenizes) smiles 
    '''
    tokens = tokenize_smiles(smiles)
    return [vocab[token] for token in tokens]

def decode_smiles(encoded_smiles: list, vocab: dict) -> str:
    '''
    Decodes (de-tokenizes) smiles
    '''
    inv_vocab = {v: k for k, v in vocab.items()}
    return ''.join([inv_vocab[token] for token in encoded_smiles])

from torch.utils.data import Dataset
class SMILESDataset(Dataset):
    '''
    Custom Dataset for SMILES Strings
    '''
    def __init__(self, smiles_list: list, vocab: dict, max_length: int) -> None:
        
        '''
        Custom smiles dataset
        '''
        self.smiles_list = smiles_list
        self.vocab = vocab
        self.max_length = max_length

    def __len__(self):
        '''
        Required
        '''
        return len(self.smiles_list)

    def __getitem__(self, idx: int) -> torch.tensor:
        '''
        Required
        '''
        smiles = self.smiles_list[idx]
        encoded = encode_smiles(smiles, self.vocab)
        padded = encoded + [0] * (self.max_length - len(encoded))
        return torch.tensor(padded, dtype=torch.long)

##define dataset
dataset = SMILESDataset(train_smiles, vocab, max_length)

print('Data Summary: ')
print('smilesList Len: ', len(smilesList))
print('vocab len: ', len(vocab))
print('max_length: ', max_length)
print('\n')

Data Summary: 
smilesList Len:  249456
vocab len:  254593
max_length:  27


CPU times: user 426 µs, sys: 0 ns, total: 426 µs
Wall time: 394 µs


### Define Network

In [6]:
class Generator(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, hidden_dim: int, max_length: int, num_heads: int, num_layers: int, dropout_prob: float,
                bidirectional: bool) -> None:
        '''
        Bidirectional Recurrent Generator with Attention Layer
        '''
        super(Generator, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)  #generate embeddings
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, dropout=dropout_prob, batch_first=True, bidirectional=bidirectional) #gru instead of lstm
        if bidirectional: #if bidirectional, multiply by 2
            self.attention = nn.MultiheadAttention(hidden_dim*2, num_heads, dropout=dropout_prob)
            self.fc = nn.Linear(hidden_dim * 2, vocab_size)
        else:
            self.attention = nn.MultiheadAttention(hidden_dim, num_heads, dropout=dropout_prob)
            self.fc = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(dropout_prob) #very prone to overfitting, so dropout or some type of regularization is needed
        self.max_length = max_length

    def forward(self, z: torch.tensor) -> torch.tensor:
        '''
        Builds graph and forwards
        '''
        embedded = self.embedding(z)  # Shape: (batch_size, max_length, embedding_dim)
        gru_out, _ = self.gru(embedded)  # Shape: (batch_size, max_length, hidden_dim)
        gru_out = self.dropout(gru_out)
        
        #multi-head attention (MultiheadAttention expects (seq_len, batch_size, hidden_dim))
        attn_input = gru_out.transpose(0, 1)  #Transpose -> (max_length, batch_size, hidden_dim)
        attn_out, _ = self.attention(attn_input, attn_input, attn_input)  # Self-attention
        attn_out = attn_out.transpose(0, 1)  # Transpose back -> (batch_size, max_length, hidden_dim)

        output = self.fc(attn_out)  #output layer
        output_indices = torch.argmax(output, dim=-1)  #Convert from continous to discrete indices for Discriminator emvedding layer
        return output_indices 

class Discriminator(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, hidden_dim: int, max_length: int, num_heads: int, num_layers: int, dropout_prob: float,
                bidirectional: bool) -> None:
        '''
        Bidirectional Recurrent Discriminator with Attention Layer
        '''
        super(Discriminator, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, dropout=dropout_prob, batch_first=True, bidirectional=bidirectional)

        ##if bidirectional, then reduce the hidden dimensionality using fully connected of hid_size/2
        if bidirectional:
            self.fc_reduce = nn.Linear(hidden_dim * 2, hidden_dim)  #reduce dim
        else:
            self.fc_reduce = nn.Linear(hidden_dim, hidden_dim)  #don't reduce dim

        ##attend
        self.attention = nn.MultiheadAttention(hidden_dim, num_heads, dropout=dropout_prob)
        self.fc = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(dropout_prob)
        self.max_length = max_length

    def forward(self, x: torch.tensor) -> torch.tensor:
        '''
        Builds graph and forwards
        '''
        embedded = self.embedding(x)  #shape: (batch_size, max_length, embedding_dim)
        gru_out, _ = self.gru(embedded)  #shape: (batch_size, max_length, hidden_dim)
        gru_out = self.fc_reduce(gru_out)  #reduce dimension -> hidden_dim
        gru_out = self.dropout(gru_out)

        ##ulti-head attention
        attn_input = gru_out.transpose(0, 1)  #transpose -> (max_length, batch_size, hidden_dim)
        attn_out, _ = self.attention(attn_input, attn_input, attn_input)  #self-attention
        attn_out = attn_out.transpose(0, 1)  #transpose -> (batch_size, max_length, hidden_dim)

        ##take the last output from the sequence (many-to-one)
        output = self.fc(attn_out[:, -1, :])  ##hape: (batch_size, 1)
        return torch.sigmoid(output)  ##igmoid for binary classification


### Define Hyperparameter Tune Trainer

In [7]:
def train_model(config) -> tuple:
    ##binary loss init
    criterion = torch.nn.BCELoss()
    ##init dataloaders
    ##runs slightly faster using linux, because able to use num_workers > 0
    if LINUX:
        dataloader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True, prefetch_factor=PREFETCH_FACTOR, num_workers=NUM_WORKERS)
    else:
        ##windows
        dataloader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)

    #init models with current hyperparameters
    n_gen = Generator(vocab_size=len(vocab) + 1, 
                      embedding_dim=config['embedding_dim'], 
                      hidden_dim=config['hidden_dim'], 
                      max_length=max_length, 
                      num_heads=config['num_heads'], 
                      num_layers=config['num_layers'], 
                      dropout_prob=config['dropout_prob'], 
                      bidirectional=config['bidirectional']).to(DEVICE)

    n_disc = Discriminator(vocab_size=len(vocab) + 1, 
                           embedding_dim=config['embedding_dim'], 
                           hidden_dim=config['hidden_dim'], 
                           max_length=max_length, 
                           num_heads=config['num_heads'], 
                           num_layers=config['num_layers'], 
                           dropout_prob=config['dropout_prob'], 
                           bidirectional=config['bidirectional']).to(DEVICE)

    ##init optimizers
    if config['optimizer'] == 'NAdam':
        g_opt = torch.optim.NAdam(n_gen.parameters(), lr=config['lr_g'], betas=BETAS, weight_decay=WEIGHT_DECAY)
        d_opt = torch.optim.NAdam(n_disc.parameters(), lr=config['lr_d'], betas=BETAS, weight_decay=WEIGHT_DECAY)
    elif config['optimizer'] == 'rmsprop':
        g_opt = torch.optim.RMSprop(n_gen.parameters(), lr=config['lr_g'], weight_decay=WEIGHT_DECAY)
        d_opt = torch.optim.RMSprop(n_disc.parameters(), lr=config['lr_d'], weight_decay=WEIGHT_DECAY)
    
    ##init schedulers
    g_schedule = torch.optim.lr_scheduler.StepLR(g_opt, step_size=5, gamma=0.99)
    d_schedule = torch.optim.lr_scheduler.StepLR(d_opt, step_size=5, gamma=0.99)

    history = {'epoch': [] ,'g_loss': [], 'd_loss': []}
    for epoch in range(config['n_epochs']):
        t1 = time.time()
        for real_smiles in dataloader:
            batch_size = real_smiles.size(0)

            ##according to WGAN, this should help to stabalize training, original number was 5 -times disc updates to every 1 gen updates
            if config['n_disc_steps']:
                for i in range(config['n_disc_steps']):
                    ##-----train discriminator-----
                    d_opt.zero_grad()
        
                    ##generate real and fake labels (real are ones and fake are zeros)
                    real_labels = torch.ones(batch_size, 1).to(DEVICE)
                    fake_labels = torch.zeros(batch_size, 1).to(DEVICE)
        
                    ##gen real smiles and forward pass discriminator
                    real_smiles = real_smiles.to(DEVICE)
                    real_outputs = n_disc(real_smiles)
        
                    ##calc binary loss
                    d_loss_real = criterion(real_outputs, real_labels)

                    ##generate noise
                    noise = torch.randint(1, n_gen.max_length, (batch_size, n_gen.max_length)).to(DEVICE)
        
                    ##get fake smiles by forward passing generator
                    fake_smiles = n_gen(noise)
                    fake_smiles = fake_smiles.long()
        
                    ##show fake smiles to discriminator then calculate loss based on what the discriminator thinks is fake
                    fake_outputs = n_disc(fake_smiles)
                    d_loss_fake = criterion(fake_outputs, fake_labels)
                    
                    ##TODO: Add gradient clipping by following WGAN-GC
                    
                    ##sum loss, backprop, and update weights
                    d_loss = d_loss_real + d_loss_fake
                    d_loss.backward()
                    d_opt.step()

                    ##Apply weight clipping to discriminator
                    for p in n_disc.parameters():
                        p.data.clamp_(-config['clip_value'], config['clip_value'])

            ##-----Train Generator-----
            g_opt.zero_grad()

            ##reused noise vector
            fake_smiles = n_gen(noise)
            fake_smiles = fake_smiles.long()

            ###show fake smiles 
            fake_outputs = n_disc(fake_smiles)

            ##calculate the generated loss by comparing fake_outputs to real_labels
            ##want these two to have very low loss
            ##back prop and update weights
            g_loss = criterion(fake_outputs, real_labels)
            g_loss.backward()
            g_opt.step()

        ##Step Schedulers
        g_schedule.step()
        d_schedule.step()

        #record tuner
        ray.train.report(dict(loss=g_loss.item()))
    

### Define Tune Parameters and Tune

In [8]:
#early stopping shedule
asha_scheduler = ASHAScheduler(
    metric="loss", #what to optimize
    mode="min", #min or max
    max_t=MAX_T,  # Maximum number of iterations
    grace_period=PATIENCE,  # Minimum number of iterations to run before considering early stopping
    reduction_factor=REDUCTION_FACTOR  #top 50% of trials are retained after each round
)

##set search grid
config = {
            'lr_g': tune.loguniform(2e-5, 2e-2),  #generator lr
            'lr_d': tune.loguniform(2e-5, 2e-2),  #discriminator lr
            'batch_size': tune.choice([32, 64, 128]), #batch size
            'hidden_dim': tune.choice([16, 32, 64, 128]), #latent dim
            'embedding_dim': tune.choice([16, 32, 64]), #embedding size to represent embedding vector
            'num_layers': tune.choice([2, 3]), #num gru layers
            'dropout_prob': tune.choice([0.25, 0.5]), #neuron dropout probability
            'num_heads': tune.choice([2, 4, 8]), #num of attention heads
            'bidirectional': tune.choice([True, False]), #bidirection GRU
            'n_disc_steps': tune.choice([1, 3, 5]),  # Number of discriminator updates per generator update
            'clip_value': tune.loguniform(0.01, 0.001),  # Weight clipping value for WGAN
            'optimizer': 'rmsprop', #optimizers
            'n_epochs': 25,  ##max epochs to run
            }

##kickoff search
analysis = tune.run(
    tune.with_parameters(train_model), #train function
    resources_per_trial={"cpu": 2, "gpu": 1}, ##num resources to alot per experiment
    config=config, #ray tune config
    num_samples=NUM_SAMPLES, #number of experiments
    max_concurrent_trials=MAX_CONCURRENT_TRIALS,
    scheduler=asha_scheduler, #number of epochs
    local_dir=WRITEPATH, #where to write
    verbose=2, #verbosity of output
    name=WRITEPATH.split('/')[-1], #what to call experiment
    )

2024-08-20 08:11:49,566	INFO worker.py:1752 -- Started a local Ray instance.
2024-08-20 08:11:50,275	INFO tune.py:263 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-08-20 08:11:50,278	INFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-08-20 23:37:53
Running for:,15:26:03.03
Memory:,12.4/62.6 GiB

Trial name,status,loc,batch_size,bidirectional,clip_value,dropout_prob,embedding_dim,hidden_dim,lr_d,lr_g,n_disc_steps,num_heads,num_layers,iter,total time (s),loss
train_model_61475_00000,TERMINATED,172.27.161.222:455360,64,False,0.00243267,0.5,32,16,8.69967e-05,0.00758527,5,4,2,25,1963.81,0.693126
train_model_61475_00001,TERMINATED,172.27.161.222:455361,128,True,0.00512138,0.25,16,16,0.00133899,5.32777e-05,1,2,2,25,420.668,0.693484
train_model_61475_00002,TERMINATED,172.27.161.222:459775,32,True,0.00508872,0.25,32,64,0.000183779,5.80158e-05,3,4,2,25,1692.96,0.693191
train_model_61475_00003,TERMINATED,172.27.161.222:471651,32,True,0.00362076,0.25,16,128,2.99684e-05,0.00361645,5,2,3,25,3828.22,0.693154
train_model_61475_00004,TERMINATED,172.27.161.222:473010,128,True,0.00117356,0.5,64,64,0.00260733,0.000376708,3,4,3,5,242.897,0.69376
train_model_61475_00005,TERMINATED,172.27.161.222:475072,128,True,0.00429022,0.25,16,16,0.00185626,0.00293273,3,8,3,5,201.344,0.693481
train_model_61475_00006,TERMINATED,172.27.161.222:476840,64,False,0.00130195,0.25,64,32,0.00505057,0.00105563,5,4,3,10,775.663,0.693826
train_model_61475_00007,TERMINATED,172.27.161.222:482681,64,False,0.00812146,0.5,16,16,0.0137352,3.9731e-05,5,4,3,10,702.006,0.696493
train_model_61475_00008,TERMINATED,172.27.161.222:487986,128,True,0.00277048,0.5,32,16,5.34766e-05,0.00087208,3,2,3,25,960.279,0.693134
train_model_61475_00009,TERMINATED,172.27.161.222:495327,128,False,0.0068612,0.25,64,32,3.7109e-05,0.0151072,1,8,2,25,452.39,0.693138




Trial name,loss
train_model_61475_00000,0.693126
train_model_61475_00001,0.693484
train_model_61475_00002,0.693191
train_model_61475_00003,0.693154
train_model_61475_00004,0.69376
train_model_61475_00005,0.693481
train_model_61475_00006,0.693826
train_model_61475_00007,0.696493
train_model_61475_00008,0.693134
train_model_61475_00009,0.693138


2024-08-20 23:37:53,460	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/home/anth005/ray_results/_tune_20-08-2024_08-11-46' in 0.0268s.
2024-08-20 23:37:53,500	INFO tune.py:1048 -- Total run time: 55563.22 seconds (55563.00 seconds for the tuning loop).


In [9]:
##get best trials info
best_trial = analysis.get_best_trial(metric="loss", mode="min", scope="all")
best_config = analysis.get_best_config(metric="loss", mode="min", scope="all")
best_result = analysis.get_best_trial(metric="loss", mode="min").last_result
print(f"Best trial config: {best_config}")
print(f"Best trial final loss: {best_result['loss']}")

Best trial config: {'lr_g': 0.004086800025392213, 'lr_d': 0.013448070902660135, 'batch_size': 128, 'hidden_dim': 128, 'embedding_dim': 32, 'num_layers': 2, 'dropout_prob': 0.5, 'num_heads': 4, 'bidirectional': True, 'n_disc_steps': 3, 'clip_value': 0.008101987508250374, 'optimizer': 'rmsprop', 'n_epochs': 25}
Best trial final loss: 0.6910346150398254


In [10]:
#export to pandas and sace
df = analysis.results_df
df.to_csv(WRITEPATH + "/tune_results.csv")
df.head()

Unnamed: 0_level_0,loss,timestamp,checkpoint_dir_name,done,training_iteration,date,time_this_iter_s,time_total_s,pid,hostname,...,config/hidden_dim,config/embedding_dim,config/num_layers,config/dropout_prob,config/num_heads,config/bidirectional,config/n_disc_steps,config/clip_value,config/optimizer,config/n_epochs
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
61475_00000,0.693126,1724157877,,True,25,2024-08-20_08-44-37,79.357296,1963.805997,455360,WUSSYRD230001,...,16,32,2,0.5,4,False,5,0.002433,rmsprop,25
61475_00001,0.693484,1724156334,,True,25,2024-08-20_08-18-54,16.904019,420.667542,455361,WUSSYRD230001,...,16,16,2,0.25,2,True,1,0.005121,rmsprop,25
61475_00002,0.693191,1724158031,,True,25,2024-08-20_08-47-11,67.267845,1692.959087,459775,WUSSYRD230001,...,64,32,2,0.25,4,True,3,0.005089,rmsprop,25
61475_00003,0.693154,1724161708,,True,25,2024-08-20_09-48-28,150.649411,3828.215714,471651,WUSSYRD230001,...,128,16,3,0.25,2,True,5,0.003621,rmsprop,25
61475_00004,0.69376,1724158277,,True,5,2024-08-20_08-51-17,47.855909,242.896719,473010,WUSSYRD230001,...,64,64,3,0.5,4,True,3,0.001174,rmsprop,25


### Check Validity

In [11]:
##clean and time
gc.collect()
torch.cuda.empty_cache()
tf = time.time()
print('Total Runtime: ', np.round(tf - t0, 3))

Total Runtime:  55571.995


### REFERENCES

Alqahtani, H. E. (2019, Dec 19). Applications of Generative Adversarial Networks (GANS): An Updated Review.

Arjovsky, M. E. (2017, Jan 26). Wasserstein GAN.

Bidisha, S. (2019). NeVAE: A Deep Generative Model for Molecular Graphs.

Goodfellow, E. A. (2014, June 10). Generative Adversarial Networks.

Jaun-Ni Wu, E. A. (2024). t-SMILES: A Fragment-based Molecular Representation Framework for De Novo Ligand Design. Hunan, China.

National Library of Medicine. (2024, July 22). National Center for Biotechnology Information. Retrieved from PubChem: https://pubchem.ncbi.nlm.nih.gov/

Nicola De Cao, T. K. (2018). MolGAN: An Implicit Generative Model for Small Molecular Graphs. Stochholm, Sweden.