# Hit-Generation by Fragment-Based Molecular Optimization

In [None]:
from optimizer import MolecularOptimizer
from library import Library
from genmol import GenMol_Generator
from oracle import Oracle

import pandas as pd

## Library

#### Read the molecular library of FDA-approved drugs with 20–30 heavy atoms

In [None]:
# Load the molecule table from the CSV file in the working directory.
mol_library = pd.read_csv('./fda_drugs.csv')
# Bare expression as the last line of the cell: shows the DataFrame as output.
mol_library

#### Initialize the fragment library from known molecules

In [None]:
# Create the Library instance used by the rest of the notebook.
# NOTE(review): max_fragments presumably caps how many fragments are kept —
# confirm against library.py.
library = Library(max_fragments = 1000)

In [None]:
# Map each SMILES string to its 'QED Weighted' value. Zipping the two columns
# avoids DataFrame.iterrows(), which builds a full Series for every row only
# to read two fields from it. Duplicate SMILES still resolve to the last row.
molecules = dict(zip(mol_library['Smiles'], mol_library['QED Weighted']))
# Hand the {smiles: score} mapping to the library.
library.update(molecules)

In [None]:
# Bare expression: shows the library's `fragments` attribute as the cell output.
library.fragments

In [None]:
# Call Library.export and show its return value as the cell output.
# NOTE(review): the meaning of the argument 2 is not visible in this notebook —
# confirm against library.py.
library.export(2)

## Oracle

In [None]:
# Build the scoring oracle around the RDKit-based 'QED' score.
# NOTE(review): presumably the same QED measure as the CSV's 'QED Weighted'
# column — confirm against oracle.py.
oracle = Oracle(score = Oracle.RDKitScore('QED'))

In [None]:
# Quick check of the oracle: evaluate a one-element list containing a single
# SMILES string and show the result as the cell output.
oracle.evaluate(['CCS(=O)(=O)N1CC(CC#N)(C2CCOCC2C)C1'])

## Generator

In [None]:
# Create the generator client, configured with a localhost URL.
# NOTE(review): presumably requires a GenMol service listening on
# 127.0.0.1:8000 before the following cells are run — confirm.
generator = GenMol_Generator(invoke_url = 'http://127.0.0.1:8000/generate')

In [None]:
# Ask the generator for 5 molecules from a dot-separated SMILES template.
# NOTE(review): the trailing `[*{10-10}]` token looks like a placeholder that
# the GenMol service fills in — confirm its exact meaning against genmol.py.
generator.inference(
    smiles = 'CCS2(=O)(=O).C134CN2C1.C3C#N.[*{10-10}]',
    num_molecules = 5,
    temperature = 1.5,
    noise = 2.0
)

In [None]:
# Call produce on a one-element list of SMILES and show the result.
# NOTE(review): the meaning of the second positional argument (1) is not
# visible in this notebook — confirm against genmol.py.
generator.produce(['CC(NC(=O)C(=O)Nc1ccc(F)c(F)c1F)C(F)COc1cc(CNC(=O)NCc2cccnc2)ccn1'], 1)

## Optimizer

In [None]:
# Wire the library, oracle and generator objects created above into the optimizer.
optimizer = MolecularOptimizer(library = library, oracle = oracle, generator = generator)

In [None]:
# History of [best, mean, std] rows, one per snapshot; read by the plotting cell.
data = []


# NOTE(review): this helper shadows the builtin ``eval``. The name is kept so
# that any other cell calling ``eval()`` keeps working; consider renaming it.
def eval(top_k=50):
    """Print and record score statistics for the head of ``library.molecules``.

    Appends ``[best, mean, std]`` to the module-level ``data`` list.

    Args:
        top_k: Number of leading rows of ``library.molecules`` that go into
            the mean and standard deviation. Defaults to 50.
    """
    scores = library.molecules['score']
    # Positional access on purpose: ``scores[0]`` looks up the *label* 0,
    # which is only the first row when the index is a fresh 0..n-1 range.
    # ``head`` below is positional as well, so both now agree.
    best = scores.iloc[0]
    leading = scores.head(top_k)
    mean = leading.mean()
    std = leading.std()
    print("BEST", best, "MEAN", mean, "STD", std)
    data.append([best, mean, std])


# Baseline snapshot before any optimization has run.
eval()

# 25 rounds; each round runs the optimizer (iterations=10, num_mutate=50)
# and then records one snapshot.
for i in range(25):
    optimizer.run(iterations=10, num_mutate=50)
    eval()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Turn the recorded [best, mean, std] snapshots into a table, one row per snapshot.
df = pd.DataFrame(data, columns=['best', 'mean', 'std'])

# Shaded band of one standard deviation on either side of the mean.
lower_band = df['mean'] - df['std']
upper_band = df['mean'] + df['std']
plt.fill_between(df.index, lower_band, upper_band, alpha=0.4)

# Mean curve first, then best curve, so the default colour cycle is unchanged.
plt.plot(df['mean'])
plt.plot(df['best'])