In [1]:
import pickle
import time
import json
import requests
import numpy as np
import matplotlib.pyplot as plt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import graph_ga.sa_goal_directed_generation as ga

%matplotlib inline

Using TensorFlow backend.


# Test case

In [2]:
def objective(hps):
    return {
        'loss': (hps['x'] ** 2) + (hps['y'] ** 2),
        'status': STATUS_OK,
        # -- store other results like this
        'eval_time': time.time(),
        'other_stuff': {'type': None, 'value': [0, 1, 2]},
        # -- attachments are handled differently
        'attachments':
            {'time_module': pickle.dumps(time.time)}
        }
trials = Trials()

In [3]:
best = fmin(objective,
    space={
        'x': hp.uniform('x', -10, 10),
        'y': hp.uniform('y', -10, 10)
    },
    algo=tpe.suggest,
    max_evals=1000,
    trials=trials)

print(best)

100%|██████████| 1000/1000 [00:06<00:00, 150.94it/s, best loss: 0.005955677566835996]
{'x': -0.048539265818617124, 'y': -0.059996810253759525}


In [11]:
trials.losses()

[17.14925890800539,
 42.38683610858323,
 20.195975365525488,
 22.144166874714657,
 0.709669147279038,
 0.20855744509078808,
 0.0009402394978576315,
 32.55474669881439,
 91.96387396157859,
 3.301790599297985,
 95.54563219211498,
 95.19106678993246,
 31.530818318781375,
 6.929007987730084,
 90.10808867576182,
 75.90117198157849,
 5.856929567681426,
 17.655254632649417,
 2.6711997575467157,
 97.65079504776514,
 0.9837213059792451,
 1.2622879092604993,
 57.13579032503423,
 5.563734098240382,
 46.995056284468376,
 0.019425543174544046,
 10.785533992638523,
 7.444074497690113,
 0.926349071627428,
 57.18376642981526,
 17.190284257373758,
 15.53152622477571,
 0.05167094864518528,
 39.719221576590776,
 25.79221336751265,
 27.751638586667298,
 1.5347805300264075,
 8.395621192539982,
 8.785603469484945,
 0.05667850128963035,
 46.188153299115214,
 59.538022409589715,
 12.884138677341221,
 4.7571367901521135,
 70.22651360178033,
 2.922434259699616,
 13.669433461770875,
 0.6808077099657045,
 0.11508

# Real Case

In [2]:
def evaluate(hps):
    ga.bo_main(hps['mu'], hps['sigma'])
    with open('./graph_ga/sa_goal_directed_results.json', 'r') as f:
        results = json.load(f)
        
    NUM = 10
    HOST = 'https://35.202.13.65'

    mols = []
    props = []

    for i in range(len(results['results'])):
        temp = []
        for j in range(NUM):
            temp.append(results['results'][i]['optimized_molecules'][j][1])
        props.append(np.mean(np.array(temp)))

    for i in range(len(results['results'])):
        for j in range(NUM):
            mols.append(results['results'][i]['optimized_molecules'][j][0])
            
    synth = []

    for _ in range(len(results['results'])):

        num_good = 0
        num_total = 0
        num_find = 0

        for i in range(len(mols)):
            # Check if is buyable
            params = {
                'smiles': mols[i] # required
            }
            resp = requests.get(HOST+'/api/price/', params=params, verify=False)

            if resp.json()['price'] == 0:
                # Call tree builder
                params = {
                    'smiles': mols[i],  # required

                    # optional with defaults shown
                    'max_depth': 8,
                    'max_branching': 25,
                    'expansion_time': 60,
                    'max_ppg': 100,
                    'template_count': 1000,
                    'max_cum_prob': 0.999,
                    'chemical_property_logic': 'none',
                    'max_chemprop_c': 0,
                    'max_chemprop_n': 0,
                    'max_chemprop_o': 0,
                    'max_chemprop_h': 0,
                    'chemical_popularity_logic': 'none',
                    'min_chempop_reactants': 5,
                    'min_chempop_products': 5,
                    'filter_threshold': 0.1,

                    'return_first': 'true'  # default is false
                }

                for _ in range(15):
                    print('Trying to send the request, for the %i times now' % (_ + 1))
                    resp = requests.get(HOST + '/api/treebuilder/', params=params, verify=False)
                    if 'error' not in resp.json().keys():
                        print(f'A result is found!, Total path: {len(resp.json()["trees"])}')
                        num_good += 1
                        if len(resp.json()['trees']) != 0:
                            num_find += 1
                        break
                    # pprint(resp.json())
            else:
                num_good += 1
                num_find += 1

            num_total += 1

        synth.append(num_find / num_total)
    
    loss = np.array(props) * np.array(synth)
    return 1 - loss.mean()

In [3]:
SPACE = {
    'mu': hp.uniform('mu', 1, 5),
    'sigma': hp.uniform('sigma', 0.01, 5)
}

def objective(hps):
    return {
        'loss': evaluate(hps),
        'status': STATUS_OK,
        # -- store other results like this
        'eval_time': time.time(),
        'other_stuff': {'type': None, 'value': [0, 1, 2]},
        # -- attachments are handled differently
        'attachments':
            {'time_module': pickle.dumps(time.time)}
        }
trials = Trials()

In [None]:
best = fmin(objective,
    space=SPACE,
    algo=tpe.suggest,
    max_evals=10,
    trials=trials)

print(best)

  0%|          | 0/10 [00:00<?, ?it/s, best loss: ?]

INFO : Benchmarking goal-directed molecule generation, version test
INFO : Number of benchmarks: 2
INFO : Running benchmark 1/2: QED




selecting initial population...
0 | max: 0.948 | avg: 0.944 | min: 0.942 | std: 0.001 | sum: 94.422 | 0.61 sec/gen | 163.17 mol/sec
1 | max: 0.948 | avg: 0.944 | min: 0.942 | std: 0.001 | sum: 94.425 | 0.95 sec/gen | 105.00 mol/sec
2 | max: 0.948 | avg: 0.944 | min: 0.942 | std: 0.001 | sum: 94.429 | 1.04 sec/gen | 95.77 mol/sec
3 | max: 0.948 | avg: 0.944 | min: 0.942 | std: 0.001 | sum: 94.432 | 0.84 sec/gen | 119.25 mol/sec
4 | max: 0.948 | avg: 0.944 | min: 0.942 | std: 0.001 | sum: 94.433 | 0.77 sec/gen | 130.09 mol/sec
5 | max: 0.948 | avg: 0.944 | min: 0.943 | std: 0.001 | sum: 94.446 | 0.88 sec/gen | 113.72 mol/sec
6 | max: 0.948 | avg: 0.944 | min: 0.943 | std: 0.001 | sum: 94.446 | 0.88 sec/gen | 113.48 mol/sec
Failed to progress: 1
7 | max: 0.948 | avg: 0.944 | min: 0.943 | std: 0.001 | sum: 94.446 | 0.80 sec/gen | 124.88 mol/sec
8 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.452 | 0.85 sec/gen | 117.18 mol/sec
9 | max: 0.948 | avg: 0.945 | min: 0.943 | std:

INFO : Results for the benchmark "QED":
INFO :   Score: 0.947517
INFO :   Execution time: 0:03:58
INFO :   Metadata: {'top_1': 0.9481523561835619, 'top_10': 0.9479986065912704, 'top_100': 0.946399074386962, 'internal_similarity_max': 0.8571428571428571, 'internal_similarity_mean': 0.17413000109211987, 'internal_similarity_histogram_density': ([1.378, 6.442, 1.5359999999999994, 0.21000000000000005, 0.08200000000000002, 0.07599999999999993, 0.14, 0.10600000000000002, 0.030000000000000006, 0.0],), 'internal_similarity_histogram_bins': ([0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1.0],)}
INFO : Running benchmark 2/2: Zaleplon MPO




selecting initial population...
0 | max: 0.505 | avg: 0.454 | min: 0.435 | std: 0.014 | sum: 45.419 | 0.96 sec/gen | 104.06 mol/sec
1 | max: 0.505 | avg: 0.455 | min: 0.435 | std: 0.014 | sum: 45.459 | 0.94 sec/gen | 106.04 mol/sec
2 | max: 0.505 | avg: 0.455 | min: 0.436 | std: 0.014 | sum: 45.502 | 1.00 sec/gen | 99.53 mol/sec
3 | max: 0.505 | avg: 0.456 | min: 0.437 | std: 0.014 | sum: 45.606 | 0.93 sec/gen | 108.01 mol/sec
4 | max: 0.505 | avg: 0.456 | min: 0.437 | std: 0.014 | sum: 45.646 | 1.05 sec/gen | 94.89 mol/sec
5 | max: 0.516 | avg: 0.458 | min: 0.438 | std: 0.015 | sum: 45.758 | 1.18 sec/gen | 85.09 mol/sec
6 | max: 0.516 | avg: 0.458 | min: 0.439 | std: 0.015 | sum: 45.801 | 1.09 sec/gen | 91.39 mol/sec
7 | max: 0.516 | avg: 0.459 | min: 0.439 | std: 0.016 | sum: 45.926 | 0.90 sec/gen | 110.63 mol/sec
8 | max: 0.516 | avg: 0.460 | min: 0.439 | std: 0.016 | sum: 45.954 | 1.06 sec/gen | 94.29 mol/sec
9 | max: 0.516 | avg: 0.460 | min: 0.440 | std: 0.016 | sum: 45.995 | 1.0

INFO : Results for the benchmark "Zaleplon MPO":
INFO :   Score: 0.643954
INFO :   Execution time: 0:09:20
INFO :   Metadata: {'top_1': 0.6558258357839529, 'top_10': 0.654175893787709, 'top_100': 0.6218610704347308, 'internal_similarity_max': 0.8727272727272727, 'internal_similarity_mean': 0.4098090463788426, 'internal_similarity_histogram_density': ([0.1, 0.0, 0.9079999999999996, 4.57, 2.564000000000001, 1.0639999999999992, 0.5900000000000001, 0.16800000000000004, 0.036000000000000004, 0.0],), 'internal_similarity_histogram_bins': ([0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1.0],)}
INFO : Finished execution of the benchmarks
INFO : Save results to file /Users/gaowh/PycharmProjects/ga_test/graph_ga/sa_goal_directed_results.json




Trying to send the request, for the 1 times now
A result is found!, Total path: 7
Trying to send the request, for the 1 times now
A result is found!, Total path: 6
Trying to send the request, for the 1 times now
A result is found!, Total path: 4
Trying to send the request, for the 1 times now
A result is found!, Total path: 4
Trying to send the request, for the 1 times now
A result is found!, Total path: 16
Trying to send the request, for the 1 times now
A result is found!, Total path: 118
Trying to send the request, for the 1 times now
A result is found!, Total path: 2
Trying to send the request, for the 1 times now
A result is found!, Total path: 5
Trying to send the request, for the 1 times now
A result is found!, Total path: 8
Trying to send the request, for the 1 times now
A result is found!, Total path: 1
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to sen

INFO : tpe_transform took 0.002978 seconds
INFO : TPE using 1/1 trials with best loss 0.559402
INFO : Benchmarking goal-directed molecule generation, version test
INFO : Number of benchmarks: 2
INFO : Running benchmark 1/2: QED




selecting initial population...
0 | max: 0.948 | avg: 0.944 | min: 0.943 | std: 0.001 | sum: 94.446 | 0.57 sec/gen | 174.57 mol/sec
Failed to progress: 1
1 | max: 0.948 | avg: 0.944 | min: 0.943 | std: 0.001 | sum: 94.446 | 1.09 sec/gen | 91.69 mol/sec
2 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.450 | 0.87 sec/gen | 115.15 mol/sec
3 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.459 | 0.84 sec/gen | 119.15 mol/sec
4 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.462 | 0.78 sec/gen | 127.86 mol/sec
5 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.469 | 0.99 sec/gen | 101.29 mol/sec
Failed to progress: 1
6 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.469 | 0.99 sec/gen | 101.47 mol/sec
7 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.471 | 0.77 sec/gen | 129.17 mol/sec
8 | max: 0.948 | avg: 0.945 | min: 0.943 | std: 0.001 | sum: 94.475 | 0.81 sec/gen | 123.60 mol/sec
9 | max: 0.948 | avg: 0.9

INFO : Results for the benchmark "QED":
INFO :   Score: 0.948195
INFO :   Execution time: 0:06:44
INFO :   Metadata: {'top_1': 0.9483769889558782, 'top_10': 0.9482911043881401, 'top_100': 0.9479155275905071, 'internal_similarity_max': 0.8837209302325582, 'internal_similarity_mean': 0.18618496960309308, 'internal_similarity_histogram_density': ([1.866, 5.49, 1.3519999999999996, 0.4620000000000001, 0.3940000000000001, 0.2219999999999998, 0.06200000000000001, 0.09000000000000002, 0.06200000000000001, 0.0],), 'internal_similarity_histogram_bins': ([0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1.0],)}
INFO : Running benchmark 2/2: Zaleplon MPO




selecting initial population...
0 | max: 0.505 | avg: 0.454 | min: 0.435 | std: 0.014 | sum: 45.382 | 0.50 sec/gen | 198.23 mol/sec
1 | max: 0.526 | avg: 0.455 | min: 0.436 | std: 0.015 | sum: 45.512 | 1.04 sec/gen | 96.28 mol/sec
2 | max: 0.526 | avg: 0.456 | min: 0.436 | std: 0.015 | sum: 45.592 | 1.04 sec/gen | 96.15 mol/sec
Failed to progress: 1
3 | max: 0.526 | avg: 0.456 | min: 0.436 | std: 0.015 | sum: 45.592 | 1.03 sec/gen | 97.09 mol/sec
4 | max: 0.526 | avg: 0.457 | min: 0.437 | std: 0.016 | sum: 45.658 | 0.92 sec/gen | 108.19 mol/sec
5 | max: 0.526 | avg: 0.458 | min: 0.438 | std: 0.016 | sum: 45.752 | 1.09 sec/gen | 91.94 mol/sec
6 | max: 0.526 | avg: 0.458 | min: 0.439 | std: 0.016 | sum: 45.810 | 1.12 sec/gen | 89.68 mol/sec
Failed to progress: 1
7 | max: 0.526 | avg: 0.458 | min: 0.439 | std: 0.016 | sum: 45.810 | 0.98 sec/gen | 102.16 mol/sec
8 | max: 0.526 | avg: 0.459 | min: 0.439 | std: 0.016 | sum: 45.870 | 1.05 sec/gen | 95.28 mol/sec
9 | max: 0.526 | avg: 0.459 | 

INFO : Results for the benchmark "Zaleplon MPO":
INFO :   Score: 0.629728
INFO :   Execution time: 0:07:09
INFO :   Metadata: {'top_1': 0.6475761258027333, 'top_10': 0.635309681533008, 'top_100': 0.6062980192542485, 'internal_similarity_max': 0.8571428571428571, 'internal_similarity_mean': 0.44247442220750016, 'internal_similarity_histogram_density': ([0.1, 0.016, 1.0559999999999996, 2.7000000000000006, 3.032000000000001, 1.7079999999999986, 1.2020000000000002, 0.18000000000000005, 0.006000000000000001, 0.0],), 'internal_similarity_histogram_bins': ([0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1.0],)}
INFO : Finished execution of the benchmarks
INFO : Save results to file /Users/gaowh/PycharmProjects/ga_test/graph_ga/sa_goal_directed_results.json




Trying to send the request, for the 1 times now
Trying to send the request, for the 2 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
A result is found!, Total path: 0
Trying to send the request, for the 1 times now
 10%|█         | 1/10 [56:00<5:20:12, 2134.71s/it, best loss: 0.5594020123957806]

# Play Ground

In [1]:
from guacamol_local.standard_benchmarks import hard_cobimetinib, similarity, logP_benchmark, cns_mpo, \
    qed_benchmark, median_camphor_menthol, novelty_benchmark, isomers_c11h24, isomers_c7h8n2o2, isomers_c9h10n2o2pf2cl, \
    frechet_benchmark, tpsa_benchmark, hard_osimertinib, hard_fexofenadine, weird_physchem, start_pop_ranolazine, \
    kldiv_benchmark, perindopril_rings, amlodipine_rings, sitagliptin_replacement, zaleplon_with_other_formula, valsartan_smarts, \
    median_tadalafil_sildenafil, decoration_hop, scaffold_hop, ranolazine_mpo, pioglitazone_mpo, sa_qed_benchmark
from guacamol_local.scoring_function import *
from rdkit import Chem
from functools import partial

from guacamol_local.common_scoring_functions import TanimotoScoringFunction, RdkitScoringFunction, CNS_MPO_ScoringFunction, \
    IsomerScoringFunction, SMARTSScoringFunction
from guacamol_local.distribution_learning_benchmark import DistributionLearningBenchmark, NoveltyBenchmark, KLDivBenchmark
from guacamol_local.frechet_benchmark import FrechetBenchmark
from guacamol_local.goal_directed_benchmark import GoalDirectedBenchmark
from guacamol_local.goal_directed_score_contributions import uniform_specification
from guacamol_local.score_modifier import MinGaussianModifier, MaxGaussianModifier, ClippedScoreModifier, GaussianModifier
from guacamol_local.scoring_function import ArithmeticMeanScoringFunction, GeometricMeanScoringFunction, ScoringFunction, \
    ScoringFunctionSAWrapper
from guacamol_local.utils.descriptors import num_rotatable_bonds, num_aromatic_rings, logP, qed, tpsa, bertz, mol_weight, \
    AtomCounter, num_rings
from guacamol_local.sa_modifier import LinearModifier, SAScoreModifier, SCScoreModifier, SmilesModifier

Using TensorFlow backend.


In [2]:
a = IsomerScoringFunction('C9H10N2O2PF2Cl', mean_function='geometric')

In [10]:
a.score('CNCCN(C)NCOCON')

0.004853161163813102

In [4]:
b = ScoringFunctionSAWrapper(a, SAScoreModifier())

In [9]:
b.score('CNCCN(C)NCOCON')

0.0028884464405169613

In [6]:
modifier = SAScoreModifier()

In [17]:
modifier('O=C(NCc1ccccc1Br)c1cc(Cl)on1', 1)

1.0