In [1]:
from ddbox.molecule import Molecule, MOLECULE_DESCRIPTORS

# Molecule Object & Descriptors

In [2]:
# Aspirin in two identifier formats: InChI and SMILES

ASPIRIN_INCHI = 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)'
ASPIRIN_SMILES = 'O=C(C)Oc1ccccc1C(=O)O'

In [3]:
# Build a Molecule from the aspirin SMILES string, then display its
# `is_valid` flag as the cell result

molecule = Molecule.from_smiles(ASPIRIN_SMILES)
molecule.is_valid

True

In [4]:
# Build a Molecule from the aspirin InChI string, then display its
# `is_valid` flag. This rebinds `molecule`: the InChI-built instance is the
# one the later cells operate on.

molecule = Molecule.from_inchi(ASPIRIN_INCHI)
molecule.is_valid

True

In [5]:
# Print every name in MOLECULE_DESCRIPTORS together with the value obtained
# by indexing the molecule with that name.

for name in MOLECULE_DESCRIPTORS:
    # !s formats the value through str(), exactly as %s does
    print(f'{name}: {molecule[name]!s}')

BalabanJ: 3.0435273546341013
BertzCT: 343.22286772671646
Chi0: 9.844934982691242
Chi0n: 6.981359543650051
Chi0v: 6.981359543650051
Chi1: 6.1090609052806215
Chi1n: 3.6174536478673316
Chi1v: 3.6174536478673316
Chi2n: 2.394955678320673
Chi2v: 2.394955678320673
Chi3n: 1.3711546649445034
Chi3v: 1.3711546649445034
Chi4n: 0.887171219237414
Chi4v: 0.887171219237414
EState_VSA1: 11.938610575903699
EState_VSA10: 9.589074368143644
EState_VSA11: 0.0
EState_VSA2: 11.3129633249809
EState_VSA3: 0.0
EState_VSA4: 0.0
EState_VSA5: 19.056471336613843
EState_VSA6: 12.13273413692322
EState_VSA7: 0.0
EState_VSA8: 4.736862953800049
EState_VSA9: 5.106527394840706
ExactMolWt: 180.042258736
FpDensityMorgan1: 1.3076923076923077
FpDensityMorgan2: 1.9230769230769231
FpDensityMorgan3: 2.4615384615384617
FractionCSP3: 0.1111111111111111
HallKierAlpha: -1.8399999999999999
HeavyAtomCount: 13
HeavyAtomMolWt: 172.09499999999997
Ipc: 729.6807528797516
Kappa1: 9.249605734767023
Kappa2: 3.7092512583454584
Kappa3: 2.2974150

In [6]:
# A single value is read by name with the same item access used in the loop
# above. 'qed' is presumably the QED drug-likeness score — confirm against
# the ddbox documentation.
molecule['qed']

0.5501217966938848

# Moses Metrics

In [7]:
from pprint import pprint

In [8]:
from ddbox.metrics.benchmarks.moses import compute_metrics


# Two small SMILES collections that are passed to compute_metrics below.
# NOTE(review): which argument is treated as the generated set and which as
# the reference set is not visible here — confirm against compute_metrics.
smiles_list_a = [
    "COc1ccc(CNS(=O)(=O)c2ccc(Cl)cc2)cc1",
    "CCN(Cc1ccccc1)S(=O)(=O)c1ccc(C)cc1",
    "COc1ccccc1N1CCN(S(=O)(=O)c2ccc(C)cc2)CC1",
    "C=CCN(CC=C)S(=O)(=O)c1ccc(C)cc1",
    "Cc1ccc(S(=O)(=O)N2CCCC(C)C2)cc1",
    "Cc1ccc(S(=O)(=O)N2CCc3ccccc3C2)cc1",
    "COc1ccc(S(=O)(=O)N(CCO)Cc2ccccc2)cc1",
    "COc1ccc(S(=O)(=O)N(CCO)C2CCCCC2)cc1",
    "C=CCN(CC=C)S(=O)(=O)c1ccc(OC)cc1",
    "COc1ccc(S(=O)(=O)N2CCCC(C)C2)cc1",
    "COc1ccc(S(=O)(=O)NCc2ccccn2)cc1",
    "COc1ccc(S(=O)(=O)NCC2CCCO2)cc1",
    "Cc1cc(C)c(S(=O)(=O)N(CCO)Cc2ccccc2)c(C)c1",
    "Cc1cc(C)c(S(=O)(=O)NCC(C)C)c(C)c1",
]
smiles_list_b = [
    "COc1cccc(OC)c1C(=O)N1CCOCC1",
    "Cc1cc(Br)ccc1NC(=O)c1cccnc1",
    "O=C(Nc1ccncc1)c1cccc(C(=O)Nc2ccncc2)c1",
    "Cc1cccnc1NC(=O)c1cccc(C(=O)Nc2ncccc2C)c1",
    "O=C(Nn1cnnc1)c1cccc(C(=O)Nn2cnnc2)c1",
    "CC(=O)Nc1cccc(NC(=O)c2cccs2)c1",
    "O=C(Nc1ccncc1)c1ccc(C(=O)Nc2ccncc2)cc1",
    "O=C(Nc1cccnc1)c1ccc(C(=O)Nc2cccnc2)cc1",
    "Cc1ccc(S(=O)(=O)Nc2cccc(F)c2)cc1",
    "COc1ccc(S(=O)(=O)Nc2cccc(NC(C)=O)c2)cc1",
    "COc1ccc(S(=O)(=O)Nc2ncccn2)cc1",
    "COC(=O)CNS(=O)(=O)c1c(C)cc(C)cc1C",
    "CC(=O)Nc1cccc(NS(=O)(=O)c2c(C)cc(C)cc2C)c1",
    "O=S(=O)(Nc1cccc(F)c1)c1ccc(F)cc1",
    "CC(=O)Nc1cccc(NS(=O)(=O)c2ccc(F)cc2)c1",
    "CC1CN(S(=O)(=O)c2ccc(F)cc2)CC(C)O1",
    "Cc1cccc(=NS(=O)(=O)c2cc(Cl)ccc2Cl)[nH]1",
    "O=S(=O)(N=c1cccc[nH]1)c1cc(Cl)ccc1Cl",
]

metrics = compute_metrics(smiles_list_a, smiles_list_b)

In [9]:
# Report each computed metric on its own line as "name: value"
for metric in metrics:
    # !s formats the value through str(), exactly as %s does
    print(f'{metric}: {metrics[metric]!s}')

fraction_valid: 1.0
fraction_unique: 1.0
fraction_passes: 1.0
IntDiv: 0.6691153390066964
IntDiv2: 0.6025137673444122
FCD: 59.23197834916648
SNN: 0.2715294112761815
Frag: 0.33653338167428815
Scaf: 0.0
DistributionDifferenceLogP: 0.48885730158730173
DistributionDifferenceSA: 0.2154795412301325
DistributionDifferenceQED: 0.03266650666066298
DistributionDifferenceWeight: 8.499793650793665


# Molecule Docking

In [10]:
from ddbox.docking.utils.vina import download_all_receptors

In [11]:
# Optional: fetch every receptor up front. Skip this call if you would
# rather have receptors downloaded one by one as they are used.

download_all_receptors()

100%|██████████| 58/58 [00:01<00:00, 36.78it/s]


In [12]:
# Dock against 'ABL1', passing both the center and the size explicitly

dock_center = [15.851, 14.647, 3.904]
dock_size = [30.000, 30.000, 30.000]
pdbqt_content = molecule.dock('ABL1', center=dock_center, size=dock_size)
print(pdbqt_content)

MODEL 1
REMARK VINA RESULT:    -6.697      0.000      0.000
REMARK INTER + INTRA:          -8.695
REMARK INTER:                  -8.067
REMARK INTRA:                  -0.629
REMARK UNBOUND:                -0.629
REMARK  Name = 
REMARK  5 active torsions:
REMARK  status: ('A' for Active; 'I' for Inactive)
REMARK    1  A    between atoms: C_1  and  C_2
REMARK    2  A    between atoms: C_2  and  O_4
REMARK    3  A    between atoms: O_4  and  C_5
REMARK    4  A    between atoms: C_10  and  C_11
REMARK    5  A    between atoms: C_11  and  O_13
REMARK                            x       y       z     vdW  Elec       q    Type
REMARK                         _______ _______ _______ _____ _____    ______ ____
ROOT
ATOM      1  C   UNL     1      16.441  14.805   4.080  0.00  0.00    +0.000 A 
ATOM      2  C   UNL     1      15.454  15.100   5.026  0.00  0.00    +0.000 A 
ATOM      3  C   UNL     1      15.813  15.622   6.270  0.00  0.00    +0.000 A 
ATOM      4  C   UNL     1      17.150  15.877

In [13]:
# Dock against 'ABL1' with an explicit center, leaving size at its default

dock_center = [15.851, 14.647, 3.904]
pdbqt_content = molecule.dock('ABL1', center=dock_center)
print(pdbqt_content)

MODEL 1
REMARK VINA RESULT:    -6.721      0.000      0.000
REMARK INTER + INTRA:          -8.691
REMARK INTER:                  -8.097
REMARK INTRA:                  -0.594
REMARK UNBOUND:                -0.594
REMARK  Name = 
REMARK  5 active torsions:
REMARK  status: ('A' for Active; 'I' for Inactive)
REMARK    1  A    between atoms: C_1  and  C_2
REMARK    2  A    between atoms: C_2  and  O_4
REMARK    3  A    between atoms: O_4  and  C_5
REMARK    4  A    between atoms: C_10  and  C_11
REMARK    5  A    between atoms: C_11  and  O_13
REMARK                            x       y       z     vdW  Elec       q    Type
REMARK                         _______ _______ _______ _____ _____    ______ ____
ROOT
ATOM      1  C   UNL     1      16.409  14.794   4.070  0.00  0.00    +0.000 A 
ATOM      2  C   UNL     1      15.396  15.081   4.990  0.00  0.00    +0.000 A 
ATOM      3  C   UNL     1      15.716  15.621   6.236  0.00  0.00    +0.000 A 
ATOM      4  C   UNL     1      17.043  15.900

In [14]:
# Dock against 'ABL1' with an explicit size, leaving center at its default

dock_size = [30.000, 30.000, 30.000]
pdbqt_content = molecule.dock('ABL1', size=dock_size)
print(pdbqt_content)

MODEL 1
REMARK VINA RESULT:    -6.554      0.000      0.000
REMARK INTER + INTRA:          -8.539
REMARK INTER:                  -7.895
REMARK INTRA:                  -0.644
REMARK UNBOUND:                -0.644
REMARK  Name = 
REMARK  5 active torsions:
REMARK  status: ('A' for Active; 'I' for Inactive)
REMARK    1  A    between atoms: C_1  and  C_2
REMARK    2  A    between atoms: C_2  and  O_4
REMARK    3  A    between atoms: O_4  and  C_5
REMARK    4  A    between atoms: C_10  and  C_11
REMARK    5  A    between atoms: C_11  and  O_13
REMARK                            x       y       z     vdW  Elec       q    Type
REMARK                         _______ _______ _______ _____ _____    ______ ____
ROOT
ATOM      1  C   UNL     1      16.528  14.867   4.087  0.00  0.00    +0.000 A 
ATOM      2  C   UNL     1      15.520  15.179   5.007  0.00  0.00    +0.000 A 
ATOM      3  C   UNL     1      15.851  15.772   6.228  0.00  0.00    +0.000 A 
ATOM      4  C   UNL     1      17.177  16.082

In [15]:
# Dock against 'ABL1' relying on the defaults for both center and size.
# NOTE(review): the "VINA RESULT" scores printed by the docking cells in this
# section differ slightly from cell to cell. If dock() is stochastic, fixing
# a seed would make these outputs reproducible — confirm whether dock()
# accepts one.

pdbqt_content = molecule.dock('ABL1')
print(pdbqt_content)

MODEL 1
REMARK VINA RESULT:    -6.702      0.000      0.000
REMARK INTER + INTRA:          -8.690
REMARK INTER:                  -8.073
REMARK INTRA:                  -0.617
REMARK UNBOUND:                -0.617
REMARK  Name = 
REMARK  5 active torsions:
REMARK  status: ('A' for Active; 'I' for Inactive)
REMARK    1  A    between atoms: C_1  and  C_2
REMARK    2  A    between atoms: C_2  and  O_4
REMARK    3  A    between atoms: O_4  and  C_5
REMARK    4  A    between atoms: C_10  and  C_11
REMARK    5  A    between atoms: C_11  and  O_13
REMARK                            x       y       z     vdW  Elec       q    Type
REMARK                         _______ _______ _______ _____ _____    ______ ____
ROOT
ATOM      1  C   UNL     1      16.450  14.791   4.057  0.00  0.00    +0.000 A 
ATOM      2  C   UNL     1      15.444  15.071   4.988  0.00  0.00    +0.000 A 
ATOM      3  C   UNL     1      15.772  15.617   6.228  0.00  0.00    +0.000 A 
ATOM      4  C   UNL     1      17.099  15.910

# Moses Benchmark Dataset

In [16]:
from ddbox.data.torch import MosesDataset

In [17]:
# Load the dataset with default arguments (no split or attribute list given)
dataset = MosesDataset()

100%|██████████| 32/32 [00:10<00:00,  3.14it/s]


In [18]:
# Total number of entries in the loaded dataset
len(dataset)

1584663

In [19]:
# Slicing returns a list of rows; with the default attributes each row is a
# one-element list holding a SMILES string
dataset[:20]

[['CCCS(=O)c1ccc2[nH]c(=NC(=O)OC)[nH]c2c1'],
 ['CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n1ccnc1'],
 ['Cc1c(Cl)cccc1Nc1ncccc1C(=O)OCC(O)CO'],
 ['Cn1cnc2c1c(=O)n(CC(O)CO)c(=O)n2C'],
 ['CC1Oc2ccc(Cl)cc2N(CC(O)CO)C1=O'],
 ['CCOC(=O)c1cncn1C1CCCc2ccccc21'],
 ['COc1ccccc1OC(=O)Oc1ccccc1OC'],
 ['O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)=NC1O'],
 ['CN1C(=O)C(O)N=C(c2ccccc2Cl)c2cc(Cl)ccc21'],
 ['CCC(=O)c1ccc(OCC(O)CO)c(OC)c1'],
 ['Cc1nc2c([nH]1)c(=O)n(C)c(=O)n2CC1CC=CCC1'],
 ['COc1cc2c(cc1O)N=CC1CCC(O)N1C2=O'],
 ['COc1c(C)cnc(CS(=O)c2nc3ccccc3[nH]2)c1C'],
 ['COc1cc(C)c(Cc2cnc(N)nc2N)cc1OC'],
 ['O=C1Nc2ccc(Cl)cc2C(c2ccccc2)=NC1O'],
 ['CC1CC(OC(=O)CN2CCCC2=O)CC(C)(C)C1'],
 ['O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1'],
 ['COC(=O)c1c[nH]c2cc(OC(C)C)c(OC(C)C)cc2c1=O'],
 ['CCC1NC(=O)c2cc(S(N)(=O)=O)c(Cl)cc2N1'],
 ['CN1C(=O)C(O)N=C(c2ccccc2)c2cc(Cl)ccc21']]

In [20]:
# A slice with an explicit step is accepted as well
dataset[:10:1]

[['CCCS(=O)c1ccc2[nH]c(=NC(=O)OC)[nH]c2c1'],
 ['CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n1ccnc1'],
 ['Cc1c(Cl)cccc1Nc1ncccc1C(=O)OCC(O)CO'],
 ['Cn1cnc2c1c(=O)n(CC(O)CO)c(=O)n2C'],
 ['CC1Oc2ccc(Cl)cc2N(CC(O)CO)C1=O'],
 ['CCOC(=O)c1cncn1C1CCCc2ccccc21'],
 ['COc1ccccc1OC(=O)Oc1ccccc1OC'],
 ['O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)=NC1O'],
 ['CN1C(=O)C(O)N=C(c2ccccc2Cl)c2cc(Cl)ccc21'],
 ['CCC(=O)c1ccc(OCC(O)CO)c(OC)c1']]

In [21]:
# Load the test split instead. This rebinds `dataset`, so the slice that
# follows reads from the test split rather than the data loaded earlier.
dataset = MosesDataset(split='test')

  0%|          | 0/4 [00:00<?, ?it/s]

100%|██████████| 4/4 [00:01<00:00,  3.34it/s]


In [22]:
# First ten rows of the currently bound `dataset`
dataset[:10]

[['CC1C2CCC(C2)C1CN(CCO)C(=O)c1ccc(Cl)cc1'],
 ['COc1ccc(-c2cc(=O)c3c(O)c(OC)c(OC)cc3o2)cc1O'],
 ['CCOC(=O)c1ncn2c1CN(C)C(=O)c1cc(F)ccc1-2'],
 ['Clc1ccccc1-c1nc(-c2ccncc2)no1'],
 ['CC(C)(Oc1ccc(Cl)cc1)C(=O)OCc1cccc(CO)n1'],
 ['Cc1nc2c(OCc3ccccc3)cccn2c1CC#N'],
 ['O=C1c2cccnc2CN1Cc1ccccc1Cl'],
 ['O=C(NC1CCc2ccccc2C1)c1ccncc1'],
 ['OC(Cn1cncn1)(Cn1cncn1)c1ccc(F)cc1F'],
 ['Cc1[nH]cnc1Cc1nc(-c2ccccc2)cs1']]

In [23]:
# Request several attributes per molecule instead of the default; `dataset`
# is rebound once more.
dataset = MosesDataset(attributes=['smiles', 'qed', 'Chi0'])

100%|██████████| 32/32 [00:19<00:00,  1.65it/s]


In [24]:
# Print the first ten rows, one per line. Each row is a list with one entry
# per requested attribute.
preview_rows = dataset[:10]
for row in preview_rows:
    print(row)

['CCCS(=O)c1ccc2[nH]c(=NC(=O)OC)[nH]c2c1', '0.8968983095288725', '13.828062645816686']
['CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n1ccnc1', '0.8622594770545061', '14.750712376627062']
['Cc1c(Cl)cccc1Nc1ncccc1C(=O)OCC(O)CO', '0.7010222128157715', '16.81962647737941']
['Cn1cnc2c1c(=O)n(CC(O)CO)c(=O)n2C', '0.6460832424087162', '13.4472292782632']
['CC1Oc2ccc(Cl)cc2N(CC(O)CO)C1=O', '0.8534567655454853', '13.284092571446669']
['CCOC(=O)c1cncn1C1CCCc2ccccc21', '0.8053055258190576', '13.94938298937633']
['COc1ccccc1OC(=O)Oc1ccccc1OC', '0.6322560961830345', '14.372032720186704']
['O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)=NC1O', '0.8476978945833689', '14.98276318419594']
['CN1C(=O)C(O)N=C(c2ccccc2Cl)c2cc(Cl)ccc21', '0.8705347056850515', '15.853006672199015']
['CCC(=O)c1ccc(OCC(O)CO)c(OC)c1', '0.7104140353000475', '13.543605595440512']
