In [1]:
import ast
import subprocess
import numpy as np
import pandas as pd

In [6]:
def _buildCmd(smi_from, myMMPsDB, property=None, radius=-1):
    """Assemble the argument list for an ``mmpdb generate`` or ``mmpdb transform`` call.

    Args:
        smi_from: SMILES of the starting molecule (passed to ``--smiles``).
        myMMPsDB: Path to the mmpdb database file.
        property: ``None`` builds a ``generate`` command. Otherwise a property
            name, or several names separated by commas, builds a ``transform``
            command with one ``--property`` option per name.
        radius: Environment radius. Only the values 0-5 are forwarded
            (``--radius`` for generate, ``-r`` for transform); any other value,
            including the default -1, omits the option so mmpdb uses its own
            default.

    Returns:
        list[str]: The command line as a list of arguments.
    """
    valid_radii = (0, 1, 2, 3, 4, 5)

    if property is None:
        commandLine = ["mmpdb", "generate", "--smiles", f"{smi_from}", f"{myMMPsDB}"]
        radius_flag = "--radius"
    else:
        commandLine = ["mmpdb", "transform", "--smiles", f"{smi_from}", f"{myMMPsDB}"]
        # One --property option per requested name (not the whole comma string).
        for prop in property.split(','):
            prop = prop.strip()
            if prop:
                commandLine.extend(["--property", prop])
        radius_flag = "-r"

    # Emit the radius exactly once, and only when mmpdb would accept it.
    if radius in valid_radii:
        commandLine.extend([radius_flag, f"{radius}"])

    print(f'\tCommands:', ' '.join(commandLine))
    return commandLine

## --------------------------------------------------
def _runCmd(commandLine):
    """Run a command and parse its tab-separated stdout into row dictionaries.

    The first non-empty line of stdout is taken as the header. Every later
    non-empty line with the same number of fields becomes one entry
    ``dataDict[i] = {column_name: value}``, where ``i`` is the line's position
    in stdout. All values are kept as strings.

    Args:
        commandLine: Argument list handed to ``subprocess.Popen``.

    Returns:
        dict[int, dict[str, str]]: Parsed rows. Empty if the command could not
        be started, its output could not be decoded, or it produced no data
        rows. Rows whose field count differs from the header are reported and
        skipped.
    """
    dataDict = {}
    try:
        process = subprocess.Popen(commandLine, stdout=subprocess.PIPE)
        output, _ = process.communicate()
        list_output = output.decode().split('\n')
    except Exception as e:
        # Best effort: report the problem and hand back an empty result.
        print(f'\tCannot decode the output. Error msg: {e}')
        return dataDict

    if process.returncode != 0:
        print(f'\tWarning: command exited with return code {process.returncode}')

    list_colNames = None
    for i, raw_line in enumerate(list_output):
        line = raw_line.rstrip('\r')  # tolerate CRLF line endings
        if line == '':
            continue
        list_line = line.split('\t')
        if list_colNames is None:
            # The header is the first non-empty line, wherever it appears.
            list_colNames = list_line
            continue
        if len(list_line) != len(list_colNames):
            print(f"Error, This row {i} has different number of cols to the header row, {line}")
            continue
        dataDict[i] = dict(zip(list_colNames, list_line))
    return dataDict

## --------------------------------------------------
def CleanResults(smi_from, myMMPsDB, property=None, radius=-1):
    """Run mmpdb for one molecule and return a tidy table of generated analogues.

    Builds the command with ``_buildCmd``, runs it with ``_runCmd`` and
    reshapes the parsed output into a common set of columns.

    Args:
        smi_from: SMILES of the starting molecule.
        myMMPsDB: Path to the mmpdb database file.
        property: ``None`` to use ``mmpdb generate``; otherwise a single
            property name to use ``mmpdb transform``. The column mapping below
            is built from this one name.
        radius: Environment radius forwarded to ``_buildCmd``.

    Returns:
        pandas.DataFrame: Columns ``mol_start``, ``mol_gen``,
        ``fragment_constant``, ``fragment_from``, ``fragment_to``, ``radius``,
        (``<property>_avg`` in transform mode) and ``Rule_Info``. The frame is
        empty, but has these columns, when mmpdb returned no rows.
    """
    commandLine = _buildCmd(smi_from=smi_from, myMMPsDB=myMMPsDB, property=property, radius=radius)
    dataDict = _runCmd(commandLine)
    dataTable = pd.DataFrame.from_dict(dataDict, orient="index")

    if property is None:
        renameCols = {
            'start': 'mol_start',
            'final': 'mol_gen',
            'constant': 'fragment_constant',
            'from_smiles': 'fragment_from',
            'to_smiles': 'fragment_to',
            'r': 'radius',
            'Rule_Info': 'Rule_Info'}
    else:
        renameCols = {
            'start': 'mol_start',
            'SMILES': 'mol_gen',
            'constant': 'fragment_constant',
            f'{property}_from_smiles': 'fragment_from',
            f'{property}_to_smiles': 'fragment_to',
            f'{property}_radius': 'radius',
            f'{property}_avg': f'{property}_avg',
            'Rule_Info': 'Rule_Info'}

    # No rows parsed: return an empty frame with the final column names
    # instead of failing on the column lookups below.
    if dataTable.empty:
        dataTable_gen = pd.DataFrame(columns=list(renameCols.values()))
        print(f"\tGenerate {dataTable_gen.shape[0]} analoges")
        return dataTable_gen

    if property is None:
        dataTable["Rule_Info"] = (
            dataTable["pair_from_id"] + '=>' + dataTable["pair_to_id"]
            + ' (N_Pairs=' + dataTable["#pairs"] + ')')
    else:
        # The transform branch fills these two columns itself so both modes
        # end up with the same layout.
        dataTable["start"] = smi_from
        dataTable["constant"] = np.nan
        # The pair count column is named after the requested property.
        dataTable["Rule_Info"] = (
            'Rule_env_id: ' + dataTable[f"{property}_rule_environment_id"]
            + ' (N_Pairs=' + dataTable[f"{property}_count"] + ')')

    dataTable_gen = dataTable[list(renameCols)].rename(columns=renameCols)
    print(f"\tGenerate {dataTable_gen.shape[0]} analoges")
    return dataTable_gen

## --------------------------------------------------


In [7]:
# Inputs shared by the example calls in the following cells.
myMMPsDB = "./Compounds_All.mmpdb"  # mmpdb database file handed to the mmpdb commands
smi_from = "c1cccnc1O"  # SMILES of the starting molecule
radius = 0  # environment radius passed on to the mmpdb command

In [12]:
!mmpdb generate --smiles c1cccnc1O ./Compounds_All.mmpdb

start	constant	from_smiles	to_smiles	r	pseudosmiles	final	heavies_diff	#pairs	pair_from_id	pair_from_smiles	pair_to_id	pair_to_smiles
Oc1ccccn1	*O	[*:1]c1ccccn1	[*:1]c1cccnc1	0	[*:1](~*)	Oc1cccnc1	0	27	KT-0097905	Cc1cc(OCCNC(=O)c2ccccn2)nc(-c2cnn(CC3CCC(F)(F)CC3)c2)c1	KT-0097906	Cc1cc(OCCNC(=O)c2cccnc2)nc(-c2cnn(CC3CCC(F)(F)CC3)c2)c1
Oc1ccccn1	*O	[*:1]c1ccccn1	[*:1]c1ccccc1	0	[*:1](~*)	Oc1ccccc1	0	24	KT-0005103	CN(C[C@H]1CC[C@H](n2cc3cc(NC(=O)c4ccccn4)c(C(C)(C)O)cc3n2)CC1)C1CCN(c2cccc3c2n(C)c(=O)n3C2CCC(=O)NC2=O)CC1	KT-0006900	CN(C[C@H]1CC[C@H](n2cc3cc(NC(=O)c4ccccc4)c(C(C)(C)O)cc3n2)CC1)C1CCN(c2cccc3c2n(C)c(=O)n3C2CCC(=O)NC2=O)CC1
Oc1ccccn1	*O	[*:1]c1ccccn1	[*:1]c1cccc(C(F)(F)F)n1	0	[*:1](~*)	Oc1cccc(C(F)(F)F)n1	4	18	KT-0004813	CN(C[C@H]1CC[C@H](n2cc3cc(NC(=O)c4ccccn4)c(C(C)(C)O)cc3n2)CC1)C1CCN(Cc2cccc3c2n(C)c(=O)n3C2CCC(=O)NC2=O)CC1	KT-0003545	CN(C[C@H]1CC[C@H](n2cc3cc(NC(=O)c4cccc(C(F)(F)F)n4)c(C(C)(C)O)cc3n2)CC1)C1CCN(Cc2cccc3c2n(C)c(=O)n3C2CCC(=O)NC2=O)CC1
Oc1ccccn1	*O	[*:1]c1cccc

In [None]:
# !mmpdb transform --smiles 'c1cccnc1O' "./Compounds_All.mmpdb" --property EstFa_Rat --property hERG_mixedIC50 -r 0

In [8]:
# Transform mode: a property name makes _buildCmd emit `mmpdb transform`.
# NOTE(review): this module-level name shadows the built-in `property`.
property = "EstFa_Rat"
CleanResults(smi_from=smi_from, myMMPsDB=myMMPsDB, property=property, radius=radius)

	Commands: mmpdb transform --smiles c1cccnc1O ./Compounds_All.mmpdb -r 0 --property EstFa_Rat -r 0
	Generate 10 analoges


Unnamed: 0,mol_start,mol_gen,fragment_constant,fragment_from,fragment_to,radius,EstFa_Rat_avg,Rule_Info
1,c1cccnc1O,Cc1cccc(-c2ccccn2)c1,,[*:1]O,[*:1]c1cccc(C)c1,0,-0.42031,Rule_env_id: 631132 (N_Pairs=1)
2,c1cccnc1O,Cc1cccc(O)n1,,[*:1]c1ccccn1,[*:1]c1cccc(C)n1,0,0.03113,Rule_env_id: 117783 (N_Pairs=2)
3,c1cccnc1O,Cc1cccc(Sc2ccccn2)c1,,[*:1]O,[*:1]Sc1cccc(C)c1,0,-0.82887,Rule_env_id: 595832 (N_Pairs=1)
4,c1cccnc1O,Cc1ccnc(Sc2ccccn2)c1,,[*:1]O,[*:1]Sc1cc(C)ccn1,0,-0.49947,Rule_env_id: 631165 (N_Pairs=1)
5,c1cccnc1O,Cc1nc(O)ccc1F,,[*:1]c1ccccn1,[*:1]c1ccc(F)c(C)n1,0,0.096031,Rule_env_id: 121519 (N_Pairs=2)
6,c1cccnc1O,Cc1nccc(O)n1,,[*:1]c1ccccn1,[*:1]c1ccnc(C)n1,0,0.24447,Rule_env_id: 133818 (N_Pairs=1)
7,c1cccnc1O,Oc1cccc(C(F)(F)F)c1,,[*:1]c1ccccn1,[*:1]c1cccc(C(F)(F)F)c1,0,0.013248,Rule_env_id: 118161 (N_Pairs=1)
8,c1cccnc1O,Oc1cccc(C(F)(F)F)n1,,[*:1]c1ccccn1,[*:1]c1cccc(C(F)(F)F)n1,0,0.030313,Rule_env_id: 117753 (N_Pairs=4)
9,c1cccnc1O,Oc1cnn2cccnc12,,[*:1]c1ccccn1,[*:1]c1cnn2cccnc12,0,-0.00075395,Rule_env_id: 117807 (N_Pairs=2)
10,c1cccnc1O,c1ccncc1,,[*:1]O,[*:1][H],0,0.2466,Rule_env_id: 153031 (N_Pairs=1)


In [9]:
# Generate mode: property=None makes _buildCmd emit `mmpdb generate`.
property = None
CleanResults(smi_from=smi_from, myMMPsDB=myMMPsDB, property=property, radius=radius)

	Commands: mmpdb generate --smiles c1cccnc1O ./Compounds_All.mmpdb --radius 0 --radius 0
	Generate 616 analoges


                            

Unnamed: 0,mol_start,mol_gen,fragment_constant,fragment_from,fragment_to,radius,Rule_Info
1,Oc1ccccn1,Oc1cccnc1,*O,[*:1]c1ccccn1,[*:1]c1cccnc1,0,KT-0097905=>KT-0097906 (N_Pairs=27)
2,Oc1ccccn1,Oc1ccccc1,*O,[*:1]c1ccccn1,[*:1]c1ccccc1,0,KT-0005103=>KT-0006900 (N_Pairs=24)
3,Oc1ccccn1,Oc1cccc(C(F)(F)F)n1,*O,[*:1]c1ccccn1,[*:1]c1cccc(C(F)(F)F)n1,0,KT-0004813=>KT-0003545 (N_Pairs=18)
4,Oc1ccccn1,Oc1ccncc1,*O,[*:1]c1ccccn1,[*:1]c1ccncc1,0,KT-0000547=>KT-0000645 (N_Pairs=15)
5,Oc1ccccn1,CO,*O,[*:1]c1ccccn1,[*:1]C,0,KT-0097905=>KT-0093967 (N_Pairs=12)
...,...,...,...,...,...,...,...
612,Oc1ccccn1,CCc1cn(-c2ccccn2)nn1,*c1ccccn1,[*:1]O,[*:1]n1cc(CC)nn1,0,KT-0193440=>KT-0194343 (N_Pairs=1)
613,Oc1ccccn1,CCc1ccn(-c2ccccn2)n1,*c1ccccn1,[*:1]O,[*:1]n1ccc(CC)n1,0,KT-0193440=>KT-0194941 (N_Pairs=1)
614,Oc1ccccn1,O=c1ccccn1-c1ccccn1,*c1ccccn1,[*:1]O,[*:1]n1ccccc1=O,0,KT-0035737=>KT-0036902 (N_Pairs=1)
615,Oc1ccccn1,c1ccc(-n2ccnn2)nc1,*c1ccccn1,[*:1]O,[*:1]n1ccnn1,0,KT-0034214=>KT-0038287 (N_Pairs=1)


In [None]:
'''
mmpdb generate
Options:
  --smiles SMILES                 The full molecule to process
  --constant SMILES               The constant fragment SMILES
  --query, --variable SMILES      The query/variable fragment SMILES
  --subqueries / --no-subqueries  If specified, also generate and include subfragments of the query fragment
  --radius [0|1|2|3|4|5]          Fingerprint environment radius (default: 0)
  --min-pairs N                   Only consider rules with at least N matched molecular pairs
  --select-pair [first|better|quadratic|min|random]
                                  If 'first' (fastest), select a representative pair arbitrarily. 
                                  If 'quadratic' or 'better', minimize sum of num_heavies**2. 
                                  If 'min', use the minimum num_heavies for either side. 
                                  If 'random', select one at random.
  -o, --output FILENAME
  --columns STR1,STR2,...         A comma-separated list of output fields (see below for the default)
  --headers STR1,STR2,...         A comma-separated list of column headers (default uses --fields)
  --no-header                     Use --no-header to exclude the column headers in the output
  -j, --num-jobs INTEGER RANGE    Number of processes to use when welding SMILES (0 means use all available CPUs) [x>=0]
  --chunksize INTEGER RANGE       Number of SMILES to process in each multiprocessing work unit (default: 100) [x>=1]
  --in-memory                     Load the SQLite database into memory before use
  --explain                       Explain the steps in the generation process
  --help                          Show this message and exit.
'''

In [None]:
'''
mmpdb transform
Options:
  -s, --smiles TEXT               The base structure to transform  [required]
  --min-variable-size N           Require at least N atoms in the variable fragment (default: 0)
  --max-variable-size N           Allow at most N atoms in the variable fragment (default: 9999)
  --min-constant-size N           Require at least N atoms in the constant fragment (default: 0)
  -r, --min-radius [0|1|2|3|4|5]  Fingerprint radius (default: 0)
  --min-pairs N                   Require at least N pairs in the transformation to report a product (default: 0)
  -S, --substructure SMARTS       Require the substructure pattern in the product
  --no-properties                 Don't use any properties
  -p, --property NAME             Property to use (may be specified multiple times)
  --rule-selection-cutoffs LIST   Evaluate rule environments with the given minimum pair count. If multiple counts are
                                  given, consider them in turn until there is a selected environment. (default: '10,5,0')
  --score EXPR                    Use to break ties when multiple rules produce the same SMILES
  --where EXPR                    Select only rules for which the expression is true
  -j, --jobs N                    Number of jobs to run in parallel (default: 1)
  --explain                       Explain each of the steps in the transformation process
  -o, --output FILENAME           Save the output to FILENAME (default=stdout)
  --times                         Report timing information for each step
  --help                          Show this message and exit.
'''