In [None]:
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
import numpy as np
import matplotlib.pyplot as plt
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdDistGeom

In [None]:
def generateConformersFor(smi,nconf=10,addH=True):
    """Build an RDKit molecule from a SMILES string and embed 3D conformers.

    Args:
        smi: SMILES string describing the molecule.
        nconf: Number of conformers requested from the embedder.
        addH: If True, explicit hydrogens are added before embedding.

    Returns:
        The RDKit molecule on which the conformer embedding was run.

    Raises:
        ValueError: If RDKit cannot parse ``smi``.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of deep inside the embedder.
        raise ValueError("Could not parse SMILES: {!r}".format(smi))
    if addH:
        mol = Chem.AddHs(mol)
    # EmbedMultipleConfs clears any existing conformers by default, so a
    # preceding single EmbedMolecule call would only be thrown away.
    rdDistGeom.EmbedMultipleConfs(mol, nconf)
    return mol

In [None]:
def replaceFragment(mol,start_fragment,replace_fragment,silent=False,tries=1,replacement_connection_points=None,replacement_connection_skips=None,preserve_connection_distances=False,num_preserve_tries=100,require_match=False):
    """Swap one substructure match of ``start_fragment`` in ``mol`` for ``replace_fragment``.

    The atoms matched by ``mol.GetSubstructMatch(start_fragment)`` are removed
    and ``replace_fragment`` is bonded (single bonds) to the atoms that were
    attached to the removed part. Connection atoms are picked at random with
    ``np.random.choice``, so repeated calls may give different products.

    Args:
        mol: RDKit Mol to modify (it is not edited in place).
        start_fragment: RDKit Mol used as the substructure query to remove.
        replace_fragment: RDKit Mol inserted instead of the match. It can be
            constructed by MurckoScaffold.GetScaffoldForMol() as well.
        silent: If True, suppress the progress messages.
        tries: Total number of attempts made when the product cannot be
            parsed back from its SMILES.
        replacement_connection_points: Optional atom indices of
            ``replace_fragment`` that may be used for bonding. When None, every
            atom with at least one implicit hydrogen is a candidate.
        replacement_connection_skips: Atom indices of ``replace_fragment`` to
            exclude when the candidates are derived automatically. None means
            no exclusions.
        preserve_connection_distances: If True, every cut bond is treated as a
            required connection and the replacement atoms are resampled until
            their topological distance matrix equals that of the removed
            connection atoms.
        num_preserve_tries: Upper bound on the resampling loop used for
            ``preserve_connection_distances``.
        require_match: If True, raise when ``start_fragment`` is not found.

    Returns:
        A tuple ``(molecule, success)``: ``(new_mol, True)`` on success,
        ``(mol, False)`` when there is no match and ``require_match`` is False,
        and ``(None, False)`` when no attempt produced a parsable molecule.
        On success the molecule is rebuilt from a SMILES string.

    Raises:
        ValueError: If ``require_match`` is set and no match exists, or if
            there are no atoms available to form the connecting bond.
    """
    # start_fragment and replace_fragment must be rdkit Mol objects
    if replacement_connection_points is not None:
        candidate_points = replacement_connection_points
    else:
        skips = () if replacement_connection_skips is None else replacement_connection_skips
        candidate_points = [
            at.GetIdx()
            for at in replace_fragment.GetAtoms()
            if at.GetIdx() not in skips and at.GetNumImplicitHs() >= 1  # can connect here
        ]

    matched = mol.GetSubstructMatch(start_fragment)
    if len(matched) == 0:
        if require_match:
            raise ValueError("Match not found")
        return mol, False
    removed = set(matched)  # set gives O(1) membership tests below

    # Walk the bonds that cross the boundary of the matched fragment. The three
    # lists are filled in the same (bond) order; when
    # preserve_connection_distances is True, removed_connect_points[i] and
    # arom_connect_points[i] are the two ends of the same cut bond.
    removed_connect_points = []   # fragment-side atom of each cut bond
    possible_connect_points = []  # retained-side atom of each cut bond
    arom_connect_points = []      # retained-side atoms that must be reconnected
    for bond in mol.GetBonds():
        a1 = bond.GetBeginAtom()
        a2 = bond.GetEndAtom()
        a1_removed = a1.GetIdx() in removed
        a2_removed = a2.GetIdx() in removed
        if a1_removed == a2_removed:
            continue  # bond lies entirely inside or entirely outside the match
        inner, outer = (a1, a2) if a1_removed else (a2, a1)
        possible_connect_points.append(outer.GetIdx())
        removed_connect_points.append(inner.GetIdx())
        if outer.GetIsAromatic() or preserve_connection_distances:
            arom_connect_points.append(outer.GetIdx())
    num_arom_connect_points = len(arom_connect_points)
    if not silent: print("Aromatic connect points:",num_arom_connect_points)

    # np.random.choice would raise a cryptic ValueError on an empty pool;
    # raise the same exception type with an actionable message instead.
    if len(candidate_points) == 0:
        raise ValueError("Replacement fragment has no available connection points")
    if len(possible_connect_points) == 0:
        raise ValueError("Matched fragment has no bonds to the rest of the molecule")

    keep_parts = [i for i in range(mol.GetNumAtoms()) if i not in removed]

    if preserve_connection_distances:
        original_dmat = Chem.GetDistanceMatrix(mol)
        sub_dmat = original_dmat[removed_connect_points][:, removed_connect_points]
        if not silent:
            print("Sub-matrix of connection distances:")
            print(sub_dmat)

    # In the combined molecule the replacement atoms come first, so every index
    # of the original molecule is shifted by the replacement's atom count.
    offset = replace_fragment.GetNumAtoms()
    combo = Chem.CombineMols(replace_fragment, mol)
    for i in keep_parts:
        combo.GetAtomWithIdx(i + offset).SetIsAromatic(False)
    edcombo = Chem.EditableMol(combo)

    if num_arom_connect_points > 1:
        try:
            c1 = np.random.choice(candidate_points, num_arom_connect_points, replace=False)
        except ValueError:
            # Fewer candidates than required bonds: fall back to reusing atoms.
            print("WARN: Unique connection points in replacement fragment may not be sufficient!")
            c1 = np.random.choice(candidate_points, num_arom_connect_points, replace=True)
        else:
            if preserve_connection_distances:
                r_dmat = Chem.GetDistanceMatrix(replace_fragment)
                agree = np.all(r_dmat[c1][:, c1] == sub_dmat)
                n_ty = 0
                while (n_ty <= num_preserve_tries) and (not agree):
                    n_ty += 1
                    c1 = np.random.choice(candidate_points, num_arom_connect_points, replace=False)
                    agree = np.all(r_dmat[c1][:, c1] == sub_dmat)
                if not agree and not silent:
                    print("WARN: All distances to be preserved could not be kept")
        if preserve_connection_distances:
            # c1 was matched against sub_dmat in cut-bond order, so the retained
            # atoms must stay in that same order; shuffling them would pair the
            # wrong ends and break the preserved distances.
            c2 = np.asarray(arom_connect_points) + offset
        else:
            c2 = np.random.choice(arom_connect_points, num_arom_connect_points, replace=False) + offset
        for frag_idx, kept_idx in zip(c1, c2):
            frag_idx = int(frag_idx)
            kept_idx = int(kept_idx)
            if not silent:
                print("Selected atom",frag_idx,"of replacement fragment as connection point")
                print("Atom index (shifted)",kept_idx,"from retained molecule is used as connection point")
            edcombo.AddBond(frag_idx, kept_idx, order=Chem.rdchem.BondType.SINGLE)
    else:
        c1 = int(np.random.choice(candidate_points))
        if not silent: print("Selected atom",c1,"of replacement fragment as connection point")
        c2 = int(np.random.choice(possible_connect_points)) + offset
        if not silent: print("Atom index (shifted)",c2,"from retained molecule is used as connection point")
        edcombo.AddBond(c1, c2, order=Chem.rdchem.BondType.SINGLE)

    # Delete from the highest index down so earlier removals do not shift the
    # indices of atoms that are still waiting to be removed.
    for delidx in sorted(matched, reverse=True):
        edcombo.RemoveAtom(delidx + offset)

    mod_mol = edcombo.GetMol()
    final_mol = None
    if mod_mol is None:
        if not silent: print("Final molecule is null!")
    else:
        final_smi = Chem.MolToSmiles(mod_mol, kekuleSmiles=True)
        final_mol = Chem.MolFromSmiles(final_smi)
        if final_mol is None and not silent: print(final_smi)

    if final_mol is None and tries > 1:
        if not silent: print("Try failed. Repeating.\t",tries-1,"tries left")
        # Forward every option so a retry runs with the caller's settings.
        return replaceFragment(
            mol,
            start_fragment,
            replace_fragment,
            silent=silent,
            tries=tries - 1,
            replacement_connection_points=replacement_connection_points,
            replacement_connection_skips=replacement_connection_skips,
            preserve_connection_distances=preserve_connection_distances,
            num_preserve_tries=num_preserve_tries,
            require_match=require_match,
        )
    return final_mol, final_mol is not None