In [1]:
from rdkit import Chem
from rdkit.Chem import Fragments
from rdkit.Chem import MACCSkeys
import pandas as pd
import numpy as np

In [5]:
# User configuration: which functional group (FG) to filter on, and how.
#   "FG"      - name of the functional group; use a key of FIND_STRUCT_BY_FP
#               or FIND_GROUP.
#   "Discard" - "True"  removes candidates that contain the FG;
#               "False" keeps only candidates that contain the FG.
# NOTE(review): the flag is stored as the *string* "True", not the boolean
# True -- confirm this is what the code consuming FG_to_check expects.
#
# Example: the setting below yields a non-chlorinated subset.
FG_to_check = dict(FG="Cl", Discard="True")

In [2]:
# Functional-group names that can be used as a filter are the keys of
# FIND_STRUCT_BY_FP (this table) and of FIND_GROUP.
#
# This table maps a simple structural feature to an integer.
# NOTE(review): the integers are presumably MACCS key bit positions (MACCSkeys
# is imported at the top of the notebook) -- confirm against the code that
# reads this table.
FIND_STRUCT_BY_FP = dict(
    [
        ("Cl", 103),
        ("F", 42),
        ("Br", 46),
        ("P", 29),
        ("NH2", 84),
        ("NH", 151),
        ("OH", 139),
        ("Aromatic", 162),
        ("C=O", 154),
        ("S=O", 60),
    ]
)

In [6]:
# Higher-level structures are looked up through rdkit.Chem.Fragments rather
# than through the integer table in FIND_STRUCT_BY_FP.
#
# Each key is the functional-group name a user may request; each value is the
# Fragments function that find_FG calls on the parsed molecule and whose
# result it treats as the number of occurrences of that group.
FIND_GROUP = dict(
    # carboxylic acids
    Aliphatic_carboxylic_acids=Fragments.fr_Al_COO,
    Aromatic_carboxylic_acids=Fragments.fr_Ar_COO,
    General_carboxylic_acids=Fragments.fr_COO,
    # hydroxyls
    Aliphatic_OH=Fragments.fr_Al_OH,
    Aromatic_OH=Fragments.fr_Ar_OH,
    # nitrogen-containing groups
    Aromatic_N=Fragments.fr_Ar_N,
    Aromatic_amines=Fragments.fr_Ar_NH,
    Tertiary_amines=Fragments.fr_NH0,
    Secondary_amines=Fragments.fr_NH1,
    Primary_amines=Fragments.fr_NH2,
    Hydroxylamine=Fragments.fr_N_O,
    Amides=Fragments.fr_amide,
    Imides=Fragments.fr_imide,
    Amidine=Fragments.fr_amidine,
    Aniline=Fragments.fr_aniline,
    # remaining entries, in the notebook's original order
    Thio=Fragments.fr_SH,
    Aldehydes=Fragments.fr_aldehyde,
    Alkyl_carbamate=Fragments.fr_alkyl_carbamate,
    Alkyl_halide=Fragments.fr_alkyl_halide,
    Halogens=Fragments.fr_halogen,
    Aryl_methyl=Fragments.fr_aryl_methyl,
    Benzene_rings=Fragments.fr_benzene,
    Ester=Fragments.fr_ester,
    Ether=Fragments.fr_ether,
    Furan_rings=Fragments.fr_furan,
    Imidazole_rings=Fragments.fr_imidazole,
    Ketones=Fragments.fr_ketone,
    Lactones=Fragments.fr_lactone,
    Methoxy=Fragments.fr_methoxy,
    Nitriles=Fragments.fr_nitrile,
    Nitros=Fragments.fr_nitro,
    Nitro_benzene_rings=Fragments.fr_nitro_arom,
    Oxazole_rings=Fragments.fr_oxazole,
    Phenols=Fragments.fr_phenol,
    P_acids=Fragments.fr_phos_acid,
    P_esters=Fragments.fr_phos_ester,
    Primary_amides=Fragments.fr_priamide,
    Pyridine_rings=Fragments.fr_pyridine,
    Sulfide=Fragments.fr_sulfide,
    Sulfonamides=Fragments.fr_sulfonamd,
    Sulfone=Fragments.fr_sulfone,
    Thiazole_rings=Fragments.fr_thiazole,
    Linear_alkanes_over_4=Fragments.fr_unbrch_alkane,
    Urea=Fragments.fr_urea,
)

In [4]:
def find_FG(sm, FG="Aliphatic_OH"):
    """Report whether the molecule given as a SMILES string contains a functional group.

    Parameters
    ----------
    sm : str
        SMILES string; parsed with ``Chem.MolFromSmiles``.
    FG : str, default "Aliphatic_OH"
        Name of the functional group to look for. Must be a key of
        ``FIND_STRUCT_BY_FP`` or ``FIND_GROUP``; if a name is present in both,
        ``FIND_STRUCT_BY_FP`` takes precedence.

    Returns
    -------
    dict
        * For a name in ``FIND_STRUCT_BY_FP``: the value returned by
          ``find_struct_from_fp(mol, FG)``, passed through unchanged (that
          helper is defined outside this block).
        * Otherwise: ``{"FG": FG, "FG_exist": "True"}`` when the matching
          ``FIND_GROUP`` counter returns a non-zero value, else
          ``{"FG": FG, "FG_exist": "False"}``. Note that the flag is a
          string, not a boolean.

    Raises
    ------
    ValueError
        If ``FG`` is in neither lookup table, or if ``sm`` cannot be parsed
        into a molecule.

    Notes
    -----
    On the ``FIND_GROUP`` path the count is also printed to stdout.
    """
    # Validate the requested group first: it is cheap and does not need the molecule.
    if FG not in FIND_STRUCT_BY_FP and FG not in FIND_GROUP:
        raise ValueError(f"Cannot find {FG}. Please check FIND_GROUP or FIND_STRUCT_BY_FP for all avilable groups.")

    mol = Chem.MolFromSmiles(sm)
    # MolFromSmiles signals a parse failure by returning None instead of
    # raising; fail here with a clear message rather than letting None reach
    # the fragment counter or the fingerprint helper.
    if mol is None:
        raise ValueError(f"Could not parse SMILES string: {sm!r}")

    if FG in FIND_STRUCT_BY_FP:
        return find_struct_from_fp(mol, FG)

    n_FG = FIND_GROUP[FG](mol)
    print(f"Number of {FG} in this molecule is: {n_FG}")

    # The presence flag is kept as the strings "True"/"False" for callers
    # that rely on the existing return format.
    return {"FG": FG, "FG_exist": "True" if n_FG != 0 else "False"}