This notebook shows which MACCS-key substructures are present in a molecule given as a SMILES string.

In [5]:
from rdkit import Chem
from rdkit.Chem import MACCSkeys

In [6]:
# Input: the SMILES string of the molecule to analyse (edit this value).
# The default "CS(=O)C" is dimethyl sulfoxide (DMSO).
sm = "CS(=O)C"

In [7]:
def get_MACCS_from_sm(sm):
    """Compute the MACCS keys fingerprint for a SMILES string.

    Parameters
    ----------
    sm : str
        SMILES representation of the molecule.

    Returns
    -------
    tuple
        ``(mol_MACCS, fp)`` where ``fp`` is the fingerprint object returned by
        ``MACCSkeys.GenMACCSKeys`` and ``mol_MACCS`` is that fingerprint
        rendered as a string of '0'/'1' characters via ``fp.ToBitString()``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``sm`` into a molecule.
    """
    mol = Chem.MolFromSmiles(sm)
    # MolFromSmiles reports a parse failure by returning None instead of
    # raising; fail here with a clear message rather than letting
    # GenMACCSKeys choke on a None argument.
    if mol is None:
        raise ValueError(f"Could not parse SMILES string: {sm!r}")
    fp = MACCSkeys.GenMACCSKeys(mol)
    return fp.ToBitString(), fp

In [8]:
# Generate the MACCS keys fingerprint for the input SMILES:
# mol_MACCS is the bit string, fp is the fingerprint object itself.
mol_MACCS, fp = get_MACCS_from_sm(sm)

In [9]:
# Display the fingerprint as a bit string: one character per bit,
# '1' where the bit is on and '0' where it is off.
mol_MACCS

'00000000000000000000000000000000000000000000000000010000000011000001000001100000000000101000010000000010000000000000000000001000000000000000000000001100000000001000100'

In [10]:
# Optional: summarise the fingerprint by counting its bits.
# An "on" bit (1) marks a substructure key that is present; an "off" bit (0) marks one that is absent.
print(f"Total number of bits: {fp.GetNumBits()}") # length of the fingerprint
print(f"Total number of 0 - off bits: {fp.GetNumOffBits()}") # keys not matched
print(f"Total number of 1 - on bits: {fp.GetNumOnBits()}") # keys matched

Total number of bits: 167
Total number of 0 - off bits: 152
Total number of 1 - on bits: 15


In [11]:
# List every substructure key that is set for this molecule.
# Each position in the bit string is used as the index into
# MACCSkeys.smartsPatts, which holds that key's SMARTS definition.
fp_bit_string = fp.ToBitString()
all_on_bit = []
for i, bit_char in enumerate(fp_bit_string):
    # Skip keys that are absent from the molecule.
    if int(bit_char) != 1:
        continue
    on_bit = {"idx": i, "chem_mean": MACCSkeys.smartsPatts[i]}
    all_on_bit.append(on_bit)
    print(on_bit)

{'idx': 51, 'chem_mean': ('[#6]~[#16]~[#8]', 0)}
{'idx': 60, 'chem_mean': ('[#16]=[#8]', 0)}
{'idx': 61, 'chem_mean': ('*~[#16](~*)~*', 0)}
{'idx': 67, 'chem_mean': ('[!#6;!#1]~[#16]', 0)}
{'idx': 73, 'chem_mean': ('[#16]=*', 0)}
{'idx': 74, 'chem_mean': ('[CH3]~*~[CH3]', 0)}
{'idx': 86, 'chem_mean': ('[C;H2,H3][!#6;!#1][C;H2,H3]', 0)}
{'idx': 88, 'chem_mean': ('[#16]', 0)}
{'idx': 93, 'chem_mean': ('[!#6;!#1]~[CH3]', 0)}
{'idx': 102, 'chem_mean': ('[!#6;!#1]~[#8]', 0)}
{'idx': 124, 'chem_mean': ('[!#6;!#1]~[!#6;!#1]', 0)}
{'idx': 148, 'chem_mean': ('*~[!#6;!#1](~*)~*', 0)}
{'idx': 149, 'chem_mean': ('[C;H3,H4]', 1)}
{'idx': 160, 'chem_mean': ('[C;H3,H4]', 0)}
{'idx': 164, 'chem_mean': ('[#8]', 0)}
