<a href="https://colab.research.google.com/github/zhensongren/learn-ml/blob/master/generating_useful_features_from_smiles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2023.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.5/30.5 MB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.1


In [5]:
from rdkit import Chem
from rdkit.Chem import Descriptors, QED
import pandas as pd

def get_properties(smiles):
    """Compute a dictionary of RDKit descriptors for one SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.

    Returns
    -------
    dict or None
        Mapping from descriptor label to its value, with the input string
        echoed under ``'SMILES'``. ``None`` is returned when ``smiles`` is
        not a string or when ``Chem.MolFromSmiles`` yields no molecule for it.
    """
    # Missing values in tabular data show up as None/NaN rather than str.
    # Treat them like any other unusable input instead of handing a
    # non-string object to RDKit.
    if not isinstance(smiles, str):
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    return {
        'SMILES': smiles,
        'Molecular Weight': Descriptors.MolWt(mol),
        'LogP': Descriptors.MolLogP(mol),
        'Polar Surface Area': Descriptors.TPSA(mol),
        'Number of Rings': Descriptors.RingCount(mol),
        'Number of Rotatable Bonds': Descriptors.NumRotatableBonds(mol),
        'H-Bond Donors': Descriptors.NumHDonors(mol),
        'H-Bond Acceptors': Descriptors.NumHAcceptors(mol),
        'QED Weighted': QED.qed(mol),
    }

# Example molecules (SMILES strings) used to exercise get_properties
test_molecules = [
    "CCO",  # Ethanol
    "CC(=O)O",  # Acetic Acid
    "c1ccccc1",  # Benzene
    "C1CCCCC1",  # Cyclohexane
    "CCN(CC)CC",  # Triethylamine (three ethyl groups on the nitrogen)
]

# Compute descriptors for each molecule. get_properties returns None for a
# SMILES it cannot turn into a molecule; drop those entries so every record
# handed to the DataFrame constructor is a dict with the same keys.
properties_list = [
    props
    for props in map(get_properties, test_molecules)
    if props is not None
]
properties_df = pd.DataFrame(properties_list)

# Bare last expression so the notebook renders the table
properties_df


Unnamed: 0,SMILES,Molecular Weight,LogP,Polar Surface Area,Number of Rings,Number of Rotatable Bonds,H-Bond Donors,H-Bond Acceptors,QED Weighted
0,CCO,46.069,-0.0014,20.23,0,0,1,1,0.406808
1,CC(=O)O,60.052,0.0909,37.3,0,0,1,1,0.429883
2,c1ccccc1,78.114,1.6866,0.0,1,0,0,0,0.442628
3,C1CCCCC1,84.162,2.3406,0.0,1,0,0,0,0.422316
4,CCN(CC)CC,101.193,1.3481,3.24,0,3,0,1,0.518375
