In [1]:
from rdkit import Chem
from rdkit.Chem.rdFingerprintGenerator import GetRDKitFPGenerator

# Load a molecule from SMILES
Kavaratamide_A_SMILES = "CCCCCCC[C@H](O)CC(=O)N[C@@H](C(C)C)C(=O)N(C)[C@@H](C)C(=O)O[C@@H](C(C)C)C(=O)N1[C@@H](C(C)C)C(OC)=CC1=O"
Kav_mol = Chem.MolFromSmiles(Kavaratamide_A_SMILES)

# Create the RDKit fingerprint generator with maxPath=5 and fpSize=2048
fpgen = GetRDKitFPGenerator(maxPath=10, fpSize=2048)

# Generate the fingerprint
fp = fpgen.GetFingerprint(Kav_mol)
print(fp.ToBitString())  # Print the binary fingerprint


1111111111111111111111111111111111111111111111111111111011111111111111111111111111111111101111111111111111111111111111111111111111111111011111111111111111111111111111111111111111111111011111111111111011111111111111111110111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111101111110111111111111111111111111111111111111111111111111001111111111111111111101011111110111111111111101111111111111111111111110111111111111111111111111111111111111111101110111111111111101111111111111011111111111111111111111111111101011111111011111111011011111111111111111111111111111111111111111101111111111111111111111111111111110111111111101111111111101111011111111111111111111111101111111110111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111101101111111111111

In [2]:
def isomeric_to_canonical_smiles(isomeric_smiles):

    mol = Chem.MolFromSmiles(isomeric_smiles)
    Chem.RemoveStereochemistry( mol ) 

    canonical_smiles = Chem.MolToSmiles(mol, canonical=True)
    
    return canonical_smiles

Kavaratamide_A_SMILES = "CCCCCCC[C@H](O)CC(=O)N[C@@H](C(C)C)C(=O)N(C)[C@@H](C)C(=O)O[C@@H](C(C)C)C(=O)N1[C@@H](C(C)C)C(OC)=CC1=O"
Kavaratamide_A_SMILES = isomeric_to_canonical_smiles(Kavaratamide_A_SMILES)
s_old = "CCCCCCCC(CC=CCCC(=O)N(C)C(C)C(=O)NC(C)C(O)C(C)C(=O)N1C(=O)C=C(OC)C1C(C)C)OC"
s_new = "CCCCCCC(O)CC(=O)NC1CCOC1=O"

Kav_mol = Chem.MolFromSmiles(Kavaratamide_A_SMILES)
s_old_mol = Chem.MolFromSmiles(s_old)
s_new_mol = Chem.MolFromSmiles(s_new)

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from typing import Set, Dict, List
import re

def get_functional_groups(smiles: str) -> Set[str]:
    """
    Identify functional groups present in a molecule from its SMILES string.
    
    Args:
        smiles (str): SMILES representation of the molecule
        
    Returns:
        Set[str]: Set of functional group names found in the molecule
    """
    # Create molecule from SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")
    
    # Dictionary of functional groups with their SMARTS patterns
    functional_groups: Dict[str, str] = {
        "alcohol": "[#6]-[#8X2H]",
        "aldehyde": "[#6]-[#6;X3]([#1])=[#8X1]",
        "ketone": "[#6]-[#6X3](=[#8X1])-[#6]",
        "carboxylic_acid": "[#6]-[#6X3](=[#8X1])-[#8X2H]",
        "ester": "[#6]-[#6X3](=[#8X1])-[#8X2]-[#6]",
        "ether": "[#6]-[#8X2]-[#6]",
        "amine_primary": "[#6]-[#7X3;H2]",
        "amine_secondary": "[#6]-[#7X3;H1](-[#6])",
        "amine_tertiary": "[#6]-[#7X3](-[#6])-[#6]",
        "amide": "[#6]-[#6X3](=[#8X1])-[#7X3]",
        "nitro": "[#6]-[#7X3](=[#8X1])=[#8X1]",
        "nitrile": "[#6]-[#6X2]#[#7X1]",
        "sulfide": "[#6]-[#16X2]-[#6]",
        "sulfoxide": "[#6]-[#16X3](=[#8X1])-[#6]",
        "sulfone": "[#6]-[#16X4](=[#8X1])(=[#8X1])-[#6]",
        "sulfonamide": "[#6]-[#16X4](=[#8X1])(=[#8X1])-[#7X3]",
        "phosphate": "[#6]-[#8X2]-[#15X4](=[#8X1])(-[#8X2])-[#8X2]",
        "halogen_F": "[#6]-[F]",
        "halogen_Cl": "[#6]-[Cl]",
        "halogen_Br": "[#6]-[Br]",
        "halogen_I": "[#6]-[I]",
        "alkene": "[#6]=[#6]",
        "alkyne": "[#6]#[#6]",
        "aromatic": "c1ccccc1",
        "phenol": "[cX3]1[cX3][cX3][cX3][cX3][cX3]1-[#8X2H]",
        "thiol": "[#6]-[#16X2H]",
        "acyl_halide": "[#6]-[#6X3](=[#8X1])-[F,Cl,Br,I]",
        "anhydride": "[#6]-[#6X3](=[#8X1])-[#8X2]-[#6X3](=[#8X1])-[#6]",
        "azide": "[#6]-[#7X2]=[#7X2]=[#7X1-]",
        "azo": "[#6]-[#7X2]=[#7X2]-[#6]",
        "epoxide": "[#6R1]1[#8X2R1][#6R1]1",
        "isocyanate": "[#6]-[#7X2]=[#6X2]=[#8X1]",
        "isothiocyanate": "[#6]-[#7X2]=[#6X2]=[#16X1]"
    }
    
    # Find all functional groups in the molecule
    found_groups = set()
    
    for name, smarts in functional_groups.items():
        pattern = Chem.MolFromSmarts(smarts)
        if pattern and mol.HasSubstructMatch(pattern):
            found_groups.add(name)
    
    return found_groups


In [5]:
from rdkit.Chem.FilterCatalog import GetFunctionalGroupHierarchy

In [20]:
functionalGroups = GetFunctionalGroupHierarchy()

In [43]:
mols = [match.filterMatch.GetPattern()
    for match in functionalGroups.GetFilterMatches(
        Chem.MolFromSmiles(Kavaratamide_A_SMILES))]

In [46]:
mols

[<rdkit.Chem.rdchem.Mol at 0x735954cf5540>]

In [44]:
Chem.Draw.MolToFile(mols[0], 'functional_groups.png', size=(300, 300))

In [45]:
for mol in mols:
    print (Chem.MolToSmiles(mol))

O


In [26]:
help(Chem.rdfiltercatalog.SmartsMatcher)

Help on class SmartsMatcher in module rdkit.Chem.rdfiltercatalog:

class SmartsMatcher(FilterMatcherBase)
 |  Smarts Matcher Filter
 |   basic constructors:
 |     SmartsMatcher( name, smarts_pattern, minCount=1, maxCount=UINT_MAX )
 |     SmartsMatcher( name, molecule, minCount=1, maxCount=UINT_MAX )
 |
 |    note: If the supplied smarts pattern is not valid, the IsValid() function will
 |     return False
 |  >>> from rdkit.Chem.FilterCatalog import *
 |  >>> minCount, maxCount = 1,2
 |  >>> carbon_matcher = SmartsMatcher('Carbon', '[#6]', minCount, maxCount)
 |  >>> print (carbon_matcher.HasMatch(Chem.MolFromSmiles('CC')))
 |  True
 |  >>> print (carbon_matcher.HasMatch(Chem.MolFromSmiles('CCC')))
 |  False
 |  >>> carbon_matcher.SetMinCount(2)
 |  >>> print (carbon_matcher.HasMatch(Chem.MolFromSmiles('C')))
 |  False
 |  >>> carbon_matcher.SetMaxCount(3)
 |  >>> print (carbon_matcher.HasMatch(Chem.MolFromSmiles('CCC')))
 |  True
 |
 |  Method resolution order:
 |      SmartsMatcher

In [None]:
x = GetFunctionalGroupHierarchy()


TypeError: 'FilterCatalog' object is not iterable

In [38]:
from rdkit import Chem
from rdkit.Chem import GetFunctionalGroups
import pandas as pd

def analyze_functional_groups_hierarchy(smiles):
    """
    Analyze a molecule using RDKit's hierarchical functional group detection
    
    Args:
        smiles (str): SMILES string of the molecule
        
    Returns:
        dict: Dictionary with hierarchical functional group information
    """
    # Create molecule from SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")
    
    # Get the functional group hierarchy
    hier = GetFunctionalGroupHierarchy()
    
    # Apply the hierarchy to the molecule
    groups = GetFunctionalGroups(mol, hier)
    
    # Process the results
    results = {}
    for i, group in enumerate(groups):
        # Get the group type from the hierarchy
        group_type = hier.GetGroupType(group)
        
        # Get the atoms in this functional group
        atoms = list(group)
        
        # Get the SMARTS pattern that matched
        pattern = hier.GetPatternSmarts(group_type)
        
        # Get the functional group description/name
        description = hier.GetTypeString(group_type)
        
        # Store the information
        if description not in results:
            results[description] = {
                'count': 0,
                'pattern': pattern,
                'occurrences': []
            }
        
        # Increment count and add this occurrence
        results[description]['count'] += 1
        results[description]['occurrences'].append({
            'atoms': atoms,
            'atom_indices': [atom.GetIdx() for atom in atoms]
        })
    
    return results

# Example usage
def demonstrate_functional_group_hierarchy():
    """Demonstrate the usage of GetFunctionalGroupHierarchy with examples"""
    examples = [
        "CCO",                        # Ethanol
        "CC(=O)O",                    # Acetic acid
        "c1ccccc1O",                  # Phenol
        "CC(=O)Cl",                   # Acetyl chloride
        "CC(=O)OC",                   # Methyl acetate
        "CNC(=O)C",                   # N-methylacetamide
        "CCCN(C)C",                   # N,N-dimethylpropylamine
        "CN=C=O",                     # Methyl isocyanate
        "CC(=O)CC(=O)C",              # 2,4-pentanedione
        "Cc1ccc(C(=O)O)cc1"           # 4-methylbenzoic acid
    ]
    
    for smiles in examples:
        print(f"\nAnalyzing: {smiles}")
        mol = Chem.MolFromSmiles(smiles)
        print(f"Molecule: {Chem.MolToSmiles(mol)}")
        
        results = analyze_functional_groups_hierarchy(smiles)
        
        print("Functional Groups Found:")
        for name, info in results.items():
            print(f"  - {name} (Count: {info['count']})")
            print(f"    SMARTS: {info['pattern']}")
            for i, occurrence in enumerate(info['occurrences']):
                print(f"    Occurrence {i+1}: Atom indices: {occurrence['atom_indices']}")
        
        print("-" * 50)

# To visualize the full hierarchy structure
def print_hierarchy_structure():
    """Print the structure of the functional group hierarchy"""
    hier = GetFunctionalGroupHierarchy()
    
    # Get all pattern types
    pattern_types = []
    for i in range(hier.GetNumPatterns()):
        pattern_types.append({
            'type': i,
            'name': hier.GetTypeString(i),
            'smarts': hier.GetPatternSmarts(i),
            'parent': hier.GetPatternParent(i)
        })
    
    # Convert to DataFrame for easier viewing
    df = pd.DataFrame(pattern_types)
    print(df)
    
    # Print hierarchy tree structure
    def print_children(parent_id, level=0):
        indent = "  " * level
        for i, row in df[df['parent'] == parent_id].iterrows():
            print(f"{indent}- {row['name']} (Type: {row['type']}, SMARTS: {row['smarts']})")
            print_children(row['type'], level + 1)
    
    print("\nHierarchy Structure:")
    print_children(-1)  # Start with root nodes (parent = -1)

# Run the examples
# demonstrate_functional_group_hierarchy()

# Print the hierarchy structure
print_hierarchy_structure()

ImportError: cannot import name 'GetFunctionalGroups' from 'rdkit.Chem' (/home/wangdong-xu/.venv/lib/python3.12/site-packages/rdkit/Chem/__init__.py)