## The Murcko scaffolds for all natural products

In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold
from rdkit.Chem import AllChem as ch
from rdkit.Chem import Draw as d
import pandas as pd
from rdkit import DataStructs
import collections
from rdkit.Chem.MolStandardize import rdMolStandardize

### 1. Filter out entries whose deglycosylated structure is empty (the sugars themselves)

In [None]:
# Load the deglycosylation results, then drop every row whose deglycosylated
# SMILES is the '[empty]' placeholder and renumber the remaining rows from 0.
df = pd.read_csv(
    '../3 Removing Sugar/3.1 Removing All Sugar/deglycosylation_results.csv'
)
df = df.loc[df['deglycosylatedMoleculeSMILES'] != '[empty]']
df.reset_index(inplace=True, drop=True)
df

### 2. Define the function to calculate the Murcko scaffold

In [None]:
def _murcko_scaffold_smiles(smiles):
    """Return the Murcko scaffold SMILES for one input SMILES, or None.

    Only the largest fragment (by atom count) of the parsed molecule is used.
    None is returned when the value is not a string (e.g. a missing CSV cell
    read back as NaN) or when RDKit cannot parse it.
    """
    if not isinstance(smiles, str):
        return None
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None, not by raising.
    if mol is None:
        return None
    # Keeping the largest molecular fragment; `default=mol` covers a molecule
    # for which GetMolFrags returns no fragments at all.
    fragments = Chem.rdmolops.GetMolFrags(mol, asMols=True)
    largest = max(fragments, default=mol, key=lambda m: m.GetNumAtoms())
    return Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(largest))


def Murcko_Skeleton(df,filename):
    """Compute Murcko scaffolds for every molecule in *df* and write two CSVs.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``deglycosylatedMoleculeSMILES`` column. A
        ``Scaffold_Smiles`` column is added to it in place; rows whose SMILES
        is missing or cannot be parsed get ``None`` there, so the column
        always stays aligned with the rows of *df*.
    filename : str
        Prefix of the output files:

        * ``<filename>_Scaffold.csv`` - *df* including the new column.
        * ``<filename>_Murcko.csv`` - each distinct scaffold SMILES
          (``Smiles``) with its number of occurrences (``Num``), most
          frequent first; scaffolds with equal counts keep first-seen order.

    Returns
    -------
    None
    """
    scaffold_smiles = [
        _murcko_scaffold_smiles(smiles)
        for smiles in df.deglycosylatedMoleculeSMILES.values
    ]

    df['Scaffold_Smiles'] = scaffold_smiles
    df.to_csv(filename + '_Scaffold.csv', index=False)

    # Rows without a usable molecule are not a scaffold; keep them out of the
    # frequency table.
    counter = collections.Counter(s for s in scaffold_smiles if s is not None)
    results = pd.DataFrame(counter.most_common(), columns=['Smiles', 'Num'])
    results.to_csv(filename + '_Murcko.csv', index=False)

### 3. Generate Murcko frameworks for all natural products

In [None]:
# Writes All_Molecules_Scaffold.csv and All_Molecules_Murcko.csv.
Murcko_Skeleton(df, filename='All_Molecules')