## Physicochemical properties of glycosides from different biological sources

In [None]:
import pandas as pd
from rdkit import Chem
import collections
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdMolDescriptors as rdescriptors
from rdkit.Chem import Descriptors
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

### 1. Natural glycosides with a biological source — filter out molecules without an aglycone (i.e. the molecule is only sugar)

In [None]:
# Load the sugar-removal results that also carry the biological-source columns.
df = pd.read_csv('../5 Biological Source and Glycosylation/Species_SugarResults.csv')

# Keep glycosides only: the molecule must have (or have had) sugars, and its
# deglycosylated form must not be the '[empty]' placeholder (no aglycone left).
# `== True` is kept on purpose so the comparison behaves the same whatever dtype
# pandas inferred for the column.
is_glycoside = (df['hadOrHasSugars'] == True) & (df['deglycosylatedMoleculeSMILES'] != '[empty]')  # noqa: E712

# A non-inplace reset_index() returns a new, independent frame with a clean
# 0..n-1 index, so adding columns to df2 later does not act on a slice of df
# (which is what triggers pandas' SettingWithCopyWarning).
df2 = df.loc[is_glycoside].reset_index(drop=True)
df2

### 2. The number of sugar units of natural glycosides

In [None]:
# Count the sugar units of every glycoside.
# 'SugarMoietySMILES' is handled as text: the literal '[]' means no sugar moiety,
# otherwise the number of comma-separated entries is the number of sugar units.
# Iterating over the column values (instead of looking rows up by label with
# range(len(df2))) keeps this correct even if df2's index is not 0..n-1.
sugar_num_count = [
    0 if sugar == '[]' else len(sugar.split(','))
    for sugar in map(str, df2['SugarMoietySMILES'])
]
df2['NumSugar'] = sugar_num_count
df2

### 3. Calculate the physicochemical properties of natural glycosides

In [None]:
# Compute RDKit physicochemical descriptors for every glycoside, append them to
# df2 as new columns and save the result to 'Glycosides_Property.csv'.
smileList = df2.originalMoleculeSMILES.values

# Column order of the appended descriptors (and therefore of the CSV).
column_order = [
    'HBA', 'HBD', 'AlogP', 'TPSA', 'num_rotatable_bonds', 'num_heavy_atoms', 'MW',
    'NumAromaticRings', 'RingCount', 'FractionCSP3', 'NOCount', 'NHOHCount',
    'NumCAtoms', 'NumOAtoms', 'NumNAtoms', 'NumSAtoms',
    'NumAliphaticRings', 'NumHeteroatoms',
]

# Descriptor column -> RDKit function that takes the parsed molecule.
descriptor_funcs = {
    'HBA': rdescriptors.CalcNumLipinskiHBA,
    'HBD': rdescriptors.CalcNumLipinskiHBD,
    'AlogP': Descriptors.MolLogP,
    'TPSA': rdescriptors.CalcTPSA,
    'num_rotatable_bonds': Descriptors.NumRotatableBonds,
    'MW': Descriptors.MolWt,
    'NumAromaticRings': Descriptors.NumAromaticRings,
    'RingCount': Descriptors.RingCount,
    'FractionCSP3': rdescriptors.CalcFractionCSP3,
    'NOCount': Descriptors.NOCount,
    'NHOHCount': Descriptors.NHOHCount,
    'NumAliphaticRings': Descriptors.NumAliphaticRings,
    'NumHeteroatoms': Descriptors.NumHeteroatoms,
}

# Element-count column -> element symbol.
element_columns = {'NumCAtoms': 'C', 'NumOAtoms': 'O', 'NumNAtoms': 'N', 'NumSAtoms': 'S'}

properties = {name: [] for name in column_order}
for row, smiles in enumerate(smileList):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; stop with a
        # message that identifies the offending row instead of failing inside
        # the first descriptor call.
        raise ValueError(f'RDKit could not parse SMILES at row {row}: {smiles!r}')

    for name, func in descriptor_funcs.items():
        properties[name].append(func(mol))
    properties['num_heavy_atoms'].append(mol.GetNumHeavyAtoms())

    # Count elements from the parsed atoms rather than from the SMILES characters:
    # a character scan misses aromatic atoms written in lowercase ('c', 'o', 'n',
    # 's') and miscounts two-letter elements such as 'Cl', 'Na' or 'Si'.
    element_counts = collections.Counter(atom.GetSymbol() for atom in mol.GetAtoms())
    for name, symbol in element_columns.items():
        properties[name].append(element_counts[symbol])

for name in column_order:
    df2[name] = properties[name]
df2.to_csv('Glycosides_Property.csv', index=False)
df2

### 4. The average values of the physicochemical properties of glycosides from different biological sources

In [None]:
# Average every physicochemical property per biological source.
# Result: one row per property (names in the unnamed first column) and one
# column per source, rounded to two decimals.
species_list = ['Animal', 'Plant', 'Bacteria', 'Fungi']
property_list = ['HBA', 'HBD', 'AlogP', 'TPSA', 'MW', 'num_rotatable_bonds', 'num_heavy_atoms',
                 'NumAromaticRings', 'RingCount', 'FractionCSP3', 'NOCount', 'NHOHCount',
                 'NumCAtoms', 'NumOAtoms', 'NumNAtoms', 'NumSAtoms', 'NumAliphaticRings',
                 'NumHeteroatoms']
Glycosides_Property = pd.DataFrame(property_list, columns=[''])
for species in species_list:
    # Rows flagged with 1 in the source column belong to that source.
    compound = df2[df2[species] == 1]
    # .mean() yields NaN for a source without any compound, where dividing a sum
    # by len(compound) would raise ZeroDivisionError. Selecting by property_list
    # keeps the values aligned with the rows of Glycosides_Property.
    Glycosides_Property[species] = compound[property_list].mean().round(2).values
Glycosides_Property

### 5. Average physicochemical properties of all glycosides

In [None]:
# Average every physicochemical property over all glycosides, rounded to two
# decimals, shown as a two-column table (Property, Ave).
compound = df2
property_list = ['HBA', 'HBD', 'AlogP', 'TPSA', 'MW', 'num_rotatable_bonds', 'num_heavy_atoms',
                 'NumAromaticRings', 'RingCount', 'FractionCSP3', 'NOCount', 'NHOHCount',
                 'NumCAtoms', 'NumOAtoms', 'NumNAtoms', 'NumSAtoms', 'NumAliphaticRings',
                 'NumHeteroatoms']
# .mean() yields NaN for an empty frame, where dividing a sum by len(compound)
# would raise ZeroDivisionError.
Ave_Property = compound[property_list].mean().round(2)
pd.DataFrame({'Property': property_list, 'Ave': Ave_Property.values})