# Property calculations

RDKit can calculate a large number of quantitative chemical properties (descriptors). These, in turn, can be used as input features for machine learning.
The cells below use RDKit to calculate these properties for all of the chemicals labeled in the [descriptor_clustering](descriptor_clustering.ipynb) notebook.

In [1]:
import os.path as path
import pickle

# Load merged FEMA-JECFA database
# Root folder under the user's home directory that holds the project's data dumps.
BASE_DATA_PATH = path.join(
    path.expanduser('~'), 'Dropbox', 'bymt', 'data_dumps', 'chem_project'
)

# Pickle of labeled chemicals (presumably written by the descriptor_clustering notebook).
labeled_chemicals_path = path.join(
    BASE_DATA_PATH, 'descriptor_clustering', 'labeled_chemicals.pkl'
)

# NOTE: pickle.load can execute arbitrary code; only load dumps from a trusted source.
with open(labeled_chemicals_path, 'rb') as f:
    labeled_chemicals = pickle.load(f)

# Folder for this notebook's own data (presumably its outputs; not used in the cells shown here).
DATA_PATH = path.join(BASE_DATA_PATH, 'property_calculations')

In [19]:
from rdkit.Chem import Descriptors
import types

# Helper functions that live in the Descriptors namespace but are not descriptors to compute.
to_remove = ['_isCallable', '_setupDescriptors', '_test']

# Collect (name, function) pairs for every plain Python function found in the
# Descriptors module namespace, skipping the helpers listed above.
functions = [
    (name, Descriptors.__dict__.get(name))
    for name in dir(Descriptors)
    if name not in to_remove
    and isinstance(Descriptors.__dict__.get(name), types.FunctionType)
]

print(len(functions))

207


In [20]:
from copy import deepcopy

def property_calculator(dicto_list, function_list):
    """
    Applies the functions in function_list to the rdkit molecule of each chemical
    dictionary in dicto_list

    dicto_list should have dictos with an 'rdkit mol' value
    function_list is a list of tuples with:
    -tup[0] = function name (used as the key under which the result is stored)
    -tup[1] = callable function taking the molecule as its only argument

    If a function raises an exception for a molecule, the string 'NaN' is stored
    under that function's name instead of a value.

    returns: deep copy of dicto_list with calculated properties added to each dicto
    (the input list and its dictos are left unmodified)
    """
    new_list = deepcopy(dicto_list)
    for dicto in new_list:
        mol = dicto.get('rdkit mol')
        # Use the function_list argument, not the notebook-level `functions` global,
        # so the function works with whatever list the caller passes in.
        for entry in function_list:
            name, function = entry[0], entry[1]
            try:
                dicto[name] = function(mol)
            except Exception:
                # Best effort: a descriptor that fails on this molecule is recorded
                # as 'NaN'; KeyboardInterrupt/SystemExit are no longer swallowed.
                dicto[name] = 'NaN'
    return new_list

In [21]:
# Run every collected descriptor function on each labeled chemical; the result
# is a copy of the chemical dictionaries with the calculated values added.
propertied_chemicals = property_calculator(dicto_list=labeled_chemicals,
                                           function_list=functions)

In [23]:
from rdkit.Chem.Fingerprints import FingerprintMols

# Take the RDKit molecule of the first labeled chemical as a sample.
x = labeled_chemicals[0]['rdkit mol']
# Fingerprint the sample molecule with FingerprintMols (displayed in the next cell).
fp = FingerprintMols.FingerprintMol(x)

In [24]:
# Display the fingerprint object computed in the previous cell.
fp

<rdkit.DataStructs.cDataStructs.ExplicitBitVect at 0x10c0b79e0>

In [25]:
from rdkit.Chem import MACCSkeys
# Generate MACCS keys for the same sample molecule `x`; this rebinds `fp`,
# replacing the fingerprint from the earlier cell.
fp = MACCSkeys.GenMACCSKeys(x)
# Display the resulting object.
fp

<rdkit.DataStructs.cDataStructs.ExplicitBitVect at 0x10f3bd490>