In [24]:
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests

import chemical_conversions

# Smoke-test both lookup helpers before they are used on the spreadsheet.
# NOTE(review): the name is spelled 'clorobenzene' here; the recorded output for it is
# 'Clc1ccccc1' (a chlorinated ring), not plain benzene 'c1ccccc1'.
smiles_from_name = chemical_conversions.get_smiles_from_name('clorobenzene')
print(smiles_from_name)
# CAS lookup check; the recorded output for this CAS number is 'C1CCCCC1'.
smiles_from_cas = chemical_conversions.get_smiles_from_cas('110-82-7')
print(smiles_from_cas)

Clc1ccccc1
C1CCCCC1


In [19]:
# Load the compound table and resolve a SMILES string for every row.
#
# The sheet must provide 'SMILES', 'Name' and 'CAS' columns. Each row keeps its own
# SMILES when one is present; otherwise it falls back to a Name lookup and then to a
# CAS lookup. The decision is made per row rather than from row 0 only, so a single
# blank (or filled) first cell no longer decides how the whole column is treated.
file_name = 'compounds.xlsx'
dataframe = pd.read_excel(file_name)
name_array = dataframe['Name'].to_numpy()  # compound names, NaN where missing
cas_array = dataframe['CAS'].to_numpy()  # CAS numbers, NaN where missing

resolved_smiles = []
unresolved_rows = []  # rows with no usable SMILES, Name or CAS
for row, (smiles, name, cas) in enumerate(zip(dataframe['SMILES'], name_array, cas_array)):
    # Empty spreadsheet cells are read as float NaN, so "is a str" means "is present".
    if not isinstance(smiles, str):
        if isinstance(name, str):
            smiles = chemical_conversions.get_smiles_from_name(name)
        elif isinstance(cas, str):
            smiles = chemical_conversions.get_smiles_from_cas(cas)
        else:
            unresolved_rows.append(row)
    resolved_smiles.append(smiles)

if unresolved_rows:
    print('There is no SMILES, Name or CAS. Cannot fetch data for rows:', unresolved_rows)

# Object dtype so later cells can store placeholder strings in slots that are still
# missing (a float array, produced by an all-empty column, would reject them).
smiles_array = np.array(resolved_smiles, dtype=object)
dataframe['SMILES'] = smiles_array
dataframe

Unnamed: 0,SMILES,Name,CAS
0,[N-]=[N+]=O,,
1,CCC(C)C,,
2,CC(C)=O,,
3,CCCC(C)C,,
4,CCC(C)CC,,
...,...,...,...
403,CCO[Si](OCC)(O[Si](C)(C)C)O[Si](C)(C)C,,
404,COc1cc2c(cc1OC)C(=O)NCC2,,
405,COc1ccc(Oc2nonc2N)cc1,,
406,CCO[Si](OCC)(O[Si](C)(C)C)O[Si](C)(C)C,,


In [20]:
# Build one YSI API URL per compound, in the same row order as smiles_array.
#
# The SMILES is percent-encoded before being appended to the path. Characters such as
# '#' (triple bond) are otherwise interpreted as URL syntax: everything after '#' is
# treated as a fragment and never sent to the server, so the API would receive a
# truncated SMILES.
YSI_API_BASE = 'https://ysi.ml.nrel.gov/api/'
api_array = []
for i, smiles in enumerate(smiles_array):
    if not isinstance(smiles, str):
        # Keep a placeholder so api_array stays aligned with the dataframe rows;
        # the request for it is expected to fail and is reported by the fetch cell.
        smiles = 'NO_SMILES'
        smiles_array[i] = smiles
        print('No SMILES at row =', i)
    api_array.append(YSI_API_BASE + quote(smiles, safe=''))

No SMILES at row = 133
No SMILES at row = 268
No SMILES at row = 363


In [25]:
# Query the YSI API once per row and attach the measured and predicted YSI to the dataframe.
#
# A row gets None for both values when the request fails (connection error, timeout,
# HTTP error status, body that is not a JSON object) or when the API flags the
# compound as an outlier. Failures are reported per row and never abort the loop, so
# one bad request cannot discard the results already collected.
REQUEST_TIMEOUT_S = 30  # seconds to wait per request before treating it as failed

ysi_measure_array = []
ysi_predict_array = []
with requests.Session() as session:  # reuse one connection for all rows
    for i, url in enumerate(api_array):
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT_S)
            # A Response is falsy for 4xx/5xx status codes.
            raw_data = response.json() if response else None
        except (requests.RequestException, ValueError):
            # Transport errors, timeouts, or a body that is not valid JSON.
            raw_data = None

        if not isinstance(raw_data, dict):
            print('No API response at row =', i, '. SMILES =', smiles_array[i], '. Name =', name_array[i], '. CAS =', cas_array[i])
            ysi_measure_array.append(None)
            ysi_predict_array.append(None)
            continue

        if raw_data.get('outlier'):
            # Outliers get no values at all, neither measured nor predicted.
            ysi_measure_array.append(None)
            ysi_predict_array.append(None)
            continue

        ysi_measure_array.append(raw_data.get('exp_mean'))  # measured YSI
        ysi_predict_array.append(raw_data.get('mean'))  # predicted YSI

dataframe['measured YSI'] = ysi_measure_array
dataframe['predicted YSI'] = ysi_predict_array
dataframe

No API response at row = 0 . Invalid SMILES = [N-]=[N+]=O . Invalid Name = nan . Invalid CAS = nan
No API response at row = 126 . Invalid SMILES = N#CCc1[nH]cnc1N . Invalid Name = nan . Invalid CAS = nan
No API response at row = 133 . Invalid SMILES = no_SMILES . Invalid Name = nan . Invalid CAS = nan
No API response at row = 213 . Invalid SMILES = Cc1cc(C)c(C#N)c(C)c1 . Invalid Name = nan . Invalid CAS = nan
No API response at row = 268 . Invalid SMILES = no_SMILES . Invalid Name = nan . Invalid CAS = nan
No API response at row = 363 . Invalid SMILES = no_SMILES . Invalid Name = nan . Invalid CAS = nan
No API response at row = 401 . Invalid SMILES = N#CCn1c(C(F)F)nc2ccccc21 . Invalid Name = nan . Invalid CAS = nan


Unnamed: 0,SMILES,Name,CAS,measured YSI,predicted YSI
0,[N-]=[N+]=O,,,,
1,CCC(C)C,,,,30.654342
2,CC(C)=O,,,13.0,6.289966
3,CCCC(C)C,,,36.7,37.096191
4,CCC(C)CC,,,38.2,37.096191
...,...,...,...,...,...
403,CCO[Si](OCC)(O[Si](C)(C)C)O[Si](C)(C)C,,,,13.030004
404,COc1cc2c(cc1OC)C(=O)NCC2,,,,227.458124
405,COc1ccc(Oc2nonc2N)cc1,,,,9.967749
406,CCO[Si](OCC)(O[Si](C)(C)C)O[Si](C)(C)C,,,,13.030004


In [26]:
# Write the annotated compound table (including the YSI columns) to an Excel workbook.
results_path = './YSI results.xlsx'
dataframe.to_excel(results_path)