In [6]:
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests

import chemical_conversions

#smoke-test both lookup helpers with compounds whose SMILES are known before processing the sheet
print(chemical_conversions.get_smiles_from_name('benzene')) #name lookup for benzene; expected output: c1ccccc1
print(chemical_conversions.get_smiles_from_cas('110-82-7')) #CAS lookup for cyclohexane (110-82-7); expected output: C1CCCCC1

c1ccccc1
C1CCCCC1


In [7]:
#pass SMILES, Name and CAS from an excel sheet to a dataframe and resolve one SMILES per row
file_name = 'compounds.xlsx'
dataframe = pd.read_excel(file_name) #pass data to a dataframe called "dataframe"
name_array = dataframe['Name'].to_numpy() #pass Name to an array called "name_array"
cas_array = dataframe['CAS'].to_numpy() #pass CAS to an array called "cas_array"


def _has_text(value):
    """Return True when a spreadsheet cell holds a non-blank string.

    Empty cells come back from pandas as NaN (a float), so a plain type check is
    used instead of math.isnan, which raises TypeError when handed a string.
    """
    return isinstance(value, str) and value.strip() != ''


def _resolve_smiles(smiles, name, cas):
    """Return the SMILES for one row, or None when no identifier yields one.

    Priority: the SMILES already in the sheet, then a lookup by Name, then a
    lookup by CAS. A lookup that does not return a non-blank string falls
    through to the next identifier (the lookup helpers presumably return None
    on failure -- TODO confirm against chemical_conversions).
    """
    if _has_text(smiles):
        return smiles
    if _has_text(name):
        smiles_from_name = chemical_conversions.get_smiles_from_name(name) #get SMILES from Name
        if _has_text(smiles_from_name):
            return smiles_from_name
    if _has_text(cas):
        smiles_from_cas = chemical_conversions.get_smiles_from_cas(cas) #get SMILES from CAS
        if _has_text(smiles_from_cas):
            return smiles_from_cas
    return None


#report, for the sheet as a whole, which identifier columns are completely empty
if not any(_has_text(value) for value in dataframe['SMILES']):
    print('There is no SMILES. Use Name instead.') #if there is no SMILES, use Name
    if not any(_has_text(value) for value in name_array):
        print('There is no Name. Use CAS instead.') #if there is no Name, use CAS
        if not any(_has_text(value) for value in cas_array):
            print('There is no CAS. Cannot fetch data.') #if there is no CAS, cannot fetch data

#resolve every row on its own so that a sheet mixing filled and empty cells still works;
#rows that cannot be resolved hold None
smiles_array = [
    _resolve_smiles(smiles, name, cas)
    for smiles, name, cas in zip(dataframe['SMILES'], name_array, cas_array)
]
dataframe['SMILES'] = smiles_array #pass the resolved SMILES to the dataframe
dataframe

There is no SMILES. Use Name instead.
There is no Name. Use CAS instead.
error with CAS: 0-0-0, 	 HTTP Error 500: INTERNAL SERVER ERROR


Unnamed: 0,SMILES,Name,CAS
0,,,0-0-0
1,[N-]=[N+]=O,,10024-97-2
2,CCC(C)C,,78-78-4
3,CC(C)=O,,67-64-1
4,CCCC(C)C,,107-83-5
5,CCC(C)CC,,96-14-0
6,CCCCCC,,110-54-3
7,CC1CCCC1,,96-37-7


In [8]:
# create an array of API strings with the (percent-encoded) SMILES appended
YSI_API_BASE_URL = 'https://ysi.ml.nrel.gov/api/'
MISSING_SMILES_PLACEHOLDER = 'None' #rows without a SMILES keep a placeholder URL so the row count is unchanged

# quote(..., safe='') escapes every reserved character; without it a '#' (triple bond)
# would be read as a URL fragment and cut the SMILES short before it reaches the server.
# Anything that is not a string (None, NaN) is treated as a missing SMILES.
# smiles_array itself is left untouched.
api_array = [
    YSI_API_BASE_URL + (quote(smiles, safe='') if isinstance(smiles, str) else MISSING_SMILES_PLACEHOLDER)
    for smiles in smiles_array
]

In [9]:
#pass measured YSI and predicted YSI to the dataframe
REQUEST_TIMEOUT_SECONDS = 30 #give up on a single request after this long instead of hanging the notebook

ysi_measure_array = []
ysi_predict_array = []
with requests.Session() as session: #one session is reused for every compound
    for i, api_url in enumerate(api_array):
        try:
            response = session.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS) #use APIs to get responses via requests
            #a Response is falsy for 4xx/5xx status codes, so its body is only parsed on success
            raw_data = response.json() if response else None
        except (requests.RequestException, ValueError):
            #network failure, timeout or a body that is not JSON: treat the row like a missing response
            #so that one bad compound does not abort the whole run
            raw_data = None
        if raw_data is None: #if the API returns an invalid result, measured YSI and predicted YSI are set to None
            print('No API response at row =', i+1, '. Invalid Name =', name_array[i], '. Invalid CAS =', cas_array[i])
            ysi_measure_array.append(None)
            ysi_predict_array.append(None)
            continue
        if raw_data['outlier']: #if the compound is an outlier, measured YSI and predicted YSI are set to None
            ysi_measure_array.append(None)
            ysi_predict_array.append(None)
            continue
        ysi_measure_array.append(raw_data['exp_mean']) #add 'exp_mean' to an array called 'ysi_measure_array'
        ysi_predict_array.append(raw_data['mean']) #add 'mean' to an array called 'ysi_predict_array'
dataframe['measured YSI'] = ysi_measure_array
dataframe['predicted YSI'] = ysi_predict_array
dataframe

No API response at row = 1 . Invalid Name = nan . Invalid CAS = 0-0-0
No API response at row = 2 . Invalid Name = nan . Invalid CAS = 10024-97-2


Unnamed: 0,SMILES,Name,CAS,measured YSI,predicted YSI
0,,,0-0-0,,
1,[N-]=[N+]=O,,10024-97-2,,
2,CCC(C)C,,78-78-4,,30.654342
3,CC(C)=O,,67-64-1,13.0,6.289966
4,CCCC(C)C,,107-83-5,36.7,37.096191
5,CCC(C)CC,,96-14-0,38.2,37.096191
6,CCCCCC,,110-54-3,30.4,31.389208
7,CC1CCCC1,,96-37-7,50.3,50.422357


In [10]:
#write the dataframe (with the YSI columns added above) to an excel file next to the notebook
dataframe.to_excel(excel_writer='./YSI results.xlsx')