In [1]:
from predict_snar.data import SolventPicker
import pandas as pd
from joblib import Parallel, delayed

In [2]:
def get_influential_solvent(solvent):
    """Return what SolventPicker reports as the influential solvent for ``solvent``.

    A new SolventPicker is built on every call, so the function carries no
    state between calls and can be handed to joblib workers as-is.
    """
    return SolventPicker().get_influential_solvent(solvent)

In [3]:
def get_solvent_smiles(solvent):
    """Return the result of ``SolventPicker.smiles_from_name`` for ``solvent``.

    A new SolventPicker is built on every call, so the function carries no
    state between calls and can be handed to joblib workers as-is.
    """
    return SolventPicker().smiles_from_name(solvent)

### Read kinetic data

In [11]:
# Picker instance used for the interactive spot checks in the next cells.
sp = SolventPicker()

In [12]:
# Spot check: a binary mixture written as a name plus a v/v ratio.
sp.get_influential_solvent("Ethanol/Water (95:5 v/v)")

'Ethanol'

In [13]:
# Spot check: a second binary mixture, this time with a 60:40 ratio.
sp.get_influential_solvent("Dioxane/Water (60:40 v/v)")

'Water'

In [4]:
# Load the raw spreadsheet; the path is relative to the current working directory.
df_all = pd.read_excel("kinetic_data_v4.xlsx")

Drop the unneeded columns and rename the rest

In [5]:
# Spreadsheet column -> short name used in the rest of the notebook. Renaming
# through an explicit mapping (instead of assigning df.columns positionally)
# keeps every new name tied to its source column even if this list is edited.
column_names = {
    "Reaction_SMILES": "reaction_smiles",
    "Activation free Energy (kcal) Eyring": "activation_energy",
    "Temp (K)": "temperature",
    "Solvent": "solvent",
}

# Select, drop rows with any missing value, rename. The original row index is
# kept on purpose (no reset_index). The trailing copy makes df an independent
# frame, so later column assignments do not act on a slice of df_all.
df = (
    df_all[list(column_names)]
    .dropna()
    .rename(columns=column_names)
    .copy()
)

Check how many reactions use TMS as the solvent

In [9]:
# Rows whose solvent is exactly "TMS", selected in a single .loc lookup.
df.loc[df["solvent"] == "TMS", "solvent"]

219    TMS
468    TMS
Name: solvent, dtype: object

Check how many datapoints we have

In [10]:
# (rows, columns) of the cleaned frame.
df.shape

(503, 4)

Get influential solvents

In [52]:
# NOTE(review): `sp` is not referenced by the parallel call below;
# get_influential_solvent constructs its own SolventPicker on every call.
sp = SolventPicker()

# Resolve the influential solvent for every row, spread over 4 joblib workers.
influential_solvents = Parallel(n_jobs=4)(
    delayed(get_influential_solvent)(solvent_name) for solvent_name in df["solvent"]
)

In [53]:
# influential_solvents was produced by iterating df["solvent"] in row order,
# so this assignment lines up with df's rows.
df["influential_solvent"] = influential_solvents

In [54]:
# Look up a SMILES string for each influential solvent, spread over 4 joblib workers.
smiles = Parallel(n_jobs=4)(
    delayed(get_solvent_smiles)(solvent_name) for solvent_name in influential_solvents
)

In [55]:
# smiles was produced by iterating influential_solvents in order, so this
# assignment lines up with df's rows.
df["solvent_smiles"] = smiles

Save kinetic data file to use later in the modelling process

In [56]:
# Write only the four modelling columns, without the row index.
df.to_csv(
    "kinetic_data.csv",
    columns=["reaction_smiles", "activation_energy", "solvent_smiles", "temperature"],
    sep=",",
    index=False,
    header=True,
)

Save all the data

In [57]:
# Full frame with every column. No index=False is passed here, so unlike
# kinetic_data.csv this file also gets the row index as its first column.
df.to_csv("processed_data.csv")

Save input file for the predict_snar workflow

In [24]:
# One space-separated line per reaction:
#   <reaction SMILES> <temperature> <influential solvent>
# NOTE(review): a solvent name containing a space would add extra fields to
# its line — confirm how the predict_snar workflow parses this file.
with open("smiles", "w") as file:
    file.writelines(
        f"{reaction} {temperature} {solvent_name}\n"
        for reaction, temperature, solvent_name in zip(
            df["reaction_smiles"], df["temperature"], df["influential_solvent"]
        )
    )