In [2]:
import numpy as np
import pandas as pd
from skopt import Optimizer
from skopt.learning import RandomForestRegressor
from skopt.space import Integer
import pickle

In [4]:
import warnings

# Escalate UserWarning to an exception so that a later cell can catch it with
# `except UserWarning` and retry. Only this category is escalated: turning
# every warning into an error would also make unrelated FutureWarning /
# DeprecationWarning messages from pandas or scikit-learn abort the run,
# and nothing in the notebook handles those.
warnings.filterwarnings("error", category=UserWarning)

In [None]:
# Search space of the experimental conditions.
# Every dimension is an integer code; the physical ranges and step sizes it
# stands for are given next to each entry.
bounds = [
    # metal amount: 0.5-7.5 mL in 0.1 mL steps
    Integer(low=5, high=75, name='metal_amount'),
    # modulator NaOH (100 mg/mL): 0.5-1.5 mL in 0.1 mL steps
    Integer(low=5, high=15, name='modulator'),
    # additional DI water: 0-3.0 mL in 0.1 mL steps
    Integer(low=0, high=30, name='add_solvent'),
    # reaction time: 5-60 min in 5 min steps
    Integer(low=1, high=12, name='reaction_time'),
    # reaction temperature: 50-150 C in 5 C steps
    Integer(low=10, high=30, name='reaction_temperature'),
]

Only run for the FIRST time------
Initialize the Bayesian optimizer

In [None]:
# Surrogate model: a seeded 100-tree random forest.
rf = RandomForestRegressor(
    criterion="squared_error",
    n_estimators=100,
    random_state=42,
)

# Initial dataset: the five condition columns are the inputs, and the
# crystallinity score is sign-flipped so that a higher crystallinity becomes a
# lower objective value.
data = pd.read_csv("4N5N_dat.csv")
X_initial = data.loc[
    :,
    ['metal_amount', 'modulator', 'add_solvent', 'reaction_time', 'reaction_temperature'],
].to_numpy()
y_initial = np.negative(data["crystallinity"].to_numpy())

# n_initial_points is set to the number of rows already in the dataset, and
# all of those rows are handed to the optimizer right away.
optimizer = Optimizer(
    dimensions=bounds,
    base_estimator=rf,
    n_initial_points=len(data),
    acq_func="EI",
    random_state=42,
)
optimizer.tell(X_initial.tolist(), y_initial.tolist())

In [None]:
# Get the next batch of experimental conditions.
#
# This cell relies on UserWarning being raised as an exception (see the
# warnings.filterwarnings call near the top of the notebook): a UserWarning
# coming out of optimizer.ask() is treated as "a suggestion duplicates a
# previous trial" and triggers another attempt, up to MAX_ATTEMPT times.
batch_size = 3
MAX_ATTEMPT = 3
ntry = 0
next_params = None
while next_params is None and ntry < MAX_ATTEMPT:
    try:
        next_params = optimizer.ask(n_points=batch_size) #, strategy="cl_max")
    except UserWarning:
        ntry += 1
        if ntry < MAX_ATTEMPT:
            print("One or more suggested experiments are duplicates of previous trials...Try again")

if next_params is None:
    # Every attempt produced a duplicate. Ask one last time with the warning
    # silenced: under the "error" filter this call would raise again and the
    # cell would crash instead of falling through to the duplicate removal
    # below.
    print("Exceed max attempts...Drop duplicates if detected")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        next_params = optimizer.ask(n_points=batch_size)

# One row per suggested experiment, one column per search-space dimension.
next_params_dict = pd.DataFrame(next_params, columns=[dim.name for dim in bounds])

# Append the suggestions and drop any row whose first five columns (the
# condition columns) repeat an earlier row; the earlier row is the one kept.
new_data = pd.concat([data, next_params_dict]).drop_duplicates(data.columns[:5], keep="first")

if new_data.shape[0] == data.shape[0]:
    print("No new experiments proposed...Manually change batch size")
else:   
    print(f"Suggested experiments: \n {new_data.tail(new_data.shape[0] - data.shape[0])[data.columns[:5]]}")

In [None]:
# Write the experiment table, including the newly suggested rows, back to the
# data file without the DataFrame index.
new_data.to_csv(path_or_buf="4N5N_dat.csv", index=False)

# Pickle the optimizer so that a later session can resume from it.
with open("saved_model.pkl", "wb") as model_file:
    pickle.dump(optimizer, model_file)

For subsequent updates of the optimizer and queries of the next experimental conditions

In [14]:
# Restore the optimizer that was pickled by an earlier run of this notebook.
# NOTE(review): pickle.load can execute code stored in the file, so only load
# a saved_model.pkl that this notebook produced.
with open("saved_model.pkl", "rb") as model_file:
    optimizer = pickle.load(model_file)

# Reload the experiment table from the data file.
data = pd.read_csv("4N5N_dat.csv")

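Before running the following cell:  
Perform the experiments with the suggested conditions and manually enter the crystallinity scores into the data file (`4N5N_dat.csv`). If you edit the file after it has been loaded, re-run the loading cell above so that `data` contains the new scores.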

In [None]:
# Bayesian optimization loop: report the most recent batch of measured
# experiments (the last `batch_size` rows of `data`) to the optimizer.
batch_size = 3 # this is the number of experiments performed
ndata = data.shape[0]
# Optional alternative to editing the file by hand: fill in the raw
# measurements for the last batch here and derive the crystallinity score.
#data.loc[ndata-batch_size: ndata-1, "FHWM"] = [...]
#data.loc[ndata-batch_size: ndata-1, "height"] = [...]
#data["crystallinity"] = data["height"]/data["FHWM"]
next_params_dict = data.tail(batch_size)

# Refuse to continue if any score of the batch is still empty, so that a
# missing (NaN) crystallinity is never passed to the optimizer.
missing_scores = next_params_dict["crystallinity"].isna()
if missing_scores.any():
    raise ValueError(
        f"crystallinity is missing for {int(missing_scores.sum())} of the last "
        f"{batch_size} rows; enter the measured scores before updating the optimizer"
    )

# Update the optimizer. The score is sign-flipped, matching how the initial
# dataset was reported, so higher crystallinity is a lower objective value.
# NOTE(review): running this cell twice reports the same batch twice.
optimizer.tell(next_params_dict[['metal_amount', 'modulator', 'add_solvent', 'reaction_time', 'reaction_temperature']].values.tolist(), 
           (-next_params_dict["crystallinity"].values).tolist())

In [19]:
# Write the experiment table back to the data file without the DataFrame index.
# NOTE(review): `new_data` is not assigned by any cell in this section; it is
# presumably produced by re-running the cell that asks for the next batch
# after the optimizer has been updated - confirm before running, because a
# stale `new_data` would overwrite the scores entered in the file.
new_data.to_csv(path_or_buf="4N5N_dat.csv", index=False)

# Pickle the updated optimizer so that the next session can resume from it.
with open("saved_model.pkl", "wb") as model_file:
    pickle.dump(optimizer, model_file)

In [None]:
# Retrieve the best experimental conditions.
# The objective values given to the optimizer are the NEGATED crystallinity
# scores, so the best experiment is the one with the smallest stored value;
# the largest stored value would be the worst experiment.
best_params = optimizer.Xi[int(np.argmin(optimizer.yi))]
# Pair each value with the name of its search-space dimension. The values are
# the integer codes used in `bounds`, not the physical quantities.
best_params_dict = {dim.name: value for dim, value in zip(bounds, best_params)}
print(f"Best experimental conditions: {best_params_dict}")