In [None]:
!pip install scikit-optimize

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored on Drive can be read and written through ordinary paths.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from skopt import gp_minimize
from skopt.learning import GaussianProcessRegressor
from skopt.space import Categorical
from skopt.plots import plot_convergence

In [None]:
# Suppress warnings
# NOTE(review): this blanket filter silences every warning for the rest of the
# session, including deprecation/convergence warnings; consider narrowing it.
warnings.filterwarnings("ignore")


def min_max_normalize(series):
    """Linearly rescale a numeric Series so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    series : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(series - min) / (max - min)``. If the column has no spread (all
        values identical, or no non-missing values) the range is zero or
        undefined; in that case the shifted column ``series - min`` is returned
        (zeros for present values, NaN for missing ones) instead of dividing
        by zero and filling the column with NaN.
    """
    lowest = series.min()
    span = series.max() - lowest
    # `not span > 0` is also True when span is NaN (empty / all-missing column).
    if not span > 0:
        return (series - lowest).astype(float)
    return (series - lowest) / span


# Load dataset from Google Drive
# TODO: set this to the CSV holding the unique set of parameters; with the
# empty placeholder pd.read_csv cannot open anything and will raise.
file_path = ""  # File path to the unique set of parameters
df = pd.read_csv(file_path, encoding='ISO-8859-1')

# Normalize 'Coverage Percentage' and 'Average Size um' to [0, 1] so the two
# terms of the weighted objective are on a comparable scale.
df["Coverage Percentage"] = min_max_normalize(df["Coverage Percentage"])
df["Average Size um"] = min_max_normalize(df["Average Size um"])

In [None]:
# The five process-parameter columns, in the order used for every parameter tuple
_parameter_columns = [
    "Spincoating Speed",
    "Substrates preheated temperature",
    "Solution preheated temperature",
    "Composition",
    "Antisolvent Used",
]

# One tuple per dataset row, holding that row's value for each parameter column
valid_combinations = list(zip(*(df[column] for column in _parameter_columns)))

# Categorical search space: each dimension is restricted to the values that
# actually occur in the corresponding dataset column
search_space = [
    Categorical(df[column].unique().tolist(), name=column)
    for column in _parameter_columns
]


In [None]:
# Objective function (receives the five parameters as a single sequence)
def objective(params):
    """Look up the weighted objective for one parameter combination in ``df``.

    Parameters
    ----------
    params : sequence of length 5
        ``(spincoating speed, substrate preheated temperature, solution
        preheated temperature, composition, antisolvent)``, in that order.

    Returns
    -------
    float
        ``-((0.7 * size) - (0.3 * coverage))`` computed from the
        "Average Size um" and "Coverage Percentage" values of the first row of
        ``df`` whose five parameter columns equal ``params``. The sign is
        flipped so that a minimizer maximizes the weighted score.
        Returns ``1e6`` (a large penalty) when no row matches.

    Raises
    ------
    ValueError
        If ``params`` does not unpack into exactly five values.
    """
    spincoating_speed, substrate_temp, solution_temp, composition, antisolvent = params

    # Single lookup: the filtered frame tells us both whether the combination
    # exists and what its measurements are, so no separate membership scan of a
    # second data structure is needed.
    matches = df[
        (df["Spincoating Speed"] == spincoating_speed) &
        (df["Substrates preheated temperature"] == substrate_temp) &
        (df["Solution preheated temperature"] == solution_temp) &
        (df["Composition"] == composition) &
        (df["Antisolvent Used"] == antisolvent)
    ]

    if len(matches) == 0:
        return 1e6  # Large penalty to ignore this selection

    # Weighted objective: 70% weight on size (rewarded), 30% on coverage (penalized).
    # NOTE(review): the original comment called the coverage term the "defect
    # percentage" — confirm that "Coverage Percentage" measures defect coverage.
    coverage = matches["Coverage Percentage"].iloc[0]
    grain_size = matches["Average Size um"].iloc[0]

    return -((0.7 * grain_size) - (0.3 * coverage))  # Minimize for gp_minimize


In [None]:
# Helper that samples only parameter tuples present in the dataset
def generate_valid_samples(n_samples):
    """Return ``n_samples`` entries of ``valid_combinations`` picked at random.

    Each pick draws an index with ``np.random.randint`` (NumPy's global random
    state), so the same entry may be returned more than once.
    """
    picks = []
    for _ in range(n_samples):
        index = np.random.randint(len(valid_combinations))
        picks.append(valid_combinations[index])
    return picks


In [None]:
# Sampling loop: collect a fixed number of valid objective evaluations for each
# acquisition-function label.
#
# NOTE(review): despite the labels, every candidate below is drawn at random
# from the dataset via generate_valid_samples(); neither `gp` nor the
# acquisition-function name influences which candidate is evaluated, and
# `initial_samples` is never consumed. As written, the four runs are independent
# random-search traces — confirm whether a surrogate-driven selection step was
# intended here.

# Fixed seed so that Restart & Run All reproduces the same traces. Seeded once,
# before the outer loop, so the four runs still draw different samples.
SEED = 42
np.random.seed(SEED)

iterations = 20  # We want exactly 20 valid iterations
n_initial = 5

# Values at or above this threshold are treated as "invalid selection".
# Must match the penalty value returned by objective().
INVALID_PENALTY = 1e6

# Upper bound on draws per run: without it the while-loop below never ends if
# the objective never yields a valid value (e.g. it is NaN for every row).
max_attempts = 100 * iterations

# Define surrogate model (Gaussian Process)
gp = GaussianProcessRegressor()

acq_functions = ["EI", "PI", "LCB", "TS"]  # Added TS
results = []
csv_data = []  # Store data for saving as CSV

for acq_name in acq_functions:
    valid_results = []  # Track valid function evaluations
    valid_x_iters = []  # Track valid parameter selections

    # Get initial valid samples (currently unused, see the note above)
    initial_samples = generate_valid_samples(n_initial)

    valid_iterations = 0  # Count only valid iterations
    attempts = 0  # Count every draw, valid or not
    while valid_iterations < iterations:
        if attempts >= max_attempts:
            raise RuntimeError(
                f"Only {valid_iterations} of {iterations} valid evaluations found for "
                f"'{acq_name}' after {max_attempts} draws; check the dataset for "
                "missing or non-numeric objective values."
            )
        attempts += 1

        # Select a valid random sample from the dataset
        candidate_params = generate_valid_samples(1)[0]  # Returns a tuple

        # Evaluate the objective function
        y = objective(candidate_params)

        # Count the draw only if it produced a real objective value
        # (a NaN also fails this comparison and is skipped).
        if y < INVALID_PENALTY:
            valid_results.append(y)
            valid_x_iters.append(candidate_params)
            valid_iterations += 1  # Count valid selection

            # Save data for CSV
            csv_data.append([valid_iterations, acq_name, y] + list(candidate_params))

    # Store final results
    results.append({"acquisition_function": acq_name, "x_iters": valid_x_iters, "func_vals": valid_results})



In [None]:
# Assemble the logged evaluations into a table and write it to Drive as CSV
result_columns = [
    "Iteration",
    "Acquisition Function",
    "Objective Value",
    "Spincoating Speed",
    "Substrates preheated temperature",
    "Solution preheated temperature",
    "Composition",
    "Antisolvent Used",
]
csv_file_path = "/content/drive/MyDrive/DAISY2.0 Data/Bayesian_Optimization_Results.csv"

csv_df = pd.DataFrame(csv_data, columns=result_columns)
csv_df.to_csv(csv_file_path, index=False)

print(f"Results saved to {csv_file_path}")


In [None]:
# Draw one objective-value trace per acquisition-function label (valid iterations only)
fig, ax = plt.subplots(figsize=(10, 6))
for run in results:
    objective_trace = run["func_vals"]
    iteration_numbers = range(1, len(objective_trace) + 1)
    ax.plot(iteration_numbers, objective_trace, label=run["acquisition_function"])

ax.set_xlabel("Valid Iteration")
ax.set_ylabel("Objective Function Value")
ax.set_title("Bayesian Optimization Progress (Valid Selections Only)")
ax.legend()
ax.grid()
plt.show()