In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive into the Colab filesystem at the path below.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import optuna
import optuna.visualization
import matplotlib.pyplot as plt

In [None]:
# Location of the CSV on the mounted Drive. This is a placeholder and must be
# filled in before the cell can run.
file_path = ''  # Update with your file path

# Read the CSV as ISO-8859-1 text and trim leading/trailing whitespace from
# every column header in the same step.
df = pd.read_csv(file_path, encoding='ISO-8859-1').rename(columns=str.strip)

In [None]:
# Process parameters treated as model inputs.
features = [
    'Spincoating Speed',
    'Antisolvent Used',
    'Substrates preheated Temperature',
    'Solution preheated temperature',
]

# Measured outcomes and the direction each one should move.
target_grain = 'Average Size um'       # Maximize
target_defect = 'Coverage Percentage'  # Minimize

# Keep only rows in which both targets are present. Missing values in the
# feature columns are intentionally left untouched.
df = df[df[[target_defect, target_grain]].notna().all(axis=1)]

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode every column listed in `features`. Categories not seen while
# fitting are encoded as all zeros (handle_unknown='ignore') and the output is
# a dense array (sparse_output=False).
# NOTE(review): this also one-hot encodes columns whose names suggest numeric
# values (speed, temperatures) -- confirm that is intended. `X` is not
# referenced by the later cells visible in this notebook.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
categorical_data = encoder.fit_transform(df[features])

# Wrap the encoded matrix in a DataFrame labelled with the generated names.
X = pd.DataFrame(categorical_data, columns=encoder.get_feature_names_out(features))

# Target columns as raw value arrays, in the same row order as `df`.
y_defect, y_grain = (df[name].values for name in (target_defect, target_grain))

In [None]:
import numpy as np

# Weighted objective: 30% weight to defect percentage, 70% to grain size
def objective(params):
    """Score a pair of dataset row positions; lower is better.

    Args:
        params: Sequence whose first element selects the row read from
            ``y_defect`` and whose second element selects the row read from
            ``y_grain``. Every element is converted with ``int``.

    Returns:
        ``0.3 * defect - 0.7 * grain``, so a smaller defect value and a
        larger grain size both reduce the score.

    NOTE(review): the two positions are independent, so the score can pair a
    defect value and a grain size from different rows -- confirm this is
    intended.
    """
    row_ids = tuple(map(int, params))
    weighted_defect = 0.3 * y_defect[row_ids[0]]
    weighted_grain = 0.7 * y_grain[row_ids[1]]
    return weighted_defect - weighted_grain


In [None]:
from skopt.space import Integer

# Each search dimension is a row position in the dataset, ranging from 0
# through n_samples - 1 inclusive.
n_samples = len(df)
space = [
    Integer(0, n_samples - 1, name=dim_name)
    for dim_name in ("defect_index", "grain_index")
]


In [None]:
from skopt import Optimizer
import matplotlib.pyplot as plt

# Acquisition functions to compare. 'EI', 'PI' and 'LCB' are built into skopt.
# 'TS' (Thompson Sampling) is not: `Optimizer.ask(strategy=...)` only applies
# to batch requests (n_points given) and only accepts the constant-liar
# strategies, so passing strategy='ts' to a single-point ask() is silently
# ignored. Thompson Sampling is therefore implemented by hand below.
acq_funcs = ['EI', 'PI', 'LCB', 'TS']
results = {}

SEED = 42              # shared seed so every strategy starts from the same state
N_ITERATIONS = 50      # objective evaluations per acquisition function
N_TS_CANDIDATES = 256  # random candidate points scored per Thompson draw


def _thompson_ask(optimizer, rng, n_candidates=N_TS_CANDIDATES):
    """Propose the next point by Thompson Sampling on the latest surrogate.

    Draws one joint sample of the surrogate's posterior over a random set of
    candidate points and returns the candidate where that sample is lowest.

    Args:
        optimizer: skopt ``Optimizer`` being driven.
        rng: ``numpy.random.RandomState`` used for both the candidate draw
            and the posterior sample, so runs are reproducible.
        n_candidates: Number of random points from the search space to score.

    Returns:
        A point from ``optimizer.space``. While ``optimizer.models`` is still
        empty (no surrogate fitted yet) this falls back to
        ``optimizer.ask()``.
    """
    if not optimizer.models:
        return optimizer.ask()

    candidates = optimizer.space.rvs(n_samples=n_candidates, random_state=rng)
    # The surrogate is fitted on transformed coordinates, so the candidates
    # must go through the same transform before being scored.
    posterior_draw = optimizer.models[-1].sample_y(
        optimizer.space.transform(candidates), n_samples=1, random_state=rng
    )
    return candidates[int(np.argmin(np.ravel(posterior_draw)))]


fig, ax = plt.subplots()

# Loop through acquisition functions
for acq in acq_funcs:
    optimizer_kwargs = dict(dimensions=space, base_estimator="gp", random_state=SEED)
    if acq != 'TS':
        # Built-in acquisition function; 'TS' keeps skopt's default because its
        # proposals come from _thompson_ask instead.
        optimizer_kwargs['acq_func'] = acq
    optimizer = Optimizer(**optimizer_kwargs)
    ts_rng = np.random.RandomState(SEED)

    # Perform optimization
    for i in range(N_ITERATIONS):
        if acq == 'TS':
            suggested = _thompson_ask(optimizer, ts_rng)
        else:
            suggested = optimizer.ask()

        y = objective(suggested)
        optimizer.tell(suggested, y)

    # Save results
    results[acq] = optimizer

    # One curve per acquisition function: objective value at each evaluation.
    ax.plot(range(len(optimizer.yi)), optimizer.yi, label=acq)

# Axis decoration is loop-invariant, so it is applied once after all curves.
ax.set_xlabel('Iteration')
ax.set_ylabel('Objective Value')
ax.set_title('Optimization Progress')
ax.legend()
plt.show()

In [None]:
# Report, for each acquisition function, the evaluated point with the lowest
# objective value and that value.
for acq, optimizer in results.items():
    best_idx = np.argmin(optimizer.yi)
    best_params = optimizer.Xi[best_idx]
    print(
        f'Acquisition Function: {acq}',
        f'Best Parameters: {best_params}',
        f'Best Objective Value: {optimizer.yi[best_idx]}',
        sep='\n',
    )
