In [1]:
# Support for math
import numpy as np
import math

# Plotting tools
from matplotlib import pyplot as plt
import matplotlib
from matplotlib import cm

import warnings
warnings.filterwarnings('ignore')

# File Tools for local
import pandas as pd
import sys

# Random seed for reproducibility
import random

import torch
# from botorch.models.gp_regression import HeteroskedasticSingleTaskGP - This was removed in path #2616.
from botorch.models.gp_regression import SingleTaskGP
from botorch.acquisition import qExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.utils.transforms import normalize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.optim import optimize_acqf
from botorch.utils.transforms import normalize, unnormalize

from ipywidgets import interact, FloatSlider

#LHS sampling
#from pyDOE import lhs
from smt.sampling_methods import LHS
import random

# Cluster 
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


>## Data Prep

In [2]:
df = pd.read_csv('parameterspace.csv')
df

Unnamed: 0,n,theta,r,t,toughness,mass,printerid,campaignid,category
0,6,0,1.5,0.70,1.144667,1.7131,1,1,2
1,6,0,1.5,1.05,1.607561,1.9386,2,1,2
2,6,0,1.5,1.40,1.144338,1.9828,3,1,2
3,6,0,1.7,0.70,3.642738,1.9723,4,1,2
4,6,0,1.7,1.05,3.748405,2.2785,5,1,2
...,...,...,...,...,...,...,...,...,...
1795,12,200,2.3,1.05,1.358975,6.3430,5,3,3
1796,12,200,2.3,1.40,3.196306,5.7742,1,3,3
1797,12,200,2.5,0.70,36.104187,4.8745,2,3,3
1798,12,200,2.5,1.05,1.313487,6.7221,4,3,3


In [3]:
x_inputs = df[['n',	'theta','r','t'] ].to_numpy()
y_outputs = df["toughness"].values.reshape(-1, 1)
# Set the device and dtype
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32
# Convert to torch tensors
x_all = torch.tensor(x_inputs, dtype=torch.float32)
y_all = torch.tensor(y_outputs, dtype=torch.float32)


# Define a ground truth 
def ground_truth(x_query_batch: torch.Tensor):
    """
    x_query_batch: Tensor of shape (batch_size, d)
    Returns: Tensor of shape (batch_size, 1)
    """
    # x_all should be of shape (N, d)
    # Expand for broadcasting: (batch_size, N, d)
    diffs = x_all.unsqueeze(0) - x_query_batch.unsqueeze(1)  # (batch, N, d)
    dists = torch.norm(diffs, dim=2)  # (batch, N)
    idx = torch.argmin(dists, dim=1)  # (batch,)
    return y_all[idx]  # (batch, 1)



>## Initial Sampling Step

In [4]:
bounds = torch.tensor([[x_all[:, 0].min(), x_all[:, 0].max()],
                      [x_all[:, 1].min(), x_all[:, 1].max()],
                      [x_all[:, 2].min(), x_all[:, 2].max()],
                      [x_all[:, 3].min(), x_all[:, 3].max()]], dtype=torch.float32)

#bounds = torch.tensor([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], dtype=torch.float32)

# Lets say theta is constrained 
# Bounds : sulf , anly, temp, time
xlimits = bounds.numpy()
sampling = LHS(xlimits=xlimits,random_state = np.random.RandomState(0))

num = 6
x = sampling(num)

lhs_data = pd.DataFrame(x, columns=['n','theta','r','t'])
# print(lhs_data.round(2))

# Select the 'temp' column for clustering
constrained = lhs_data[['theta']]

# Standardize the data (important for K-Means)
scaler = StandardScaler()
temp_data_scaled = scaler.fit_transform(constrained )

# Choose the number of clusters
num_clusters = 3

# Apply K-Means clustering
kmeans = KMeans(n_clusters=num_clusters, n_init=100, random_state=42)
lhs_data['cluster'] = kmeans.fit_predict(temp_data_scaled)

# Get cluster assignments and centroids
cluster_assignments = kmeans.labels_

# Get the centroids
centroids = scaler.inverse_transform(kmeans.cluster_centers_)

# Display the centroids
print("Centroids of Temperature Clusters:")
print(centroids)

# Replace the temperature values in lhs_data with the centroid temperatures for the designated clusters
for cluster in range(num_clusters):
    lhs_data.loc[lhs_data['cluster'] == cluster, 'theta'] = centroids[cluster][0]

# Display the updated lhs_data
print(lhs_data.round(2))


Centroids of Temperature Clusters:
[[166.66666667]
 [ 33.33333333]
 [100.        ]]
      n   theta     r     t  cluster
0  11.5  100.00  1.75  1.11        2
1   8.5   33.33  2.08  0.99        1
2   9.5  166.67  1.58  0.87        0
3  10.5  166.67  2.42  1.34        0
4   7.5  100.00  2.25  0.76        2
5   6.5   33.33  1.92  1.22        1


In [12]:
## Build Model A class 
class ModelA:
    def __init__(self, x_train, y_train, bounds):
        self.x_train = x_train
        self.y_train = y_train
        self.bounds = bounds
        self.model = self._fit_gp_model()
    
    def _fit_gp_model(self):
        model = SingleTaskGP(self.x_train, self.y_train)
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_mll(mll)
        return model

    def gp_evaluate(self, test_x):
        self.model.eval()
        with torch.no_grad():
            posterior = self.model.posterior(test_x)
        mean = posterior.mean.squeeze().numpy()
        var = posterior.variance.squeeze().numpy()
        return mean, var

    def optimize_ei(self, batch_size=6):
        best_f = self.y_train.max()
        qEI = qExpectedImprovement(model=self.model, best_f=best_f)
        candidate, _ = optimize_acqf(
            acq_function=qEI,
            bounds=torch.tensor([[0., 0., 0. , 0.], [1., 1., 1.,1.]], dtype=self.x_train.dtype),
            q=batch_size,
            num_restarts=15,
            raw_samples=100,
        )
        return unnormalize(candidate, self.bounds)
    
    def new_candidates(self, feature='theta', num_clusters=3):
        candidates = self.optimize_ei().cpu().numpy()  # <-- fix is here
        data = {
            'n': candidates[:, 0],
            'theta': candidates[:, 1],
            'r': candidates[:, 2],
            't': candidates[:, 3]
        }
        data_df = pd.DataFrame(data)
        constrained = data_df[[feature]]
        scaler = StandardScaler()
        temp_data_scaled = scaler.fit_transform(constrained)

        kmeans = KMeans(n_clusters=num_clusters, n_init=100, random_state=42)
        data_df['cluster'] = kmeans.fit_predict(temp_data_scaled)
        centroids = scaler.inverse_transform(kmeans.cluster_centers_)

        for cluster in range(num_clusters):
            data_df.loc[data_df['cluster'] == cluster, feature] = centroids[cluster][0]

        return data_df.round(2)

class Plotting:
    def __init__(self, modelA: ModelA, variable_combinations):
        self.modelA = modelA
        self.x_train = modelA.x_train
        self.y_train = modelA.y_train
        self.bounds = modelA.bounds
        self.model = modelA.model
        self.variable_combinations = variable_combinations
        
    def generate_input_data(self, A, B, c, d, combination):
        if combination == ('t', 'theta'):
            return torch.tensor(np.array([[A[i, j], B[i, j], c, d] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)
        elif combination == ('t', 'n'):
            return torch.tensor(np.array([[B[i, j], c, d, A[i, j]] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)
        elif combination == ('t', 'r'):
            return torch.tensor(np.array([[B[i, j], d, A[i, j], c] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)
        elif combination == ('theta', 'n'):
            return torch.tensor(np.array([[c, A[i, j], B[i, j], d] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)
        elif combination == ('theta', 'r'):
            return torch.tensor(np.array([[c, d, A[i, j], B[i, j]] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)
        elif combination == ('n', 'r'):
            return torch.tensor(np.array([[d, c, A[i, j], B[i, j]] for i in range(A.shape[0]) for j in range(A.shape[1])]), dtype=dtype)

    def staircase_plot_with_histograms(self, c_fixed, d_fixed, color='viridis'):
        num_points = 20
        a = np.linspace(0, 1, num_points)
        b = np.linspace(0, 1, num_points)
        A, B = np.meshgrid(a, b)

        mean_values = []
        var_values = []
        for combination in self.variable_combinations:
            input_data = self.generate_input_data(A, B, c_fixed, d_fixed, combination)
            mean, var = self.modelA.gp_evaluate(input_data)
            mean_values.append(mean)
            var_values.append(var)

        num_features = self.x_train.shape[1]
        feature_names = ['n', 'theta', 'r', 't']

        fig, axes = plt.subplots(num_features, num_features, figsize=(12, 10))
        count = 0
        for i in range(num_features):
            for j in range(num_features):
                ax = axes[i, j]
                if i < j:
                    ax.axis('off')
                elif i == j:
                    ax.hist(self.x_train[:, i].numpy(), bins=10, color='gray', edgecolor='black', alpha=0.7)
                    ax.set_xlabel(feature_names[i])
                    ax.set_box_aspect(1)
                    ax.set_ylabel("Frequency")
                else:
                    ax.scatter(self.x_train[:, j].numpy(), self.x_train[:, i].numpy(), c=self.y_train.numpy(), cmap=color, s=50, alpha=0.5, marker='o')
                    sc = ax.contourf(A, B, mean_values[count].reshape(num_points, num_points), cmap=color, alpha=0.7, levels=10, vmax=self.y_train.max(), vmin=0)
                    fig.colorbar(sc, ax=ax, orientation='vertical')
                    ax.set_xlabel(feature_names[j])
                    ax.set_ylabel(feature_names[i])
                    ax.set_box_aspect(1)
                    count += 1
        plt.tight_layout()



In [None]:
original_bounds = torch.tensor([[x_all[:,0].min(), x_all[:,1].min(), x_all[:,2].min(), x_all[:,3].min()], [x_all[:,0].max(), x_all[:,1].max(), x_all[:,2].max(), x_all[:,3].max()]])
x_train = torch.tensor(lhs_data[['n','theta','r','t']].values, dtype=torch.float32)
nx_train = normalize(x_train, bounds=original_bounds)
y_train = ground_truth(x_train)


# Step 1: Instantiate and train model
modelA = ModelA(x_train=nx_train, y_train=y_train, bounds=original_bounds)

# Step 2: Define the combinations for your pair plots
variable_combinations = [('t', 'theta'), ('t', 'n'), ('t', 'r'), ('theta', 'n'), ('theta', 'r'), ('n', 'r')]

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))



interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n       theta     r     t  cluster
0  11.36  112.980003  2.50  0.76        0
1   6.00   80.669998  2.50  0.70        2
2   6.00  112.980003  1.93  0.86        0
3  11.36  112.980003  1.90  0.71        0
4   7.12  149.039993  2.26  0.70        1
5   7.26  112.980003  1.66  0.70        0


In [7]:
x_train1 = torch.tensor(new_candidates[['n','theta','r','t']].values, dtype=torch.float32)
nx_train1 = normalize(x_train1, bounds=original_bounds)
y_train1 = ground_truth(x_train1)

# Combine the new data with the existing data
x_combined = torch.cat((nx_train, nx_train1), dim=0)
y_combined = torch.cat((y_train, y_train1), dim=0)

# Step 1: Instantiate and train model
modelA = ModelA(x_train=x_combined, y_train=y_combined, bounds=original_bounds)

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))


interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n       theta     r     t  cluster
0  11.19  200.000000  1.76  0.70        2
1  12.00  134.399994  2.05  0.70        0
2  10.31  134.399994  2.47  0.70        0
3   9.91  200.000000  1.50  1.27        2
4  12.00  200.000000  2.50  0.70        2
5  11.84   14.420000  2.50  0.72        1


In [8]:
x_train2 = torch.tensor(new_candidates[['n','theta','r','t']].values, dtype=torch.float32)
nx_train2 = normalize(x_train2, bounds=original_bounds)
y_train2 = ground_truth(x_train2)

# Combine the new data with the existing data
x_combined = torch.cat((nx_train, nx_train1,nx_train2), dim=0)
y_combined = torch.cat((y_train, y_train1,y_train2), dim=0)


# Step 1: Instantiate and train model
modelA = ModelA(x_train=x_combined, y_train=y_combined, bounds=original_bounds)

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))

interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n       theta     r     t  cluster
0  12.00   32.349998  2.36  0.70        0
1   6.45  181.160004  2.21  0.70        1
2  10.94   32.349998  2.50  0.70        0
3   8.01   32.349998  2.02  1.00        0
4  12.00   32.349998  2.50  0.92        0
5  11.87  116.279999  1.61  0.70        2


In [9]:
x_train3 = torch.tensor(new_candidates[['n','theta','r','t']].values, dtype=torch.float32)
nx_train3 = normalize(x_train2, bounds=original_bounds)
y_train3 = ground_truth(x_train2)

# Combine the new data with the existing data
x_combined = torch.cat((nx_train, nx_train1,nx_train2, nx_train3), dim=0)
y_combined = torch.cat((y_train, y_train1,y_train2,y_train3), dim=0)


# Step 1: Instantiate and train model
modelA = ModelA(x_train=x_combined, y_train=y_combined, bounds=original_bounds)

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))

interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n       theta     r     t  cluster
0  11.76  153.309998  2.36  1.01        0
1  12.00   41.320000  2.50  0.70        1
2  10.46  153.309998  2.50  0.73        0
3  11.25   41.320000  2.37  0.70        1
4  12.00    9.340000  2.50  0.95        2
5   9.43   41.320000  2.21  1.01        1


In [10]:
x_train4 = torch.tensor(new_candidates[['n','theta','r','t']].values, dtype=torch.float32)
nx_train4 = normalize(x_train2, bounds=original_bounds)
y_train4 = ground_truth(x_train2)

# Combine the new data with the existing data
x_combined = torch.cat((nx_train, nx_train1,nx_train2, nx_train3,nx_train4), dim=0)
y_combined = torch.cat((y_train, y_train1,y_train2,y_train3,y_train4), dim=0)


# Step 1: Instantiate and train model
modelA = ModelA(x_train=x_combined, y_train=y_combined, bounds=original_bounds)

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))

interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n       theta     r     t  cluster
0  12.00   15.590000  2.50  0.93        2
1   9.38  181.750000  2.10  0.70        0
2   8.57  181.750000  1.84  1.40        0
3  12.00   47.009998  2.50  0.70        1
4   9.34   47.009998  1.51  0.83        1
5  11.33   47.009998  2.37  0.70        1


In [11]:
x_train5 = torch.tensor(new_candidates[['n','theta','r','t']].values, dtype=torch.float32)
nx_train5 = normalize(x_train2, bounds=original_bounds)
y_train5 = ground_truth(x_train2)

# Combine the new data with the existing data
x_combined = torch.cat((nx_train, nx_train1,nx_train2, nx_train3,nx_train4,nx_train5), dim=0)
y_combined = torch.cat((y_train, y_train1,y_train2,y_train3,y_train4,y_train5), dim=0)


# Step 1: Instantiate and train model
modelA = ModelA(x_train=x_combined, y_train=y_combined, bounds=original_bounds)

# Step 3: Create the plotting class using the trained model
plotter = Plotting(modelA=modelA, variable_combinations=variable_combinations)

# Step 4: Show the staircase plot for given c and d
@interact(c_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='c_fixed'),
                  d_fixed=FloatSlider(min=0.0, max=1.0, step=0.1, value=0.5, description='d_fixed'))

def update_plot(c_fixed, d_fixed):
        plotter.staircase_plot_with_histograms(c_fixed, d_fixed)

# Step 5: Get the next candidates
new_candidates = modelA.new_candidates(feature='theta', num_clusters=3)
print("New Candidates:")
print(new_candidates.round(2))

interactive(children=(FloatSlider(value=0.5, description='c_fixed', max=1.0), FloatSlider(value=0.5, descripti…

New Candidates:
       n      theta     r     t  cluster
0  12.00  37.029999  2.50  0.70        2
1   6.40  86.629997  2.13  0.85        1
2   8.79  50.060001  2.02  0.96        0
3  10.81  37.029999  2.50  0.70        2
4  11.74  37.029999  2.35  0.70        2
5   8.00  50.060001  2.41  0.83        0
