In [None]:
import numpy as np


In [None]:
# Load the initial input and output arrays from the working directory
# (inputs first, then outputs).
datain, dataout = (
    np.load(npy_file) for npy_file in ('initial_inputs.npy', 'initial_outputs.npy')
)

In [None]:
# Quick look at the loaded inputs: contents, then shape, then Python type.
for inspected in (datain, datain.shape, type(datain)):
    print(inspected)

In [None]:
# Quick look at the loaded outputs: contents, then shape, then Python type.
for inspected in (dataout, dataout.shape, type(dataout)):
    print(inspected)

Update with new data

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm

# --- 1. Initial data ---
# X_init: 10 sampled locations, one (x1, x2) row per sample.
X_init = np.array([
    [0.31940389, 0.76295937],
    [0.57432921, 0.8798981],
    [0.73102363, 0.73299988],
    [0.84035342, 0.26473161],
    [0.65011406, 0.68152635],
    [0.41043714, 0.1475543],
    [0.31269116, 0.07872278],
    [0.68341817, 0.86105746],
    [0.08250725, 0.40348751],
    [0.88388983, 0.58225397]
])

# y_init: 10 observed outputs, presumably aligned row-for-row with X_init
# (later cells extend both arrays in parallel). The values span many orders of
# magnitude and include negative readings.
y_init = np.array([
    1.32267704e-079,  1.03307824e-046,  7.71087511e-016,
    3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
    -2.08909327e-091,  2.53500115e-040,  3.60677119e-081,
    6.22985647e-048
])

In [None]:
# Add the new observation
X_new = np.array([[0.080808, 0.404040]])
y_new = np.array([5.34214011784672e-82])

# Combine with previous data.
# This cell rebinds X_init / y_init, so running it twice would append the same
# row again; an exactly duplicated location makes the GP kernel matrix nearly
# singular. Only append when this location is not already recorded.
already_recorded = bool(np.any(np.all(X_init == X_new, axis=1)))
if already_recorded:
    X_all, y_all = X_init, y_init
else:
    X_all = np.vstack([X_init, X_new])
    y_all = np.concatenate([y_init, y_new])

# Later cells read the accumulated data through the *_init names.
X_init = X_all
y_init = y_all

In [None]:
# Inspect the accumulated inputs: contents, then shape, then Python type.
for inspected in (X_init, X_init.shape, type(X_init)):
    print(inspected)

In [None]:
# Inspect the accumulated outputs: contents, then shape, then Python type.
for inspected in (y_init, y_init.shape, type(y_init)):
    print(inspected)

# Week 1 UCB method


In [None]:
# Targets for the GP: an untouched copy of the raw outputs.
# (A log transform, log(|y| + 1e-8), was considered for the sparse/near-zero
# outputs but is not applied here.)
y_trans = np.array(y_init, copy=True)


# --- 2. Fit Gaussian Process ---
# Matern nu=2.5 kernel with length_scale=0.1; targets are normalised by the
# regressor and alpha=1e-6 is added to the kernel diagonal.
kernel = Matern(nu=2.5, length_scale=0.1)
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True, alpha=1e-6)

gp.fit(X_init, y_trans)

# --- 3. Define UCB acquisition function ---
def acquisition_ucb(X, gp, kappa=2.0):
    """Upper Confidence Bound score for each candidate row in ``X``.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    kappa : float, default 2.0
        Weight on the predictive standard deviation; larger values favour
        exploration.

    Returns
    -------
    Predictive mean plus ``kappa`` times predictive standard deviation.
    """
    mean, std = gp.predict(X, return_std=True)
    exploration_bonus = kappa * std
    return mean + exploration_bonus

# --- 4. Candidate locations: a regular grid_size x grid_size lattice on [0, 1]^2 ---
grid_size = 100
x1 = np.linspace(0, 1, grid_size)
x2 = np.linspace(0, 1, grid_size)
# 'ij' indexing plus a C-order reshape keeps x1 as the slow axis and x2 as the
# fast axis, i.e. rows are ordered (x1[0], x2[0]), (x1[0], x2[1]), ...
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# --- 5. Score every candidate with UCB ---
acq_values = acquisition_ucb(X_candidates, gp, kappa=2.5)

# --- 6. The highest-scoring candidate becomes the next query ---
next_point = X_candidates[acq_values.argmax()]
print("Next point to query:", next_point)


# Week 2 EI method


In [None]:
# Stabilize and preserve sign (for small negative readings).
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and the GP/acquisition would then maximise the wrong thing.
y_trans = np.sign(y_all) * np.log1p(np.abs(y_all) / 1e-20) / np.log(10.0)


In [None]:
###
# Note: length_scale is slightly increased here (0.15) because the previous
# grid suggested spatial correlation extending across ~0.1–0.2 units.
###
kernel = Matern(nu=2.5, length_scale=0.15)
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True, alpha=1e-6)
gp.fit(X_all, y_trans)

# Choose a smarter acquisition strategy
Bootstrapping is a resampling method where you generate multiple samples with replacement from your original data. You then calculate the statistic (mean, median, correlation, etc.) on each sample to understand the variability and uncertainty of that statistic.

**Scenario**
UCB (μ + κσ) is good for exploration, but since most of my outputs are near zero, I may want to balance exploration and exploitation adaptively.

Try using Expected Improvement (EI) instead of UCB.
It’s more focused on discovering true peaks when most readings are near noise.

In [None]:
from scipy.stats import norm

def acquisition_ei(X, gp, y_best, xi=0.01):
    """Expected Improvement (maximisation form) for each candidate row in ``X``.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    y_best : float
        Incumbent value that a candidate has to beat.
    xi : float, default 0.01
        Margin subtracted from the improvement; larger values favour exploration.

    Returns
    -------
    numpy.ndarray
        Flat array with one EI value per candidate.
    """
    mean, std = gp.predict(X, return_std=True)
    std = np.reshape(std, (-1, 1))
    mean = np.reshape(mean, (-1, 1))

    improvement = mean - y_best - xi
    # The 1e-9 offset keeps the ratio finite when the predictive std is zero.
    z_score = improvement / (std + 1e-9)
    expected = improvement * norm.cdf(z_score) + std * norm.pdf(z_score)
    return expected.ravel()


In [None]:
# Candidate locations: a regular grid_size x grid_size lattice on [0, 1]^2.
grid_size = 100
x1 = np.linspace(0, 1, grid_size)
x2 = np.linspace(0, 1, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# Incumbent: the largest transformed target seen so far.
y_best = y_trans.max()

# Expected Improvement for every candidate.
acq_values = acquisition_ei(X_candidates, gp, y_best, xi=0.02)

# The candidate with the highest EI becomes the next query.
next_point = X_candidates[acq_values.argmax()]
print("Next point to query:", next_point)


# Week 3 

In [None]:
# --------------------------------------------------------------------
# 1️⃣ Prepare your data
# --------------------------------------------------------------------
# Add the new observation
X_new = np.array([[0.393939, 0.070707]])
y_new = np.array([4.676119097408122e-88])

# Combine with previous data.
# This cell rebinds X_init / y_init, so running it twice would append the same
# row again; an exactly duplicated location makes the GP kernel matrix nearly
# singular. Only append when this location is not already recorded.
already_recorded = bool(np.any(np.all(X_init == X_new, axis=1)))
if already_recorded:
    X_all, y_all = X_init, y_init
else:
    X_all = np.vstack([X_init, X_new])
    y_all = np.concatenate([y_init, y_new])

# Later cells read the accumulated data through the *_init names.
X_init = X_all
y_init = y_all

In [None]:
# Dump the accumulated inputs, then the accumulated outputs.
for inspected in (X_all, y_all):
    print(inspected)


In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm

# --------------------------------------------------------------------
# 2️⃣ Signed log10 transform (stable handling of small/negative values)
# --------------------------------------------------------------------
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0,
# so the largest reading stays the largest target.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and y_best would point at a negative reading.
eps = 1e-20
signs = np.sign(y_all)
y_trans = signs * np.log1p(np.abs(y_all) / eps) / np.log(10.0)

# --------------------------------------------------------------------
# 3️⃣ Fit Gaussian Process with Matern kernel
# --------------------------------------------------------------------
kernel = Matern(length_scale=0.15, nu=2.5)
gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, normalize_y=True)
gp.fit(X_all, y_trans)

# --------------------------------------------------------------------
# 4️⃣ Define Expected Improvement (EI) acquisition function
# --------------------------------------------------------------------
def acquisition_ei(X, gp, y_best, xi=0.02):
    """Expected Improvement (maximisation form) with a guarded division.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    y_best : float
        Incumbent value that a candidate has to beat.
    xi : float, default 0.02
        Margin subtracted from the improvement; larger values favour exploration.

    Returns
    -------
    numpy.ndarray
        Flat array with one EI value per candidate; candidates whose predictive
        std is below 1e-9 get exactly 0.
    """
    mean, std = gp.predict(X, return_std=True)
    mean, std = mean.reshape(-1, 1), std.reshape(-1, 1)

    improvement = mean - y_best - xi
    # Divide only where the std is usable; everywhere else the ratio stays 0.
    z_score = np.zeros_like(improvement)
    np.divide(improvement, std, out=z_score, where=std > 1e-9)
    expected = improvement * norm.cdf(z_score) + std * norm.pdf(z_score)
    # Points with (numerically) no predictive uncertainty get no EI.
    expected[std < 1e-9] = 0.0
    return expected.ravel()

# --------------------------------------------------------------------
# 5️⃣ Candidate locations: regular lattice on [0, 1]^2
# --------------------------------------------------------------------
grid_size = 100
x1 = np.linspace(0, 1, grid_size)
x2 = np.linspace(0, 1, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# --------------------------------------------------------------------
# 6️⃣ Score the lattice with EI and take the best candidate
# --------------------------------------------------------------------
y_best = y_trans.max()
acq_values = acquisition_ei(X_candidates, gp, y_best, xi=0.02)

next_point = X_candidates[acq_values.argmax()]
print("Next point to query:", next_point)


In [None]:
# Inspect the transformed targets currently held in y_trans.
print(y_trans)

In [None]:
# Inspect the incumbent (largest transformed target) used by the EI call.
print(y_best)

# composite week 3

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm

# --- 1. Updated dataset (including new points) ---
# One (x1, x2) row per sample; the last two rows are the queried points.
X_all = np.array([
    [0.31940389, 0.76295937],
    [0.57432921, 0.8798981],
    [0.73102363, 0.73299988],
    [0.84035342, 0.26473161],
    [0.65011406, 0.68152635],
    [0.41043714, 0.1475543],
    [0.31269116, 0.07872278],
    [0.68341817, 0.86105746],
    [0.08250725, 0.40348751],
    [0.88388983, 0.58225397],
    [0.080808,   0.404040],     # New point 1
    [0.393939,   0.070707]      # New point 2
])

# Observed outputs, aligned row-for-row with X_all.
y_all = np.array([
    1.32267704e-079,  1.03307824e-046,  7.71087511e-016,
    3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
    -2.08909327e-091,  2.53500115e-040,  3.60677119e-081,
    6.22985647e-048,   5.34214011784672e-82,   # New output 1
    4.676119097408122e-88                    # New output 2
])

# --- 2. Signed log10 transform to handle wide dynamic range ---
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0,
# so the largest reading stays the largest target.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and y_best would point at a negative reading.
eps = 1e-20
signs = np.sign(y_all)
y_trans = signs * np.log1p(np.abs(y_all) / eps) / np.log(10.0)

# --- 3. Gaussian Process: Matern nu=2.5, length_scale=0.1, normalised targets ---
kernel = Matern(nu=2.5, length_scale=0.1)
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True, alpha=1e-6)
gp.fit(X_all, y_trans)

# --- 4. Expected Improvement (EI) acquisition function ---
def acquisition_ei(X, gp, y_best, xi=0.01):
    """Expected Improvement (maximisation form) for each candidate row in ``X``.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    y_best : float
        Incumbent value that a candidate has to beat.
    xi : float, default 0.01
        Margin subtracted from the improvement; larger values favour exploration.

    Returns
    -------
    numpy.ndarray
        Flat array with one EI value per candidate.
    """
    mean, std = gp.predict(X, return_std=True)
    std = np.reshape(std, (-1, 1))
    mean = np.reshape(mean, (-1, 1))
    improvement = mean - y_best - xi
    # The 1e-9 offset keeps the ratio finite when the predictive std is zero.
    z_score = improvement / (std + 1e-9)
    expected = improvement * norm.cdf(z_score) + std * norm.pdf(z_score)
    return expected.ravel()

# --- 5. Candidate lattice, pulled in by `margin` on every side to avoid edges ---
grid_size = 100
margin = 0.02
x1 = np.linspace(margin, 1 - margin, grid_size)
x2 = np.linspace(margin, 1 - margin, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# --- 6. EI for every candidate, relative to the best transformed target ---
y_best = y_trans.max()
acq_values = acquisition_ei(X_candidates, gp, y_best, xi=0.02)

# --- 7. Best candidate and its EI ---
best_ei = np.max(acq_values)
next_point = X_candidates[acq_values.argmax()]

# --- 8. Report with six decimals ---
print(f"Next point to query: [{next_point[0]:.6f}, {next_point[1]:.6f}], EI = {best_ei:.6f}")


# Week 4

# The new reading (≈9.13e-225) is astronomically tiny (far smaller than my previous smallest).
# The location [0.02, 0.02] is effectively clean — no detectable source there.
# Adding this point will reduce GP uncertainty locally around the bottom-left interior,
# but it also strengthens the overall conclusion that most regions sampled so far are essentially zero.
# That increases the relative value of exploring truly unsampled or poorly sampled regions of the domain.
# I will raise xi to 0.05 for more exploration (previously 0.02).

# The first attempt picked a point right next to my last point, even after I increased xi to 0.05.
# That is a clear signal that something deeper than the xi parameter is shaping the behavior of my GP.
# Fix: increase the length_scale, e.g. from 0.15 to 0.4.
# Effect: if two points are closer than the length_scale, the GP assumes their readings are strongly correlated;
# if they are farther apart, it assumes their readings are largely independent.

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm

# --------------------------
# 1) Existing data (12) + new reading
# --------------------------
# One (x1, x2) row per sample.
X_all = np.array([
    [0.31940389, 0.76295937],
    [0.57432921, 0.87989810],
    [0.73102363, 0.73299988],
    [0.84035342, 0.26473161],
    [0.65011406, 0.68152635],
    [0.41043714, 0.14755430],
    [0.31269116, 0.07872278],
    [0.68341817, 0.86105746],
    [0.08250725, 0.40348751],
    [0.88388983, 0.58225397],
    [0.08080800, 0.40404000],
    [0.39393900, 0.07070700]
])

# Observed outputs, aligned row-for-row with X_all.
y_all = np.array([
    1.32267704e-79,
    1.03307824e-46,
    7.71087511e-16,
    3.34177101e-124,
    -3.60606264e-03,
    -2.15924904e-54,
    -2.08909327e-91,
    2.53500115e-40,
    3.60677119e-81,
    6.22985647e-48,
    5.34214011784672e-82,
    4.676119097408122e-88
])

# Append latest measurement: [0.02, 0.02] -> 9.127963232956071e-225
X_all = np.vstack([X_all, [0.020000, 0.020000]])
y_all = np.concatenate([y_all, np.array([9.127963232956071e-225])])

# --------------------------
# 2) Signed log10 transform
# --------------------------
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0,
# so the largest reading stays the largest target.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and y_best would point at a negative reading.
eps = 1e-20
signs = np.sign(y_all)
y_trans = signs * np.log1p(np.abs(y_all) / eps) / np.log(10.0)

# --------------------------
# 3) GP fit
# --------------------------
# length_scale=0.4 was chosen for modest smoothing.
# NOTE(review): the regressor is built with its default optimizer settings, so
# this value may only act as a starting point — confirm whether it should be
# held fixed (e.g. length_scale_bounds="fixed").
kernel = Matern(nu=2.5, length_scale=0.4)
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True, alpha=1e-6)
gp.fit(X_all, y_trans)

# --------------------------
# 4) Expected Improvement (stable)
# --------------------------
def acquisition_ei(X, gp, y_best, xi=0.02):
    """Expected Improvement (maximisation form) with a guarded division.

    Prints the ``xi`` in use on every call.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    y_best : float
        Incumbent value that a candidate has to beat.
    xi : float, default 0.02
        Margin subtracted from the improvement; larger values favour exploration.

    Returns
    -------
    numpy.ndarray
        Flat array with one EI value per candidate; candidates whose predictive
        std is below 1e-9 get exactly 0.
    """
    print(f"xi[{xi}]")

    mean, std = gp.predict(X, return_std=True)
    mean, std = mean.reshape(-1, 1), std.reshape(-1, 1)
    improvement = mean - y_best - xi
    # Divide only where the std is usable; everywhere else the ratio stays 0.
    z_score = np.zeros_like(improvement)
    np.divide(improvement, std, out=z_score, where=std > 1e-9)
    expected = improvement * norm.cdf(z_score) + std * norm.pdf(z_score)
    # Points with (numerically) no predictive uncertainty get no EI.
    expected[std < 1e-9] = 0.0
    return expected.ravel()

# --------------------------
# 5) Candidate lattice (pulled in by `margin` to avoid edge artifacts)
# --------------------------
grid_size = 100
margin = 0.02
x1 = np.linspace(margin, 1.0 - margin, grid_size)
x2 = np.linspace(margin, 1.0 - margin, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# --------------------------
# 6) EI over the lattice, then the top candidate
# --------------------------
y_best = y_trans.max()
ei_vals = acquisition_ei(X_candidates, gp, y_best, xi=0.1)

# Single best candidate, reported with six decimals.
best_idx = np.argmax(ei_vals)
next_point, best_ei = X_candidates[best_idx], ei_vals[best_idx]
print(f"Next point to query: [{next_point[0]:.6f}, {next_point[1]:.6f}], EI = {best_ei:.6f}")

# optional: top-3 diverse batch (greedy max-EI with min-distance repulsion)
k = 3
min_separation = 0.08   # radius ~8% of domain
selected = []
candidates = X_candidates.copy()
ei_copy = ei_vals.astype(float, copy=True)
for _ in range(k):
    idx = np.argmax(ei_copy)
    if not np.isfinite(ei_copy[idx]):
        # Every remaining candidate is suppressed: no further diverse pick exists.
        break
    selected.append(candidates[idx])
    # Suppress neighbours within the radius (the pick itself included) to
    # encourage diversity. They are masked with -inf rather than 0.0: EI values
    # can themselves be 0 or marginally negative, so a zeroed neighbour could
    # tie or beat the remaining candidates and be selected again.
    dists = np.linalg.norm(candidates - candidates[idx], axis=1)
    ei_copy[dists < min_separation] = -np.inf
# print batch
for i, p in enumerate(selected, 1):
    print(f"Batch #{i}: [{p[0]:.6f}, {p[1]:.6f}]")


# Switching to UCB
# Just swapping EI for UCB (the function's default is kappa=4.0; the call below passes kappa=6.0).

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
# --------------------------
# 1) Existing data (12) + new reading
# --------------------------
# X_all: one (x1, x2) row per sample (12 rows here, a 13th is appended below).
X_all = np.array([
    [0.31940389, 0.76295937],
    [0.57432921, 0.87989810],
    [0.73102363, 0.73299988],
    [0.84035342, 0.26473161],
    [0.65011406, 0.68152635],
    [0.41043714, 0.14755430],
    [0.31269116, 0.07872278],
    [0.68341817, 0.86105746],
    [0.08250725, 0.40348751],
    [0.88388983, 0.58225397],
    [0.08080800, 0.40404000],
    [0.39393900, 0.07070700]
])

# y_all: observed outputs, presumably aligned row-for-row with X_all
# (both arrays are extended in parallel below).
y_all = np.array([
    1.32267704e-79,
    1.03307824e-46,
    7.71087511e-16,
    3.34177101e-124,
    -3.60606264e-03,
    -2.15924904e-54,
    -2.08909327e-91,
    2.53500115e-40,
    3.60677119e-81,
    6.22985647e-48,
    5.34214011784672e-82,
    4.676119097408122e-88
])

# Append latest measurement: [0.02, 0.02] -> 9.127963232956071e-225
# After these two lines X_all has 13 rows and y_all has 13 entries.
X_all = np.vstack([X_all, [0.020000, 0.020000]])
y_all = np.concatenate([y_all, np.array([9.127963232956071e-225])])

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

# --- 1. Existing data (replace with your own arrays) ---
# X_all and y_all should already include all past samples
# For example:
# X_all = np.array([...])
# y_all = np.array([...])

# --- 2. Signed log transform for stable scaling ---
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0,
# so the largest reading stays the largest target.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and UCB would then chase negative readings.
eps = 1e-20
signs = np.sign(y_all)
y_trans = signs * np.log1p(np.abs(y_all) / eps) / np.log(10.0)

# --- 3. Fit Gaussian Process ---
kernel = Matern(length_scale=1.0, nu=2.5)
gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, normalize_y=True)
gp.fit(X_all, y_trans)

# --- 4. Define UCB acquisition function ---
def acquisition_ucb(X, gp, kappa=4.0):
    """Upper Confidence Bound score for each candidate row in ``X``.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    kappa : float, default 4.0
        Weight on the predictive standard deviation; larger values favour
        exploration.

    Returns
    -------
    numpy.ndarray
        Flat array: predictive mean plus ``kappa`` times predictive std.
    """
    mean, std = gp.predict(X, return_std=True)
    score = mean + kappa * std
    return score.ravel()

# --- 5. Candidate lattice, pulled in by `margin` on every side ---
grid_size = 100
margin = 0.02
x1 = np.linspace(margin, 1 - margin, grid_size)
x2 = np.linspace(margin, 1 - margin, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# --- 6. UCB for every candidate (kappa=6.0 overrides the function default) ---
acq_values = acquisition_ucb(X_candidates, gp, kappa=6.0)

# --- 7. Best candidate and its UCB ---
best_ucb = np.max(acq_values)
next_point = X_candidates[acq_values.argmax()]

print(f"Next point to query: [{next_point[0]:.6f}, {next_point[1]:.6f}], UCB = {best_ucb:.6f}")


In [None]:
# Diagnostic: largest predictive standard deviation over the candidate lattice.
mu, sigma = gp.predict(X_candidates, return_std=True)
print("Max sigma:", sigma.max())


In [None]:
# Predictive mean/std on the lattice and the UCB built from them (kappa = 6.0).
mu, sigma = gp.predict(X_candidates, return_std=True)
ucb = (mu + 6.0 * sigma).ravel()

# Indices of the ten highest UCB values, best first.
topk = 10
ix = np.argsort(ucb)[::-1][:topk]
for rank, i in enumerate(ix, 1):
    pt = X_candidates[i]
    # Distance from this candidate to every already-sampled location.
    dists = np.linalg.norm(X_all - pt, axis=1)
    print(f"{rank:02d}: point={pt}, UCB={ucb[i]:.6f}, min_dist_to_existing={dists.min():.4f}, mu={mu[i]:.6f}, sigma={sigma[i]:.6f}")


Let the GP learn an appropriate kernel amplitude and length scale.
This usually fixes the constant-mu / constant-sigma behavior permanently.

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, Matern

# === 1️⃣ Combine your data ===
# One (x1, x2) row per sample (13 rows).
X_all = np.array([
    [0.31940389, 0.76295937],
    [0.57432921, 0.8798981 ],
    [0.73102363, 0.73299988],
    [0.84035342, 0.26473161],
    [0.65011406, 0.68152635],
    [0.41043714, 0.1475543 ],
    [0.31269116, 0.07872278],
    [0.68341817, 0.86105746],
    [0.08250725, 0.40348751],
    [0.88388983, 0.58225397],
    [0.080808,   0.404040],
    [0.393939,   0.070707],
    [0.020000,   0.020000],
])

# Observed outputs, aligned row-for-row with X_all.
y_all = np.array([
    1.32267704e-079, 1.03307824e-046, 7.71087511e-016,
    3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
    -2.08909327e-091, 2.53500115e-040, 3.60677119e-081,
    6.22985647e-048, 5.34214011784672e-82,
    4.676119097408122e-88, 9.127963232956071e-225
])

# --- Signed log transform to stabilize magnitude spread ---
# sign(y) * log10(1 + |y| / eps) is strictly increasing in y and maps 0 -> 0,
# so the largest reading stays the largest target.
# Plain sign(y) * log10(|y| + eps) would NOT preserve the ordering: log10 is
# negative for |y| < 1, so multiplying by the sign turns negative readings into
# the largest targets and UCB would then chase negative readings.
eps = 1e-20
signs = np.sign(y_all)
y_trans = signs * np.log1p(np.abs(y_all) / eps) / np.log(10.0)

# === 2️⃣ GP with learnable kernel: amplitude * Matern(nu=2.5) ===
# Both factors carry explicit bounds: amplitude in [1e-3, 1e3],
# length scale in [1e-2, 2.0].
amplitude = C(constant_value=1.0, constant_value_bounds=(1e-3, 1e3))
smoothness = Matern(nu=2.5, length_scale=0.4, length_scale_bounds=(1e-2, 2.0))
kernel = amplitude * smoothness

# 10 optimizer restarts with a fixed random_state for repeatable fits.
gp_settings = dict(alpha=1e-6, normalize_y=True, n_restarts_optimizer=10, random_state=0)
gp = GaussianProcessRegressor(kernel=kernel, **gp_settings)

gp.fit(X_all, y_trans)

# === 3️⃣ Define UCB acquisition function ===
def acquisition_ucb(X, gp, kappa=6.0):
    """Upper Confidence Bound score plus the predictions it was built from.

    Parameters
    ----------
    X : array-like
        Candidate points, passed straight to ``gp.predict``.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)``.
    kappa : float, default 6.0
        Weight on the predictive standard deviation; larger values favour
        exploration.

    Returns
    -------
    tuple
        ``(ucb, mu, sigma)``: the score ``mu + kappa * sigma`` followed by the
        predictive mean and predictive standard deviation.
    """
    mean, std = gp.predict(X, return_std=True)
    score = mean + kappa * std
    return score, mean, std

# === 4️⃣ Candidate lattice on [0.02, 0.98]^2 (keeps slightly off the exact edges) ===
grid_size = 100
x1 = np.linspace(0.02, 0.98, grid_size)
x2 = np.linspace(0.02, 0.98, grid_size)
# 'ij' indexing plus a C-order reshape: x1 varies slowest, x2 fastest.
X_candidates = np.stack(np.meshgrid(x1, x2, indexing="ij"), axis=-1).reshape(-1, 2)

# === 5️⃣ UCB over the lattice and its maximiser ===
acq_values, mu, sigma = acquisition_ucb(X_candidates, gp, kappa=6.0)
best_idx = np.argmax(acq_values)
next_point, best_ucb = X_candidates[best_idx], acq_values[best_idx]

# === 6️⃣ Diagnostics: ten best candidates ===
# Distance from each candidate to its nearest already-sampled location.
offsets = X_candidates[:, None, :] - X_all[None, :, :]
min_dists = np.linalg.norm(offsets, axis=2).min(axis=1)
top_idx = np.argsort(acq_values)[::-1][:10]

print("=== Top 10 UCB candidates ===")
for rank, i in enumerate(top_idx, 1):
    print(f"{rank:02d}: point={X_candidates[i]}, UCB={acq_values[i]:.6f}, "
          f"min_dist={min_dists[i]:.4f}, mu={mu[i]:.6f}, sigma={sigma[i]:.6f}")

print("\nRecommended next query (UCB):", np.round(next_point, 6), "UCB =", round(best_ucb, 6))

# === 7️⃣ Fallback: the candidate farthest from every existing sample ===
idx_far = np.argmax(min_dists)
far_point = X_candidates[idx_far]
print("Exploration fallback (farthest-from-existing):", np.round(far_point, 6),
      "min_dist =", round(min_dists[idx_far], 4))
