In [1]:
from typing import List, Tuple, Callable, Optional
import numpy as np
import pandas as pd
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import plotly.graph_objects as go

RANDOM_STATE = 42


In [2]:
def plot_heatmap(
    func, func_name, X_bounds,
    points_data: Optional[Tuple[np.ndarray, np.ndarray, str]] = None,
):
    # Create a grid of points
    xx, yy = np.meshgrid(
        np.arange(X_bounds[0, 0], X_bounds[0, 1], 0.01),
        np.arange(X_bounds[1, 0], X_bounds[1, 1], 0.01),
    )
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # Set Colorbar to use for plots
    colorbar = dict(
        len=0.8,
        thickness=30,
        title_text='Target',
        title_side='bottom',
        y=0., yanchor='bottom',  # Anchor point for vertical positioning
        x=1.0, xanchor='left',  # Anchor point for horizontal positioning
    )

    # Loss function values
    Z = func(grid_points)
    Z = Z.reshape(xx.shape)

    # Create the heatmap
    # Create the heatmap with countours
    func_heatmap = go.Heatmap(
        x=xx[0], 
        y=yy[:, 0],
        z=Z,
        colorscale='viridis',
        colorbar=colorbar,
    )

    data = [func_heatmap]

    # Add points trace if points_data is provided
    if points_data is not None:
        X_points, y_points, points_label = points_data
        X_points = np.atleast_2d(X_points)
        points_trace = go.Scatter(
            x=X_points[:, 0],
            y=X_points[:, 1],
            name=points_label,
            mode='markers',
            marker=dict(
                color='red',
                size=7,
                symbol='circle',
                line_width=1
            ),
            text=[
                f'X1: {x1:.4f}<br>'
                f'X2: {x2:.4f}<br>'
                f'y : {y:.4f}'
                for (x1, x2), y in zip(X_points, y_points)
            ],
            hoverinfo='text'
        )
        data.append(points_trace)

    fig_size = 700

    fig = go.Figure(
        data=data,
        layout=dict(
            xaxis_title="X1",
            yaxis_title="X2",
            showlegend=True,
            width=fig_size,
            height=fig_size,  # Keep the overall figure square aspect_ratio = 1
            xaxis=dict(
                range=X_bounds[0, :],
                constrain='domain',  # Ensures x-axis stays within its domain
            ),
            yaxis=dict(
                range=X_bounds[1, :],
                constrain='domain',  # Ensures y-axis stays within its domain
                scaleanchor="x",
                scaleratio=1
            ),  # Keep the same scale for x and y axes
            title=f"{func_name} Heatmap",
        )
    )

    fig.show()

In [3]:
def get_cv_results(cv_scores):
    if cv_scores is None:
        raise ValueError("Model has not been fitted yet.")

    # Extract relevant information from cv_results_
    results = pd.DataFrame(cv_scores)

    # Select columns of interest
    param_columns = [col for col in results.columns if col.startswith('param_')]
    score_columns = ['mean_test_score', 'std_test_score', 'rank_test_score']
    selected_columns = param_columns + score_columns

    # Create a DataFrame with selected columns
    df = results[selected_columns].sort_values('rank_test_score')

    # Rename columns for clarity
    df = df.rename(columns={
        'mean_test_score': 'Mean CV Score',
        'std_test_score': 'Std CV Score',
        'rank_test_score': 'Rank'
    })

    # Round numeric columns
    numeric_columns = ['Mean CV Score', 'Std CV Score']
    df[numeric_columns] = df[numeric_columns].round(4)

    return df

In [4]:
def evaluate(model, X, y):
    y_pred = model.predict(X)

    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y, y_pred)
    mae = mean_absolute_error(y, y_pred)
    
    return {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2,
    }

# Polynomial function

In [5]:
def polynomial_function(X: np.ndarray, n_dim_y: int) -> np.ndarray:
    # Ensure X is 2D
    X = np.atleast_2d(X)
    n_samples, n_dim_x = X.shape
    
    # Ensure all input values are positive for the square root term
    X_positive = np.abs(X) + 1e-6
    
    # Initialize output array
    Y = np.zeros((n_samples, n_dim_y))
    
    # Coefficients
    a = 0.5
    b = 0.3
    c = 0.2
    
    for i in range(n_dim_y):
        if i == 0:
            # Polynomial with square root term
            Y[:, i] = (a * X[:, 0]**2 + 
                       b * X[:, min(1, n_dim_x-1)] + 
                       c * np.sqrt(X_positive[:, min(2, n_dim_x-1)]))
        elif i == 1:
            # Quadratic function
            Y[:, i] = (a * X[:, 0]**2 + 
                       b * X[:, min(1, n_dim_x-1)]**2 + 
                       c * X[:, min(2, n_dim_x-1)]**2)
        else:
            # Increasing polynomial powers
            power = i + 1
            Y[:, i] = np.sum(a * X**power + b * X**(power-1), axis=1)
    
    # Normalize Y to [0, 1] range
    Y = (Y - Y.min(axis=0)) / (Y.max(axis=0) - Y.min(axis=0))
    
    # Add small Gaussian noise
    noise_level = 0.1
    noise = np.random.normal(0, noise_level, Y.shape)
    Y = np.clip(Y + noise, 0, 1)  # Ensure values remain in [0, 1]
    
    return Y

In [6]:
# Set input space
n_dim_X = 2
X_bounds = np.array([(0, 1)]*n_dim_X)

def polynomial_function_1d(X: np.ndarray) -> np.ndarray:
    return polynomial_function(X, 1)

# Plot heatmap
plot_heatmap(polynomial_function_1d, 'Polynomial Function', X_bounds)

# Shubert function

In [7]:
def shubert_like_function(X: np.ndarray, n_dim_y: int) -> np.ndarray:
    X = np.atleast_2d(X)
    n_samples, n_dim_x = X.shape
    
    # Transform [0, 1]^n_dim_X into [-1, 1]^n_dim_X
    X_transformed = 2 * (X - 0.5)

    # Angular frequency (omega)
    omega = 10 * np.pi

    # Compute Shubert-like function for each input dimension
    shubert_terms = 0.5 * ((1 - np.abs(X_transformed)) * np.cos(omega * X_transformed**2) + 1)
    
    # Generate y-dimensional output
    Y = np.zeros((n_samples, n_dim_y))
    
    for i in range(n_dim_y):
        # Use different combinations of input dimensions for each output dimension
        weights = np.random.rand(n_dim_x)
        Y[:, i] = np.dot(shubert_terms, weights) / np.sum(weights)
    
    # Normalize Y to ensure output is in [0, 1]^n_dim_y
    Y = (Y - Y.min(axis=0)) / (Y.max(axis=0) - Y.min(axis=0))
    
    return Y

In [8]:
# Set input space
n_dim_X = 2
X_bounds = np.array([(0, 1)]*n_dim_X)

def shubert_like_function_1d(X: np.ndarray) -> np.ndarray:
    return shubert_like_function(X, 1)

# Plot heatmap
plot_heatmap(shubert_like_function_1d, 'Shubert-like Function', X_bounds)

In [9]:
BANDWIDTH_BOUNDS = (1e-5, 1.0)
KERNELS = {
    'RBF': RBF(
        length_scale=0.5,
        length_scale_bounds=BANDWIDTH_BOUNDS
    ),
    'RQ': RationalQuadratic(
        length_scale=0.5,
        alpha=1.0,
        length_scale_bounds=BANDWIDTH_BOUNDS,
        alpha_bounds=(0.1, 10.0)
    )
}

class GPRModel:
    def __init__(self):
        self.model: GaussianProcessRegressor = None
        self.best_params = None
        self.cv_scores = None
        self.is_trained = False

    def create_model(self, **kwargs):
        return GaussianProcessRegressor(random_state=RANDOM_STATE, **kwargs)

    def fit(self, X, y):

        param_dist = {'kernel': list(KERNELS.values())}

        random_search = RandomizedSearchCV(
            estimator=self.create_model(),
            param_distributions=param_dist,
            n_iter=20,
            cv=5,
            verbose=2,
            n_jobs=-1,
            scoring='r2',
            random_state=RANDOM_STATE
        )

        random_search_result = random_search.fit(X, y)

        self.model = random_search_result.best_estimator_
        self.best_params = random_search_result.best_params_
        self.cv_scores = random_search_result.cv_results_

        print("Best parameters:", self.best_params)
        print("Best score:", random_search_result.best_score_)

        self.is_trained = True

    def predict(self, X):
        return self.model.predict(X)

    def get_model_params(self):
        if not self.is_trained:
            return {}
        # Get kernel parameters
        kernel_params = {
            k: v for k, v in self.model.kernel_.get_params().items()
            if not k.endswith('_bounds')
        }
        return {
            **kernel_params,
            'lml': self.model.log_marginal_likelihood_value_,
            **self.best_params,
            'mean_cv_score': np.mean(self.cv_scores['mean_test_score']),
            'std_cv_score': np.std(self.cv_scores['mean_test_score']),
            'best_score': self.cv_scores['mean_test_score'][self.cv_scores['rank_test_score'].argmin()],
        }

    def get_parameters(self):
        if not self.is_trained:
            return {}
        params = self.get_model_params()
        params.update({
            'n_features': self.model.n_features_in_,
            'n_train': self.model.X_train_.shape[0],
        })
        return params

In [10]:
class MLPModel:
    def __init__(self):
        self.model: MLPRegressor = None
        self.best_params = None
        self.cv_scores = None
        self.is_trained = False

    def create_model(self, **kwargs):
        return MLPRegressor(
            hidden_layer_sizes=(128, 64, 32),
            activation='relu',
            solver='adam',
            alpha=0.005,
            batch_size=16,
            learning_rate='adaptive',
            max_iter=2000,
            early_stopping=True,
            n_iter_no_change=50,
            random_state=RANDOM_STATE,
            **kwargs
        )

    def fit(self, X, y):

        param_dist = {
            'hidden_layer_sizes': [
                (16,), (32,), (64,),  # Single-layer architectures
                (16, 8), (32, 16),  # Simple architectures
                (32, 16, 8), (64, 32, 16),  # Decreasing sizes
                (16, 8, 16), (32, 16, 32),  # Bottleneck architectures
            ],
            'alpha': [0.0001, 0.001, 0.01],  # Focusing on stronger regularization
            'batch_size': [16, 32],  # Smaller batch sizes for small dataset
            'max_iter': [1000, 1500]  # Ensuring enough iterations for convergence
        }
        # -> Models with more complex architeructures works better
        param_dist = {
            'hidden_layer_sizes': [
                (128,), (64,),
                (32, 16), 
                (128, 64, 32), (96, 48, 24), (64, 32, 16), (32, 16, 8), (16, 8, 16)
            ],
            'alpha': [0.0001, 0.001, 0.01],
            'learning_rate': ['adaptive', 'constant'],
            'learning_rate_init': [0.001, 0.01],  # Only used when learning_rate is 'constant'
            'batch_size': [16, 32, 64],
            'max_iter': [1000, 1500, 2000]
        }
        # -> alpha 0.0001 is bad, batch_size 64 is bad, complex architecturs are better
        param_dist = {
            'hidden_layer_sizes': [
                (64, 32, 16), (96, 48, 24), (128, 64, 32), (144, 72, 36),  (256, 128, 64),  # Decreasing
                (64, 32, 64), (128, 64, 128), (160, 80, 160), (256, 128, 256),              # Bottleneck
            ],
            'alpha': [0.001, 0.005, 0.01],
            'learning_rate': ['adaptive', 'constant'],
            'learning_rate_init': [0.001, 0.005],
            'max_iter': [1000, 2000, 3000]
        }
        # -> alpha 0.005 and 0.01 are better, constant learning rate at 0.01 is good

        random_search = RandomizedSearchCV(
            estimator=self.create_model(),
            param_distributions=param_dist,
            n_iter=20,
            cv=5,
            verbose=2,
            n_jobs=-1,
            scoring='r2',
            random_state=RANDOM_STATE
        )

        random_search_result = random_search.fit(X, y)

        self.model = random_search_result.best_estimator_
        self.best_params = random_search_result.best_params_
        self.cv_scores = random_search_result.cv_results_

        print("Best parameters:", self.best_params)
        print("Best score:", random_search_result.best_score_)

        self.is_trained = True

    def predict(self, X):
        return self.model.predict(X)

    def get_model_params(self):
        if not self.is_trained:
            return {}

        return {
            **self.best_params,
            'mean_cv_score': np.mean(self.cv_scores['mean_test_score']),
            'std_cv_score': np.std(self.cv_scores['mean_test_score']),
            'best_score': self.cv_scores['mean_test_score'][self.cv_scores['rank_test_score'].argmin()],
            'total_params': sum(coef.size for coef in self.model.coefs_) + sum(intercept.size for intercept in self.model.intercepts_)
        }

    def get_parameters(self):
        if not self.is_trained:
            return {}
        params = self.get_model_params()
        params.update({
            'n_layers': len(self.model.hidden_layer_sizes) + 2,  # including input and output layers
            'activation': self.model.activation,
            'solver': self.model.solver,
            'learning_rate': self.model.learning_rate,
            'early_stopping': self.model.early_stopping,
            'n_iter_no_change': self.model.n_iter_no_change,
        })
        return params

# Fitting polynomes

### MLP search CV

In [11]:
n_dim_X = 2

# Generate sample data
X = np.random.rand(200, n_dim_X)
y = polynomial_function_1d(X)

# Create and train the model
mlp_model = MLPModel()
mlp_model.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



Best parameters: {'max_iter': 3000, 'learning_rate_init': 0.001, 'learning_rate': 'constant', 'hidden_layer_sizes': (256, 128, 64), 'alpha': 0.005}
Best score: 0.788976478261321


In [12]:
# Print CV results
df_cv_scores = get_cv_results(mlp_model.cv_scores)

# Format the hidden_layer_sizes column
adaptive_mask = (df_cv_scores['param_learning_rate'] == 'adaptive')
df_cv_scores['param_learning_rate_init'][adaptive_mask] = np.nan

# Print the DataFrame
print("\nCross-validation results:")
print(df_cv_scores.to_string(index=False))


Cross-validation results:
param_max_iter param_learning_rate_init param_learning_rate param_hidden_layer_sizes param_alpha  Mean CV Score  Std CV Score  Rank
          3000                    0.001            constant           (256, 128, 64)       0.005         0.7890        0.0344     1
          3000                      NaN            adaptive            (128, 64, 32)       0.005         0.7854        0.0368     2
          3000                      NaN            adaptive           (256, 128, 64)        0.01         0.7843        0.0328     3
          1000                    0.001            constant             (96, 48, 24)       0.005         0.7805        0.0292     4
          2000                      NaN            adaptive           (160, 80, 160)       0.005         0.7783        0.0355     5
          3000                      NaN            adaptive           (160, 80, 160)        0.01         0.7779        0.0371     6
          1000                      NaN          



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [13]:
# Make predictions
X_test = np.random.rand(50, n_dim_X)
y_test = polynomial_function_1d(X_test)

# Evaluate train set
train_metrics = evaluate(mlp_model, X, y)
print(f"\nModel Evaluation on train set of {X.shape[0]} samples:")
for metric, value in train_metrics.items():
    print(f"{metric}: {value:.4f}")

# Evaluate test set
test_metrics = evaluate(mlp_model, X_test, y_test)
print(f"\nModel Evaluation on test set of {X_test.shape[0]} samples:")
for metric, value in test_metrics.items():
    print(f"{metric}: {value:.4f}")

# Get important model parameters
important_params = mlp_model.get_model_params()
print("\nImportant Model Parameters:")
for key, value in important_params.items():
    print(f"{key}: {value}")


Model Evaluation on train set of 200 samples:
MAE: 0.0844
RMSE: 0.1039
R2: 0.8126

Model Evaluation on test set of 50 samples:
MAE: 0.1010
RMSE: 0.1229
R2: 0.7762

Important Model Parameters:
max_iter: 3000
learning_rate_init: 0.001
learning_rate: constant
hidden_layer_sizes: (256, 128, 64)
alpha: 0.005
mean_cv_score: 0.7587370958589807
std_cv_score: 0.026058107714511357
best_score: 0.788976478261321
total_params: 41985


In [14]:
# Set input space
n_dim_X = 2
X_bounds = np.array([(0, 1)]*n_dim_X)

# Plot heatmap
plot_heatmap(mlp_model.predict, 'MLP polynomial prediction', X_bounds)


### GPR

In [15]:
n_dim_X = 2

# Generate sample data
X = np.random.rand(200, n_dim_X)
y = polynomial_function_1d(X)

gpr_model = GPRModel()
gpr_model.fit(X, y)

Fitting 5 folds for each of 2 candidates, totalling 10 fits



The total space of parameters 2 is smaller than n_iter=20. Running 2 iterations. For exhaustive searches, use GridSearchCV.



Best parameters: {'kernel': RationalQuadratic(alpha=1, length_scale=0.5)}
Best score: 0.5268374828197446



The optimal value found for dimension 0 of parameter alpha is close to the specified lower bound 0.1. Decreasing the bound and calling fit again may find a better value.



In [16]:
# Print CV results
df_cv_scores = get_cv_results(gpr_model.cv_scores)

# Print the DataFrame
print("\nCross-validation results:")
print(df_cv_scores.to_string(index=False))


Cross-validation results:
                                param_kernel  Mean CV Score  Std CV Score  Rank
RationalQuadratic(alpha=1, length_scale=0.5)         0.5268        0.1273     1
                       RBF(length_scale=0.5)        -4.1335        0.9840     2


In [17]:
# Make predictions
X_test = np.random.rand(50, n_dim_X)
y_test = polynomial_function_1d(X_test)

# Evaluate train set
train_metrics = evaluate(gpr_model, X, y)
print(f"\nModel Evaluation on train set of {X.shape[0]} samples:")
for metric, value in train_metrics.items():
    print(f"{metric}: {value:.4f}")

# Evaluate test set
test_metrics = evaluate(gpr_model, X_test, y_test)
print(f"\nModel Evaluation on test set of {X_test.shape[0]} samples:")
for metric, value in test_metrics.items():
    print(f"{metric}: {value:.4f}")

# Get important model parameters
important_params = gpr_model.get_model_params()
print("\nImportant Model Parameters:")
for key, value in important_params.items():
    print(f"{key}: {value}")


Model Evaluation on train set of 200 samples:
MAE: 0.0000
RMSE: 0.0000
R2: 1.0000

Model Evaluation on test set of 50 samples:
MAE: 0.1141
RMSE: 0.1476
R2: 0.6483

Important Model Parameters:
length_scale: 0.07516733611202724
alpha: 0.10000000000000002
lml: -27.65583827243941
kernel: RationalQuadratic(alpha=1, length_scale=0.5)
mean_cv_score: -1.8033311091428712
std_cv_score: 2.330168591962616
best_score: 0.5268374828197446


In [18]:
# Set input space
n_dim_X = 2
X_bounds = np.array([(0, 1)]*n_dim_X)

# Plot heatmap
plot_heatmap(gpr_model.predict, 'GPR polynomial prediction', X_bounds, (X, y.ravel(), 'train set'))


# Fitting Shubert

In [19]:
n_dim_X = 2

# Generate sample data
X = np.random.rand(200, n_dim_X)
y = shubert_like_function_1d(X)

# Create and train the model
mlp_model = MLPModel()
mlp_model.fit(X, y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



Best parameters: {'max_iter': 2000, 'learning_rate_init': 0.005, 'learning_rate': 'adaptive', 'hidden_layer_sizes': (160, 80, 160), 'alpha': 0.005}
Best score: 0.6240739383686092


In [20]:
# Print CV results
df_cv_scores = get_cv_results(mlp_model.cv_scores)

# Format the hidden_layer_sizes column
adaptive_mask = (df_cv_scores['param_learning_rate'] == 'adaptive')
df_cv_scores['param_learning_rate_init'][adaptive_mask] = np.nan

# Print the DataFrame
print("\nCross-validation results:")
print(df_cv_scores.to_string(index=False))


Cross-validation results:
param_max_iter param_learning_rate_init param_learning_rate param_hidden_layer_sizes param_alpha  Mean CV Score  Std CV Score  Rank
          2000                      NaN            adaptive           (160, 80, 160)       0.005         0.6241        0.0672     1
          3000                    0.005            constant             (64, 32, 64)       0.005         0.6102        0.0590     2
          3000                      NaN            adaptive             (64, 32, 16)       0.001         0.6068        0.0935     3
          1000                    0.005            constant             (64, 32, 16)       0.001         0.6068        0.0935     3
          3000                      NaN            adaptive           (160, 80, 160)        0.01         0.5854        0.0439     5
          1000                      NaN            adaptive             (64, 32, 16)       0.005         0.5771        0.0698     6
          3000                    0.001          



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
# Make predictions
X_test = np.random.rand(50, n_dim_X)
y_test = shubert_like_function_1d(X_test)

# Evaluate train set
train_metrics = evaluate(mlp_model, X, y)
print(f"\nModel Evaluation on train set of {X.shape[0]} samples:")
for metric, value in train_metrics.items():
    print(f"{metric}: {value:.4f}")

# Evaluate test set
test_metrics = evaluate(mlp_model, X_test, y_test)
print(f"\nModel Evaluation on test set of {X_test.shape[0]} samples:")
for metric, value in test_metrics.items():
    print(f"{metric}: {value:.4f}")

# Get important model parameters
important_params = mlp_model.get_model_params()
print("\nImportant Model Parameters:")
for key, value in important_params.items():
    print(f"{key}: {value}")


Model Evaluation on train set of 200 samples:
MAE: 0.1005
RMSE: 0.1267
R2: 0.6780

Model Evaluation on test set of 50 samples:
MAE: 0.1896
RMSE: 0.2367
R2: -0.0852

Important Model Parameters:
max_iter: 2000
learning_rate_init: 0.005
learning_rate: adaptive
hidden_layer_sizes: (160, 80, 160)
alpha: 0.005
mean_cv_score: 0.52839436579938
std_cv_score: 0.08635606746728788
best_score: 0.6240739383686092
total_params: 26481


In [22]:
# Set input space
n_dim_X = 2
X_bounds = np.array([(0, 1)]*n_dim_X)

# Plot heatmap
plot_heatmap(mlp_model.predict, 'MLP Shubert prediction', X_bounds)

# Find best sigma to compute score

In [67]:
from scipy.stats import norm
from scipy.optimize import brentq

def find_sigma_for_interval(interval_width: float, sigma_bounds: Tuple[float, float] =(0.005, 0.15)) -> float:
    """
    Find the smallest standard deviation (sigma) that makes the probability of belonging to a given interval
    effectively equal to one for a normal distribution.

    This function uses the Brent's method to efficiently find the optimal sigma value that satisfies
    the target probability condition.

    Parameters:
    -----------
    interval_width : float
        The width of the interval of interest. Should be a positive value between 0 and 1.

    sigma_bounds : Tuple[float, float], optional
        The lower and upper bounds for the sigma search range. Default is (0.005, 0.15).
        - Lower bound (0.005) is suitable for interval widths as small as 0.05.
        - Upper bound (0.15) is suitable for interval widths up to 1.
    """
    half_width = interval_width / 2
    target_prob = 0.999  # to avoid saturation around 1 of objective function
    mu = 0  # Center of the interval

    def objective(sigma):
        proba = norm.cdf(half_width, loc=mu, scale=sigma) - norm.cdf(-half_width, loc=mu, scale=sigma)
        print(f"sigma = {sigma:.3f}, proba = {proba:.3f}")
        return proba - target_prob

    # Define a reasonable range for sigma
    sigma_min, sigma_max = sigma_bounds

    # Use brentq to find the root
    sigma = brentq(objective, sigma_min, sigma_max, xtol=1e-2)
    return sigma

# Example usage
interval_width = 0.2  # For example, if your interest region is [0.6, 0.8]
sigma = find_sigma_for_interval(interval_width)
print(f"Recommended sigma for interval width {interval_width}: {sigma:.6f}")

# Verify the result
mu = 0.7  # Center of the [0.6, 0.8] interval
interest_region = [mu - interval_width/2, mu + interval_width/2]
proba = norm.cdf(interest_region[1], loc=mu, scale=sigma) - norm.cdf(interest_region[0], loc=mu, scale=sigma)
print(f"Probability of being in the interest interval: {proba:.6f}")

sigma = 0.005, proba = 1.000
sigma = 0.150, proba = 0.495
sigma = 0.010, proba = 1.000
sigma = 0.080, proba = 0.789
sigma = 0.015, proba = 1.000
sigma = 0.047, proba = 0.965
sigma = 0.020, proba = 1.000
sigma = 0.034, proba = 0.997
sigma = 0.025, proba = 1.000
Recommended sigma for interval width 0.2: 0.025000
Probability of being in the interest interval: 0.999937


In [72]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.stats import norm

def plot_normal_distribution(mu, interest_region):
    x_range = [0, 1]
    y_range = [0, 10]

    x = np.linspace(*x_range, 1000)

    fig = make_subplots(rows=1, cols=1)

    # Find smallest sigma to make interest proba = 1
    interval_width = interest_region[1] - interest_region[0]
    best_sigma = find_sigma_for_interval(interval_width)
    sigmas = np.logspace(np.log10(0.005), np.log10(0.15), 20)
    sigmas = np.sort(np.unique(np.append(sigmas, best_sigma)))
    interest_probas = []

    for sigma in sigmas:
        pdf = norm.pdf(x, loc=mu, scale=sigma)
        pdf_trace = go.Scatter(x=x, y=pdf, mode='lines', name='PDF', visible=False, line=dict(color='blue'))
        fig.add_trace(pdf_trace)

        integral = norm.cdf(interest_region[1], loc=mu, scale=sigma) - norm.cdf(interest_region[0], loc=mu, scale=sigma)
        interest_probas.append(integral)

    # Make PDF trace visible
    fig.data[0].visible = True

    steps = []
    for i in range(len(sigmas)):
        step = dict(
            method="update",
            args=[
                {"visible": [False] * len(sigmas)},
                {"annotations": [{
                    "text": f"Interest proba: {interest_probas[i]:.4f}",
                    "x": 0.,
                    "y": 1.1,
                    "xref": "paper",
                    "yref": "paper",
                    "showarrow": False,
                    "font": {"size": 14}
                }]}
            ],
            label=f"{sigmas[i]:.3f}"
        )
        step["args"][0]["visible"][i] = True
        steps.append(step)

    sliders = [dict(
        active=0,
        currentvalue={"prefix": "σ: "},
        pad={"t": 50},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders,
        title=f"Normal Distribution (μ={mu:.1f}, width={interval_width:.2f}, σ<sub>optimal</sub>={best_sigma:.2f})",
        xaxis_title='x',
        yaxis_title='Probability Density',
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        xaxis=dict(range=x_range),
        yaxis=dict(range=y_range)
    )

    fig.add_vline(x=interest_region[0], line_dash="dash", line_color="red")
    fig.add_vline(x=interest_region[1], line_dash="dash", line_color="red")

    fig.show()

# Example usage
mu = 0.5
width = 0.05
offset = 0
plot_normal_distribution(mu=mu, interest_region=[mu+offset-width/2, mu+offset+width/2])

sigma = 0.005, proba = 1.000
sigma = 0.150, proba = 0.132
sigma = 0.010, proba = 0.988
