In [None]:
%matplotlib inline
import os                                               # to set current working directory 
import sys                                              # supress output to screen for interactive variogram modeling
import io
import numpy as np                                      # arrays and matrix math
import pandas as pd                                     # DataFrames
import matplotlib.pyplot as plt                         # plotting
from sklearn.model_selection import train_test_split    # train and test split
from sklearn.metrics import mean_squared_error          # model error calculation
from sklearn.preprocessing import StandardScaler        # standardize data
import scipy                                            # kernel density estimator for PDF plot
from matplotlib.pyplot import cm                        # color maps


import tensorflow as tf                                 # build deep learning models
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta


from ipywidgets import interactive                      # widgets and interactivity
from ipywidgets import widgets                            
from ipywidgets import Layout
from ipywidgets import Label
from ipywidgets import VBox, HBox
import warnings
warnings.filterwarnings('ignore')                       # supress warnings

In [19]:
# --- Experiment configuration ---
nreal = 10                                              # number of train/test split realizations
seed = 42                                               # base random seed
np.random.seed(seed)                                    # seed NumPy's global RNG for reproducibility

# Candidate values for each hyperparameter.
# NOTE: `reduce_nodes` and `epochs` are rebound to ipywidgets controls by the UI cell further down.
activation_functions = ['linear', 'ReLU', 'Sigmoid', 'Softmax', 'Leaky ReLU', 'GELU']
reduce_nodes = [True, False]
node_sizes = [2 ** power for power in range(8)]         # 1, 2, 4, ..., 128
epochs = [1, 2, 3, 4, 5, 10, 20, 50, 100]
learning_rates = [0.0001, 0.001, 0.01, 0.1]
batch_sizes = [2 ** power for power in range(6)]        # 1, 2, 4, ..., 32
optimizers = ['SGD', 'Adam', 'Adadelta', 'RMSprop']

In [24]:
import ipywidgets as widgets
from IPython.display import display

# Option lists and label style shared by several controls below
_activation_choices = ['linear', 'ReLU', 'Sigmoid', 'Softmax', 'Leaky ReLU', 'GELU']
_node_choices = [1, 2, 4, 8, 16, 32, 64, 128]
_wide_label = {'description_width': 'initial'}           # let long descriptions use their natural width

# --- Data parameters ---
title_left = widgets.HTML(value="<b>📊 Data Parameters</b>")
n = widgets.IntSlider(
    value=30, min=15, max=80, step=1,
    description='Data Size (n)', style=_wide_label,
)
split = widgets.FloatSlider(
    value=0.20, min=0.05, max=0.95, step=0.05,
    description='Test Split %', style=_wide_label,
)
std = widgets.FloatSlider(
    value=0, min=0, max=50, step=1.0,
    description='Noise StDev', style=_wide_label,
)

# --- Input layer ---
title_input = widgets.HTML(value="<b>📥 Input Layer</b>")
input_activation = widgets.Dropdown(
    options=_activation_choices, value='ReLU', description='Activation',
)
input_nodes = widgets.Dropdown(
    options=_node_choices, value=64, description='Nodes Per Layer',
)

# --- Hidden layers ---
title_hidden = widgets.HTML(value="<b>🤖 Hidden Layers</b>")
hidden_activation = widgets.Dropdown(
    options=_activation_choices, value='ReLU', description='Activation',
)
reduce_nodes = widgets.ToggleButtons(
    options=[True, False], value=True, description='Reduce Nodes',
)
hidden_nodes = widgets.Dropdown(
    options=_node_choices, value=64, description='Nodes Per Layer',
)
num_hidden_layers = widgets.IntSlider(
    value=2, min=1, max=4, step=1, description='Num Hidden Layers',
)
dropout_rate = widgets.FloatSlider(
    value=0.1, min=0.0, max=0.3, step=0.1, description='Dropout Rate',
)

# --- ANN training hyperparameters ---
title_ann = widgets.HTML(value="<b>🧠 ANN Parameters</b>")
epochs = widgets.Dropdown(
    options=[1, 2, 3, 4, 5, 10, 20, 50, 100], value=10, description='Epochs',
)
learning_rate = widgets.Dropdown(
    options=[0.0001, 0.001, 0.01, 0.1], value=0.001, description='Learning Rate',
)
batch_size = widgets.Dropdown(
    options=[1, 2, 4, 8, 16, 32], value=16, description='Batch Size',
)
optimizer = widgets.Dropdown(
    options=['SGD', 'Adam', 'Adadelta', 'RMSprop'], value='Adam', description='Optimizer',
)

# Stack each group of controls vertically under its title
data_column = widgets.VBox([title_left, n, split, std])
input_column = widgets.VBox([title_input, input_activation, input_nodes])
hidden_column = widgets.VBox(
    [title_hidden, hidden_activation, hidden_nodes, num_hidden_layers, dropout_rate, reduce_nodes]
)
ann_column = widgets.VBox([title_ann, epochs, learning_rate, batch_size, optimizer])

# Lay the four groups out side by side and render the panel
ui = widgets.HBox([data_column, input_column, hidden_column, ann_column])
display(ui)


HBox(children=(VBox(children=(HTML(value='<b>📊 Data Parameters</b>'), IntSlider(value=30, description='Data Si…

In [None]:
def data_generator(n: int, std: float, seed: int):
    """
    Generates a synthetic 1-D regression dataset from a quadratic function with added noise.

    The predictor is drawn uniformly from [0, 20) and the response is
    ``X**2 + 50`` plus zero-mean Gaussian noise.

    Parameters:
    - n (int): Number of data points.
    - std (float): Standard deviation of the Gaussian noise added to the response.
    - seed (int): Random seed for reproducibility (re-seeds NumPy's global RNG).

    Returns:
    - X (array): Predictor feature values, shape (n,).
    - y (array): Noisy target values (response variable), shape (n,).
    - X_scaled (array): Standardized copy of X (zero mean, unit variance), shape (n,).
    """

    np.random.seed(seed)  # Seed the random number generator for reproducibility

    # Random predictor values within the range [0, 20)
    X = np.random.rand(n) * 20

    # Quadratic truth model plus Gaussian noise
    y = X ** 2 + 50.0
    y += np.random.normal(loc=0.0, scale=std, size=n)

    # StandardScaler requires a 2-D (n_samples, n_features) array; passing the
    # 1-D X directly raises ValueError. Reshape to a single column for scaling
    # and flatten back so X_scaled has the same shape as X.
    X_scaled = StandardScaler().fit_transform(X.reshape(-1, 1)).ravel()

    return X, y, X_scaled



def split_data(X, y, test_size=0.2, seed=42, realization=0):
    """
    Partition a dataset into train and test subsets for one realization.

    Parameters:
    - X (array-like): Feature values.
    - y (array-like): Target values.
    - test_size (float): Proportion of the samples placed in the test subset.
    - seed (int): Base random seed.
    - realization (int): Offset added to the base seed so each realization
      gets its own reproducible shuffle.

    Returns:
    - The result of ``train_test_split``: X_train, X_test, y_train, y_test.
    """
    realization_seed = seed + realization
    partitions = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=realization_seed,
    )
    return partitions

In [None]:
def input_layer(nodes: int, activation_function: str):
    """
    Build the first Dense layer of the network, which also fixes the input shape.

    Parameters:
    nodes (int): Number of units in the layer.
    activation_function (str): Activation identifier handed to Keras unchanged.

    Returns:
    Dense: A Keras Dense layer declared with ``input_shape=(1,)``, i.e. one
    scalar feature per sample.
    """
    first_layer = Dense(
        units=nodes,
        activation=activation_function,
        input_shape=(1,),
    )
    return first_layer


def hidden_layer(nodes: int, activation_function: str):
    """
    Build a single hidden Dense layer.

    Parameters:
    nodes (int): Number of units in the layer.
    activation_function (str): Activation identifier handed to Keras unchanged.

    Returns:
    Dense: A Keras Dense layer with the requested width and activation.

    Example:
    >>> layer = hidden_layer(10, 'relu')
    """
    layer = Dense(units=nodes, activation=activation_function)
    return layer


def output_layer():
    """
    Build the regression output layer.

    Returns:
    Dense: A Keras Dense layer with a single unit and linear activation.
    """
    regression_head = Dense(units=1, activation='linear')
    return regression_head


def create_hidden_layers(model, num_layers: int, start_nodes: int, reduce_nodes: bool, activation_function: str, dropout_rate: float):
    """
    Append a stack of hidden layers, each followed by a Dropout layer, to a model.

    Parameters:
    - model (Sequential): The Keras Sequential model the layers are added to (modified in place).
    - num_layers (int): Number of hidden layers to add.
    - start_nodes (int): Width of the first hidden layer.
    - reduce_nodes (bool): If True, layer k (0-based) gets start_nodes / 2**k units,
      never fewer than 1; if False, every layer gets start_nodes units.
    - activation_function (str): Activation used for every hidden layer.
    - dropout_rate (float): Rate passed to the Dropout layer placed after each hidden layer.

    Returns:
    - model (Sequential): The same model object, with the new layers appended.
    """
    for depth in range(num_layers):
        # Halve the width at every level when reducing, but keep at least one unit
        width = max(1, int(start_nodes / (2 ** depth))) if reduce_nodes else start_nodes

        model.add(hidden_layer(width, activation_function))
        # Each hidden layer is immediately followed by dropout
        model.add(Dropout(dropout_rate))

    return model


def build_model(input_nodes: int, input_activation: str,
                num_hidden_layers: int, hidden_nodes: int, reduce_nodes: bool = True, hidden_activation: str = 'ReLU', dropout_rate: float = 0.0,
                optimizer: str = 'adam', learning_rate: float = 0.01):

    """
    Builds and compiles a customizable neural network model using the Sequential API.

    The network is: input Dense layer -> `num_hidden_layers` x (Dense + Dropout)
    -> single-unit linear output layer, compiled with mean-squared-error loss.

    Parameters:
    ----------
    input_nodes : int
        The number of nodes in the input layer.

    input_activation : str
        The activation function for the input layer. Passed to Keras unchanged.

    num_hidden_layers : int
        The number of hidden layers to be added to the model.

    hidden_nodes : int
        The number of nodes in the first hidden layer. The number of nodes in subsequent
        layers will decrease if `reduce_nodes` is set to True.

    reduce_nodes : bool, default=True
        Whether to halve the number of nodes in each subsequent hidden layer. If False,
        all hidden layers will have the same number of nodes as the first hidden layer.

    hidden_activation : str, default='ReLU'
        The activation function for all hidden layers. Passed to Keras unchanged.

    dropout_rate : float, default=0.0
        The dropout rate applied after each hidden layer.

    optimizer : str, default='adam'
        The optimizer to use for training the model. 'adam', 'sgd', 'adadelta' and
        'rmsprop' are matched case-insensitively and constructed with `learning_rate`.
        Any other value (another Keras optimizer name, or an already-constructed
        optimizer instance) is handed to `model.compile` unchanged, in which case
        `learning_rate` is not applied by this function.

    learning_rate : float, default=0.01
        The learning rate for the optimizer.

    Returns:
    -------
    model : keras.Sequential
        A compiled Sequential model ready to be trained.
    """
    # NOTE(review): UI labels such as 'Leaky ReLU' or 'Sigmoid' are forwarded to Keras
    # as-is; whether every label is a valid Keras activation identifier depends on the
    # installed Keras version - confirm.

    model = Sequential()
    # Add the input layer
    model.add(input_layer(input_nodes, input_activation))
    # Add hidden layers
    model = create_hidden_layers(model, num_hidden_layers, hidden_nodes, reduce_nodes, hidden_activation, dropout_rate)
    # Add the output layer
    model.add(output_layer())

    # Resolve the optimizer. Previously the configured instance was built but the raw
    # name was passed to compile(), so `learning_rate` was silently ignored; and the
    # lookup was case-sensitive, so the default 'adam' matched no branch.
    optimizer_classes = {
        'adam': Adam,
        'sgd': SGD,
        'adadelta': Adadelta,
        'rmsprop': RMSprop,
    }
    lookup_key = optimizer.strip().lower() if isinstance(optimizer, str) else None
    if lookup_key in optimizer_classes:
        opt = optimizer_classes[lookup_key](learning_rate=learning_rate)
    else:
        # Unknown name or pre-built optimizer object: let Keras interpret it.
        opt = optimizer

    model.compile(optimizer=opt, loss='mean_squared_error')

    return model

def train_model(model, X_train, y_train, epochs: int = 10, batch_size: int = 32):
    """
    Fit a compiled model on the training data without printing progress.

    Parameters:
    ----------
    model : keras.Sequential
        The compiled model to train. It is fitted in place.

    X_train : np.ndarray
        Training features.

    y_train : np.ndarray
        Training targets.

    epochs : int, default=10
        Number of passes over the training data.

    batch_size : int, default=32
        Number of samples per gradient update.

    Returns:
    -------
    history
        Whatever ``model.fit`` returns (for Keras models, a History object
        describing the training run). Note this is NOT the trained model;
        keep using the `model` argument for predictions.
    """
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'verbose': 0,  # silent training
    }
    return model.fit(X_train, y_train, **fit_options)



In [7]:
# Sweep grids used by the result tables below and by the experiment loop.
# They were referenced but never defined, and `epochs` cannot be used for sizing
# because the UI cell rebinds it to a Dropdown widget (which has no len()).
# The values mirror the ranges offered by the corresponding UI controls.
num_layers = [1, 2, 3, 4]                               # hidden-layer counts (slider range 1-4)
dropout_rates = [0.0, 0.1, 0.2, 0.3]                    # dropout rates (slider range 0.0-0.3, step 0.1)
epochs_list = [1, 2, 3, 4, 5, 10, 20, 50, 100]          # epoch counts (same options as the Epochs dropdown)

# Dictionary to store MSE results: one (grid value x realization) table per swept hyperparameter
mse_results = {
    'node_sizes': np.zeros([len(node_sizes), nreal]),
    # Separate table so a hidden-layer width sweep need not overwrite the input-layer one
    'hidden_node_sizes': np.zeros([len(node_sizes), nreal]),
    'num_layers': np.zeros([len(num_layers), nreal]),
    'dropout_rates': np.zeros([len(dropout_rates), nreal]),
    'epochs_list': np.zeros([len(epochs_list), nreal]),
    'learning_rates': np.zeros([len(learning_rates), nreal]),
    'batch_sizes': np.zeros([len(batch_sizes), nreal])
}

In [None]:
# Build one model from the current UI selections.
# - build_model takes nine arguments; batch size is a training setting (see train_model),
#   so passing it here raised TypeError.
# - The UI names are widget objects, so their `.value` is what build_model needs.
model = build_model(
    input_nodes=input_nodes.value,
    input_activation=input_activation.value,
    num_hidden_layers=num_hidden_layers.value,
    hidden_nodes=hidden_nodes.value,
    reduce_nodes=reduce_nodes.value,
    hidden_activation=hidden_activation.value,
    dropout_rate=dropout_rate.value,
    optimizer=optimizer.value,
    learning_rate=learning_rate.value,
)

In [None]:
def _current(setting):
    """Return a widget's current `.value`, or the object itself if it has none."""
    return getattr(setting, 'value', setting)


def evaluate_config(config, X_train, X_test, y_train, y_test):
    """
    Build, train and score one network.

    Parameters:
    - config (dict): All `build_model` keyword arguments plus the training
      settings 'epochs' and 'batch_size'.
    - X_train, X_test, y_train, y_test: One train/test split.

    Returns:
    - Test-set mean squared error of the trained model.
    """
    training_keys = ('epochs', 'batch_size')
    model_kwargs = {key: val for key, val in config.items() if key not in training_keys}

    model = build_model(**model_kwargs)
    # train_model returns the fit history, not the model; keep `model` bound to the network
    train_model(model, X_train, y_train, config['epochs'], config['batch_size'])
    y_pred = model.predict(X_test, verbose=0)  # verbose=0 avoids one progress bar per model
    return mean_squared_error(y_test, y_pred)


# Baseline configuration: a snapshot of the current UI selections as plain values
baseline = {
    'input_nodes': _current(input_nodes),
    'input_activation': _current(input_activation),
    'num_hidden_layers': _current(num_hidden_layers),
    'hidden_nodes': _current(hidden_nodes),
    'reduce_nodes': _current(reduce_nodes),
    'hidden_activation': _current(hidden_activation),
    'dropout_rate': _current(dropout_rate),
    'optimizer': _current(optimizer),
    'learning_rate': _current(learning_rate),
    'epochs': _current(epochs),
    'batch_size': _current(batch_size),
}

# (results table, baseline setting to vary, candidate values) - one row per sweep.
# Each sweep varies exactly one setting and holds the rest at the baseline.
sweeps = [
    ('node_sizes',        'input_nodes',       node_sizes),
    ('hidden_node_sizes', 'hidden_nodes',      node_sizes),
    ('num_layers',        'num_hidden_layers', num_layers),
    ('dropout_rates',     'dropout_rate',      dropout_rates),
    ('epochs_list',       'epochs',            epochs_list),
    ('learning_rates',    'learning_rate',     learning_rates),
    ('batch_sizes',       'batch_size',        batch_sizes),
]

# The hidden-layer width sweep gets its own table instead of overwriting the
# input-layer results stored under 'node_sizes'.
mse_results.setdefault('hidden_node_sizes', np.zeros([len(node_sizes), nreal]))

X, y, X_scaled = data_generator(_current(n), _current(std), seed)
# NOTE(review): the models are trained on the unscaled X, as before; X_scaled is
# currently unused - confirm whether the standardized feature was intended.
X_column = X.reshape(-1, 1)  # explicit (n_samples, 1) feature matrix for the network
test_fraction = _current(split)

for j in range(nreal):
    # Split data for each realization
    X_train, X_test, y_train, y_test = split_data(X_column, y, test_fraction, seed, j)

    for result_key, setting_name, candidates in sweeps:
        for i, candidate in enumerate(candidates):
            config = {**baseline, setting_name: candidate}
            mse_results[result_key][i, j] = evaluate_config(config, X_train, X_test, y_train, y_test)

# The mse_results dictionary now contains the MSE for each parameter across nreal realizations

In [None]:
# Compile the model with the Adam optimizer and a custom learning rate.
# A dedicated name is used so the `optimizer` dropdown defined in the UI cell is not
# overwritten; rebinding it made the earlier cells fail or change meaning on re-run.
adam_optimizer = Adam(learning_rate=0.01)  # You can change the learning rate as needed

model.compile(optimizer=adam_optimizer, loss='mean_squared_error')

# Train the model on the full dataset with a custom batch size
model.fit(X, y, epochs=100, batch_size=2, verbose=1)  # Using batch size of 2

# Predict with the trained model
predictions = model.predict(X)

# Print the predictions
print("Predictions: ", predictions.flatten())

In [None]:
# Title banner for the dashboard
l = widgets.Text(
    value='                                       Machine Learning Overfit/Generalization Demo, Prof. Michael Pyrcz and John Eric McCarthy II, The University of Texas at Austin',
    layout=Layout(width='950px', height='30px'),
)

# Settings common to every slider: horizontal, full-width label, and only fire
# change events when the handle is released.
_slider_common = dict(
    orientation='horizontal',
    style={'description_width': 'initial'},
    continuous_update=False,
)

# NOTE: `n`, `split`, `std` and `ui` rebind names already created by the earlier UI cell.
n = widgets.IntSlider(value=30, min=15, max=80, step=1, description='n', **_slider_common)
split = widgets.FloatSlider(value=0.20, min=0.05, max=.95, step=0.05, description='Test %', **_slider_common)
std = widgets.FloatSlider(value=0, min=0, max=50, step=1.0, description='Noise StDev', **_slider_common)
degree = widgets.IntSlider(value=1, min=1, max=12, step=1, description='Model Order', **_slider_common)

# One row of sliders, with the title banner stacked above it
ui = widgets.HBox([n, split, std, degree])
ui2 = widgets.VBox([l, ui])