# Imports

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize

# Load the Data

In [2]:
# Load the Diabetes dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Prepare the Data

In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Convert target to float32
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

# Convert data to TensorFlow tensors
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)

# Standard Optimizer

In [5]:
# Helper function to calculate MSE
def calculate_mse(model, X, y_true):
    # Ensure input shapes are consistent
    if len(X.shape) == 1:
        X = np.expand_dims(X, axis=0)

    y_pred = model.predict(X)
    mse = tf.keras.losses.MeanSquaredError(
        reduction="sum_over_batch_size", name="mean_squared_error"
    )
    return mse(y_true, y_pred).numpy()

In [6]:
# Define a function to build a Keras model
def build_model():
    model = tf.keras.Sequential([
        layers.Input(shape=(X_train.shape[1],)),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    return model

In [7]:
# Build the model
model = build_model()

# Compile the model with Adam optimizer
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Evaluate the model
mse_adam = calculate_mse(model, X_test, y_test)
print(f"Adam Optimizer Test MSE: {mse_adam:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Adam Optimizer Test MSE: 2933.6528


# SciPy Implementation 1

In [8]:
# Function to flatten the model weights into a single vector
def flatten_weights(weights):
    return np.concatenate([tf.reshape(w, [-1]).numpy() for w in weights])

# Function to reshape the flat vector back into the original weights
def reshape_weights(flat_weights, weight_shapes):
    weights = []
    offset = 0
    for shape in weight_shapes:
        size = np.prod(shape)
        weights.append(tf.reshape(flat_weights[offset:offset + size], shape))
        offset += size
    return weights

# Function to compute the loss and gradients
def scipy_loss_and_grads(flat_vars, model, X, y):
    var_shapes = [v.shape for v in model.trainable_variables]
    vars = tf.split(tf.convert_to_tensor(flat_vars, dtype=tf.float32), [tf.reduce_prod(v.shape) for v in model.trainable_variables])
    vars = [tf.reshape(v, shape) for v, shape in zip(vars, var_shapes)]
    model.set_weights(vars)

    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.reduce_mean(tf.square(y - y_pred))
    grads = tape.gradient(loss, model.trainable_variables)
    flat_grads = flatten_weights(grads)
    return loss.numpy().astype(np.float64), flat_grads.astype(np.float64)

# Function to perform SciPy optimization
def fit_scipy_minimize(model, X, y, method='L-BFGS-B', max_iter=100):
    initial_weights = model.get_weights()
    weight_shapes = [w.shape for w in initial_weights]
    initial_weights_flat = flatten_weights(initial_weights)

    results = minimize(
        fun=lambda w: scipy_loss_and_grads(w, model, X, y)[0],
        x0=initial_weights_flat,
        jac=lambda w: scipy_loss_and_grads(w, model, X, y)[1],
        method=method,
        options={'maxiter': max_iter}
    )

    optimized_weights = reshape_weights(results.x, weight_shapes)
    model.set_weights(optimized_weights)

    return model

In [9]:
# Build the model
model_scipy = build_model()

# Train the model
fitted_model_scipy = fit_scipy_minimize(
    model_scipy,
    X_train,
    y_train,
    method='L-BFGS-B',
    max_iter=500
)

# Evaluate the model
mse_scipy = calculate_mse(fitted_model_scipy, X_test, y_test)
print(f"SciPy Test MSE: {mse_scipy:.4f}\n")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
SciPy Test MSE: 5368.9180



# SciPy Implementation 2

In [10]:
# Enable eager execution
tf.config.run_functions_eagerly(True)

# Enable eager execution for tf.data functions
tf.data.experimental.enable_debug_mode()

# Function to get the model's weights and biases as a single vector
def get_weights_vector(model):
    weights = []
    for layer in model.layers:
        for param in layer.get_weights():
            weights.append(param.flatten())
    return np.concatenate(weights)

# Function to set the model's weights and biases from a vector
def set_weights_vector(model, vector):
    index = 0
    new_weights = []
    for layer in model.layers:
        layer_weights = []
        for param in layer.get_weights():
            shape = param.shape
            size = np.prod(shape)
            layer_weights.append(vector[index:index + size].reshape(shape))
            index += size
        new_weights.append(layer_weights)
    for layer, layer_weights in zip(model.layers, new_weights):
        layer.set_weights(layer_weights)

# Function to compute the loss
@tf.function(reduce_retracing=True)
def loss_function(weights_vector, model, X, y_true):
    set_weights_vector(model, weights_vector)
    y_pred = model(X, training=False)
    loss = tf.keras.losses.MeanSquaredError()(y_true, y_pred)
    return loss.numpy()

# Function to perform SciPy optimization
def fit_scipy_minimize(model, X_train, y_train, method='L-BFGS-B', jac='2-point', maxiter=100, **kwargs):
    # Convert inputs to tensors if they are not already
    X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
    y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)

    # Get the initial weights vector
    initial_weights_vector = get_weights_vector(model)

    # Perform the optimization using SciPy's minimize function
    result = minimize(fun=loss_function,
                      x0=initial_weights_vector,
                      args=(model, X_train_tensor, y_train_tensor),
                      method=method,
                      jac=jac,
                      options={'maxiter': maxiter},
                      **kwargs)

    # Set the optimized weights back to the model
    set_weights_vector(model, result.x)

    return model

In [11]:
# Build the model
model_scipy2 = build_model()

# Train the model
fitted_model_scipy2 = fit_scipy_minimize(
    model_scipy2,
    X_train,
    y_train,
    method='L-BFGS-B',
    maxiter=500
)

# Evaluate the model
mse_scipy2 = calculate_mse(fitted_model_scipy2, X_test, y_test)
print(f"SciPy Test MSE: {mse_scipy2:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
SciPy Test MSE: 4446.0269


# SciPy Implementation 3

In [12]:
import tensorflow as tf
import numpy as np
from scipy.optimize import minimize
from tensorflow.keras import layers


class SciPyModelOptimizer:
    def __init__(self, model, enable_eager_execution=True):
        self.model = model
        if enable_eager_execution:
            self.enable_eager_execution()

    def enable_eager_execution(self):
        # Enable eager execution
        tf.config.run_functions_eagerly(True)
        # Enable eager execution for tf.data functions
        tf.data.experimental.enable_debug_mode()

    def get_weights_vector(self):
        weights = []
        for layer in self.model.layers:
            for param in layer.get_weights():
                weights.append(param.flatten())
        return np.concatenate(weights)

    def set_weights_vector(self, vector):
        index = 0
        new_weights = []
        for layer in self.model.layers:
            layer_weights = []
            for param in layer.get_weights():
                shape = param.shape
                size = np.prod(shape)
                layer_weights.append(vector[index:index + size].reshape(shape))
                index += size
            new_weights.append(layer_weights)
        for layer, layer_weights in zip(self.model.layers, new_weights):
            layer.set_weights(layer_weights)

    @tf.function(reduce_retracing=True)
    def loss_function(self, weights_vector, X, y_true):
        self.set_weights_vector(weights_vector)
        y_pred = self.model(X, training=False)
        loss = tf.keras.losses.MeanSquaredError()(y_true, y_pred)
        return loss.numpy()

    def fit(self, X_train, y_train, method='L-BFGS-B', jac='2-point', maxiter=100, **kwargs):
        # Convert inputs to tensors if they are not already
        X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
        y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)

        # Get the initial weights vector
        initial_weights_vector = self.get_weights_vector()

        # Perform the optimization using SciPy's minimize function
        result = minimize(fun=self.loss_function,
                          x0=initial_weights_vector,
                          args=(X_train_tensor, y_train_tensor),
                          method=method,
                          jac=jac,
                          options={'maxiter': maxiter},
                          **kwargs)

        # Set the optimized weights back to the model
        self.set_weights_vector(result.x)

        return self

    def evaluate(self, X_test, y_test):
        # Convert test inputs to tensors
        X_test_tensor = tf.convert_to_tensor(X_test, dtype=tf.float32)
        y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float32)

        # Predict and calculate the Mean Squared Error (MSE)
        y_pred = self.model(X_test_tensor, training=False)
        mse = tf.keras.losses.MeanSquaredError()(y_test_tensor, y_pred).numpy()
        return mse

In [13]:
# Build the model
model_scipy3 = build_model()

# Instantiate the optimizer
model_with_optimizer = SciPyModelOptimizer(model_scipy3)

# Train the model
fitted_model_scipy3 = model_with_optimizer.fit(
    X_train,
    y_train,
    method='L-BFGS-B',
    maxiter=500
)

# Evaluate the model
mse_scipy3 = fitted_model_scipy3.evaluate(X_test, y_test)
print(f"SciPy Test MSE: {mse_scipy3:.4f}")

SciPy Test MSE: 4378.7764


# Results Comparison

In [14]:
# Summary of results
print("Summary of MSE Results:")
print(f"Adam Optimizer Test MSE: {mse_adam:.4f}")
print(f"SciPy Test MSE (1): {mse_scipy:.4f}")
print(f"SciPy Test MSE (2): {mse_scipy2:.4f}")
print(f"SciPy Test MSE (3): {mse_scipy3:.4f}")

Summary of MSE Results:
Adam Optimizer Test MSE: 2933.6528
SciPy Test MSE (1): 5368.9180
SciPy Test MSE (2): 4446.0269
SciPy Test MSE (3): 4378.7764
