In [1]:
import numpy as np

def gd(f, df, x0, step_size_fn, max_iter, *, verbose=True):
    """
    Generic gradient descent function.

    Parameters:
    - f: Function to minimize. It takes a vector and returns a scalar.
    - df: Gradient of function f. It takes a vector and returns the gradient.
    - x0: Initial value of x.
    - step_size_fn: Function that takes the iteration index and returns the step size.
    - max_iter: Maximum number of iterations to perform.
    - verbose: Keyword-only. When True (the default, matching the historical
      behaviour) the iterate, gradient and step size are printed on every
      iteration. Pass False to run silently; early-termination messages are
      still printed.

    Returns:
    - Tuple (x, fs, xs) where:
        - x: The last iterate for which f was successfully evaluated
          (always the same object as xs[-1]).
        - fs: List of function values, starting with f(x0).
        - xs: List of x values, starting with x0; same length as fs.

    The loop stops early if the update raises OverflowError or if f raises
    ValueError at the new point. In both cases the values collected so far
    are returned.
    """
    # History of iterates and objective values; index 0 is the starting point.
    xs = [x0]
    fs = [f(x0)]

    x = x0

    for i in range(max_iter):
        step_size = step_size_fn(i)
        gradient = df(x)

        if verbose:
            print(f"Iteration {i}: x = {x}, gradient = {gradient}, step_size = {step_size}")

        try:
            candidate = x - step_size * gradient
            # Clamp every coordinate so a diverging run saturates at +/-1e10
            # instead of running off to inf.
            candidate = np.clip(candidate, -1e10, 1e10)
        except OverflowError:
            print(f"OverflowError encountered at iteration {i}")
            break

        try:
            candidate_value = f(candidate)
        except ValueError as e:
            print(f"ValueError encountered while evaluating function at iteration {i}: {e}")
            break

        # Commit the step only once f has accepted the new point, so that the
        # returned x, xs[-1] and fs[-1] always describe the same iterate.
        x = candidate
        fs.append(candidate_value)
        xs.append(x)

    return (x, fs, xs)

# Define a smaller step size function
def constant_step_size(iter_index):
    """Return a fixed step size of 0.01; the iteration index is ignored."""
    fixed_rate = 0.01
    return fixed_rate

# Define the functions f2 and df2
def f2(v):
    """
    Evaluate f2(x, y) = (x - 2)(x - 3)(x + 3)(x + 1) + (x + y - 1)^2.

    Parameters:
    - v: The point (x, y). May be a list, a 1-D array or a column vector of
      shape (2, 1); the first two entries (in flattened order) are used.

    Returns:
    - The function value as a Python float. Each coordinate is clamped to
      [-1e5, 1e5] before evaluation.
    """
    # Flatten first so a (2, 1) column vector is read element-wise instead of
    # relying on converting a one-element array to a scalar.
    coords = np.asarray(v, dtype=float).ravel()

    # Clamping is a no-op inside the range, so it is applied unconditionally;
    # float() keeps the return type the same whether or not clamping occurred.
    x = float(np.clip(coords[0], -1e5, 1e5))
    y = float(np.clip(coords[1], -1e5, 1e5))

    term1 = (x - 2.) * (x - 3.) * (x + 3.) * (x + 1.)
    term2 = (x + y - 1)**2

    return term1 + term2

def df2(v):
    """
    Gradient of f2(x, y) = (x - 2)(x - 3)(x + 3)(x + 1) + (x + y - 1)^2.

    Parameters:
    - v: The point (x, y). May be a list, a 1-D array or a column vector of
      shape (2, 1); the first two entries (in flattened order) are used.

    Returns:
    - Array [df/dx, df/dy]. When v has exactly two entries the result has the
      same shape as v, so subtracting it from v is element-wise and never
      broadcasts; otherwise it has shape (2,).
    """
    point = np.asarray(v, dtype=float)
    coords = point.ravel()
    x = float(coords[0])
    y = float(coords[1])

    # Derivative of the squared term; identical for x and y.
    coupling = 2 * (-1. + x + y)

    # Product rule on the quartic: one term per omitted factor.
    df_dx = (-3. + x) * (-2. + x) * (1. + x) + \
            (-3. + x) * (-2. + x) * (3. + x) + \
            (-3. + x) * (1. + x) * (3. + x) + \
            (-2. + x) * (1. + x) * (3. + x) + \
            coupling

    df_dy = coupling

    gradient = np.array([df_dx, df_dy])
    if point.size == 2:
        # Mirror the input layout (e.g. (2, 1) column vector).
        return gradient.reshape(point.shape)
    return gradient

# Demo: minimise f2 from the origin with a constant step size for 100 iterations
x0_f2 = np.zeros(2)  # start at (0, 0)

try:
    final_x, fs, xs = gd(
        f2,
        df2,
        x0=x0_f2,
        step_size_fn=constant_step_size,
        max_iter=100,
    )
    print("Final x:", final_x)
    print("Function values:", fs)
    print("X values:", xs)
except Exception as err:
    # Top-level boundary of the demo: report any failure instead of a traceback.
    print("Error during gradient descent:", err)


Iteration 0: x = [0. 0.], gradient = [ 7. -2.], step_size = 0.01
Iteration 1: x = [-0.07  0.02], gradient = [ 8.423928 -2.1     ], step_size = 0.01
Iteration 2: x = [-0.15423928  0.041     ], gradient = [10.08073907 -2.22647856], step_size = 0.01
Iteration 3: x = [-0.25504667  0.06326479], gradient = [11.96595465 -2.38356377], step_size = 0.01
Iteration 4: x = [-0.37470622  0.08710042], gradient = [14.03666881 -2.57521159], step_size = 0.01
Iteration 5: x = [-0.51507291  0.11285254], gradient = [16.18466732 -2.80444073], step_size = 0.01
Iteration 6: x = [-0.67691958  0.14089695], gradient = [18.20481245 -3.07204526], step_size = 0.01
Iteration 7: x = [-0.8589677  0.1716174], gradient = [19.77403916 -3.37470061], step_size = 0.01
Iteration 8: x = [-1.05670809  0.20536441], gradient = [20.47517843 -3.70268738], step_size = 0.01
Iteration 9: x = [-1.26145988  0.24239128], gradient = [19.91078838 -4.0381372 ], step_size = 0.01
Iteration 10: x = [-1.46056776  0.28277265], gradient = [17.91