In [5]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from mpl_toolkits.mplot3d import Axes3D
from sympy import symbols, sin, cos, exp, diff
from sympy import lambdify

###  P1 
**Redo Assignment 7 for bivariate functions.**

(a) 
$$f(x)=x^2-10\sin(2x)+y^2-10\sin(2y)$$

(b)  
$$f(x)=e^{-0.2(x^2+y^2)}\cos(4x)\cos(4y)$$

(c)  
$$f(x)=x^3-0.5x+\sin(3x)+y^3-0.5y+\sin(3y)$$

(d) 
$$f(x)=\sin(x)+\cos(2x)+\sin(y)+\cos(2y)$$

(e) 
$$f(x)=(x-2)^2\cos(3x)+(y-2)^2\cos(3y)$$

In [6]:
x, y = symbols('x y')

f1 = x**2 - 10 * sin(2*x) + y**2 - 10 * sin(2*y)
f2 = exp(-0.2 * (x**2 + y**2)) * cos(4*x) * cos(4*y)
f3 = x**3 - 0.5 * x + sin(3*x) + y**3 - 0.5 * y + sin(3*y)
f4 = sin(x) + cos(2*x) + sin(y) + cos(2*y)
f5 = (x - 2)**2 * cos(3*x) + (y - 2)**2 * cos(3*y)

df1_dx = f1.diff(x)
df1_dy = f1.diff(y)

df2_dx = f2.diff(x)
df2_dy = f2.diff(y)

df3_dx = f3.diff(x)
df3_dy = f3.diff(y)

df4_dx = f4.diff(x)
df4_dy = f4.diff(y)

df5_dx = f5.diff(x)
df5_dy = f5.diff(y)

num_f1 = lambdify((x, y), f1, 'numpy')
num_f2 = lambdify((x, y), f2, 'numpy')
num_f3 = lambdify((x, y), f3, 'numpy')
num_f4 = lambdify((x, y), f4, 'numpy')
num_f5 = lambdify((x, y), f5, 'numpy')

num_df1_dx = lambdify((x, y), df1_dx)
num_df1_dy = lambdify((x, y), df1_dy)

num_df2_dx = lambdify((x, y), df2_dx)
num_df2_dy = lambdify((x, y), df2_dy)

num_df3_dx = lambdify((x, y), df3_dx)
num_df3_dy = lambdify((x, y), df3_dy)

num_df4_dx = lambdify((x, y), df4_dx)
num_df4_dy = lambdify((x, y), df4_dy)

num_df5_dx = lambdify((x, y), df5_dx)
num_df5_dy = lambdify((x, y), df5_dy)

In [7]:

def gradient_descent(df_dx, df_dy, x0, y0, learning_rate, max_iters=100):
    x, y = x0, y0
    trajectory = [(x, y)]
    for i in range(max_iters):
        grad_x = df_dx(x, y)
        grad_y = df_dy(x, y)
        x -= learning_rate * grad_x
        y -= learning_rate * grad_y
        trajectory.append((x, y))
        if abs(grad_x) < 1e-6 and abs(grad_y) < 1e-6:
            break
    return (x, y), trajectory


In [8]:
def plot_gradient_descent(gradient_descent_func, initial_points, step_sizes):
    functions = [num_f1, num_f2, num_f3, num_f4, num_f5]
    derivatives = [
        (num_df1_dx, num_df1_dy),
        (num_df2_dx, num_df2_dy),
        (num_df3_dx, num_df3_dy),
        (num_df4_dx, num_df4_dy),
        (num_df5_dx, num_df5_dy)
    ]
    labels = ['f1', 'f2', 'f3', 'f4', 'f5']
    x_ranges = [(-3, 3), (-3, 3), (-3, 3), (-3, 3), (-3, 3)]
    y_ranges = [(-3, 3), (-3, 3), (-3, 3), (-3, 3), (-3, 3)]

    for _, (f, (df_dx, df_dy), (x0, y0), lr, label, x_range, y_range) in enumerate(zip(functions, derivatives, initial_points, step_sizes, labels, x_ranges, y_ranges)):
        final_point, trajectory = gradient_descent_func(df_dx, df_dy, x0, y0, lr)
        print(f"Final point for {label}:", final_point)

        x_vals = np.linspace(x_range[0], x_range[1], 100)
        y_vals = np.linspace(y_range[0], y_range[1], 100)
        X, Y = np.meshgrid(x_vals, y_vals)
        Z = f(X, Y)

        trajectory = np.array(trajectory)
        traj_x, traj_y = trajectory[:, 0], trajectory[:, 1]
        traj_z = f(traj_x, traj_y)

        fig = go.Figure(data=[
            go.Surface(z=Z, x=X, y=Y, opacity=0.8),
            go.Scatter3d(x=traj_x, y=traj_y, z=traj_z, mode='lines+markers', marker=dict(size=5, color='red'))
        ])
        fig.update_layout(
            title=f"{gradient_descent_func.__name__} on {label}",
            scene=dict(
                xaxis_title='x',
                yaxis_title='y',
                zaxis_title='f(x, y)'
            )
        )
        fig.show()

# Example usage
initial_points = [(0.0, 0.0), (1.0, 1.0), (-1.0, -1.0), (0.5, -0.5), (-0.5, 0.5)]
step_sizes = [0.01, 0.01, 0.01, 0.01, 0.01]

plot_gradient_descent(gradient_descent, initial_points, step_sizes)

Final point for f1: (0.7479649445217313, 0.7479649445217313)


Final point for f2: (1.178086268447804, 1.178086268447804)


Final point for f3: (-0.5865742554452837, -0.5865742554452837)


Final point for f4: (1.4532587873092186, -1.5608644054367617)


Final point for f5: (-1.1174236949958996, 0.8695808460448554)


### momentum

In [9]:
def gradient_descent_momentum(df_dx, df_dy, x0, y0, learning_rate=0.01, momentum=0.95, max_iters=100):
    x, y = x0, y0
    velocity_x, velocity_y = 0, 0  # Initialize velocities for x and y
    trajectory = [(x, y)]

    for i in range(max_iters):
        grad_x = df_dx(x, y)
        grad_y = df_dy(x, y)

        # Update velocities with the momentum factor
        velocity_x = momentum * velocity_x + learning_rate * grad_x
        velocity_y = momentum * velocity_y + learning_rate * grad_y

        # Update positions using the velocities instead of raw gradients
        x -= velocity_x
        y -= velocity_y

        trajectory.append((x, y))

        # Check for convergence based on gradient magnitude
        if abs(grad_x) < 1e-6 and abs(grad_y) < 1e-6:
            break

    return (x, y), trajectory


In [10]:
plot_gradient_descent(gradient_descent_momentum, initial_points, step_sizes)

Final point for f1: (0.7890790661478613, 0.7890790661478613)


Final point for f2: (1.180265564877794, 1.180265564877794)


Final point for f3: (-0.5549775407096236, -0.5549775407096236)


Final point for f4: (1.6444128620945082, -1.6346239196406465)


Final point for f5: (-1.1532243398752342, 0.8462527013130001)


### P2
**Generalize your gradient descent to accept a function with arbitrarily many variables. Demonstrate it on a function with 3, 5, 50 variables (the function can be simple).**

In [11]:
def gradient_descent_multi(gradients, initial_point, learning_rate, max_iters=100):
    point = initial_point
    trajectory = [point.copy()]
    for _ in range(max_iters):
        grads = [g(*point) for g in gradients]
        new_point = [p - learning_rate * g for p, g in zip(point, grads)]
        trajectory.append(new_point.copy())
        if all(abs(g) < 1e-6 for g in grads):
            break
        point = new_point
    return point, trajectory


$$f(x,y,z) = x^2+y^2+z^2$$ 

In [12]:
gradients_3 = [
    lambda x, y, z: 2 * x,
    lambda x, y, z: 2 * y,
    lambda x, y, z: 2 * z
]
initial_point_3 = [1.0, 1.0, 1.0]
final_point_3, trajectory_3 = gradient_descent_multi(gradients_3, initial_point_3, learning_rate=0.1)
print("Final point (3 variables):", final_point_3)


Final point (3 variables): [4.0173451106474777e-07, 4.0173451106474777e-07, 4.0173451106474777e-07]


$$f(x_1,x_2,x_3, x_4, x_5) = x_1^2+x_2^2+x_3^2+x_4^2+x_5^2+$$ 

In [16]:
gradients_5 = []
for i in range(5):
    gradients_5.append(lambda *args, i=i: 2 * args[i])
initial_point_5 = [1] * 5
final_point_5, trajectory_50 = gradient_descent_multi(gradients_5, initial_point_5, learning_rate=0.01)
print("Final point (5 variables):", final_point_5)


Final point (5 variables): [0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316]


$$f(x_1,x_2, ....,x_5) = x_1^2+x_2^2+...+x_{50}^2$$ 

In [20]:
gradients_50 = []
for i in range(50):
    gradients_50.append(lambda *args, i=i: 2 * args[i])
initial_point_50 = [1] * 50
final_point_50, trajectory_50 = gradient_descent_multi(gradients_50, initial_point_50, learning_rate = 0.01)
print("Final point (50 variables):", final_point_50)


Final point (50 variables): [0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.13261955589475316, 0.132