In [43]:
import numpy as np
from typing import List, Tuple

In [35]:
def cost_function(x: np.ndarray, y: np.ndarray, w: float, b: float) -> float:
    """
    Evaluate the halved mean squared error of the line ``w * x + b``.

    The squared residuals ``y - (w * x + b)`` are summed over every element
    and the total is scaled by ``1 / (2 * m)``, where ``m`` is ``len(x)``.

    Args:
        x (np.ndarray): Input features
        y (np.ndarray): True target values
        w (float): Weight (slope) applied to ``x``
        b (float): Bias (intercept) added to ``w * x``

    Returns:
        float: Sum of squared residuals divided by ``2 * len(x)``
    """
    sample_count = len(x)

    # Model output for the current parameters
    fitted = (w * x) + b

    # The extra factor of 2 in the denominator is part of this cost's definition
    scale = 1 / (2 * sample_count)
    squared_residuals = (y - fitted) ** 2

    return scale * np.sum(squared_residuals)



In [36]:
def gradient_function(x: np.ndarray, y: np.ndarray, w: float, b: float) -> Tuple[float, float]:
    """
    Compute the partial derivatives of the cost with respect to ``w`` and ``b``.

    With ``error = (w * x + b) - y`` and ``m = len(x)``:

        dw = (1 / m) * sum(error * x)
        db = (1 / m) * sum(error)

    Args:
        x (np.ndarray): Input features
        y (np.ndarray): True target values
        w (float): Current weight parameter
        b (float): Current bias parameter

    Returns:
        Tuple[float, float]: Gradients (dw, db)
    """
    sample_count = len(x)
    fitted = (w * x) + b

    # Both gradients share the same averaging factor and the same residuals
    inverse_count = 1 / sample_count
    errors = fitted - y

    grad_w = inverse_count * np.sum(errors * x)
    grad_b = inverse_count * np.sum(errors)

    return grad_w, grad_b

In [41]:
def gradient_descent(
    x: np.ndarray, 
    y: np.ndarray, 
    learning_rate: float, 
    iterations: int, 
    tolerance: float = 1e-6,
    verbose: bool = True,
    log_every: int = 100,
) -> Tuple[float, float, List[float]]:
    """
    Perform gradient descent optimization for linear regression.

    Both parameters start at 0.0. Each iteration takes one step against the
    gradients returned by ``gradient_function`` and then records the cost
    that ``cost_function`` reports for the updated parameters. The loop stops
    early when the cost changes by less than ``tolerance`` between two
    consecutive iterations, or when the cost is no longer finite.

    Args:
        x (np.ndarray): Input features
        y (np.ndarray): True target values
        learning_rate (float): Learning rate (alpha)
        iterations (int): Maximum number of iterations
        tolerance (float, optional): Convergence tolerance on the absolute
            change in cost between consecutive iterations. Defaults to 1e-6
        verbose (bool, optional): Print progress, convergence and divergence
            messages. Defaults to True
        log_every (int, optional): Print the cost every ``log_every``
            iterations when ``verbose`` is set; a value <= 0 disables the
            periodic progress lines. Defaults to 100

    Returns:
        Tuple[float, float, List[float]]: Final weight, final bias, and cost
        history (one entry per completed iteration)
    """
    # Initialize parameters
    w = 0.0
    b = 0.0
    cost_history: List[float] = []

    for i in range(iterations):
        # Calculate gradients at the current parameters
        dw, db = gradient_function(x, y, w, b)

        # Step against the gradient
        w -= learning_rate * dw
        b -= learning_rate * db

        # Cost is measured after the update, so cost_history[i] belongs to
        # the parameters produced by iteration i
        cost = cost_function(x, y, w, b)
        cost_history.append(cost)

        # Periodic progress report
        if verbose and log_every > 0 and i % log_every == 0:
            print(f"Iteration {i}: Cost = {cost:.6f}")

        # A non-finite cost (inf/NaN, e.g. from a too-large learning rate)
        # can never satisfy the convergence test below because comparisons
        # with NaN are False, so stop instead of spending the remaining
        # iterations on meaningless updates.
        if not np.isfinite(cost):
            if verbose:
                print(f"Diverged at iteration {i}: cost is not finite")
            break

        # Check convergence against the previous iteration's cost
        if i > 0 and abs(cost_history[i] - cost_history[i-1]) < tolerance:
            if verbose:
                print(f"Converged at iteration {i}")
            break

    return w, b, cost_history

In [42]:
# Example usage: five points lying exactly on the line y = 2x
x = np.arange(1, 6)
y = 2 * x
w, b, costs = gradient_descent(
    x,
    y,
    learning_rate=0.01,
    iterations=1000,
)

Iteration 0: Cost = 17.107600
Iteration 100: Cost = 0.017117
Iteration 200: Cost = 0.012203
Iteration 300: Cost = 0.008700
Iteration 400: Cost = 0.006202
Iteration 500: Cost = 0.004422
Iteration 600: Cost = 0.003152
Iteration 700: Cost = 0.002247
Iteration 800: Cost = 0.001602
Iteration 900: Cost = 0.001142
