In [3]:
import numpy as np
import math

# Softmax function
def softmax(z):
    """Turn a vector of scores into probabilities that sum to one.

    The largest score is subtracted before exponentiating, so ``np.exp``
    only ever receives values <= 0 and cannot overflow. The shift cancels
    in the ratio, so the result is unchanged. Normalisation runs over every
    element of ``z`` (no axis is specified).
    """
    shifted = z - np.max(z)
    weights = np.exp(shifted)
    total = np.sum(weights)
    return weights / total

# Loss function for all samples
def loss(w, b, x, y):
    """Mean categorical cross-entropy of a softmax-regression model.

    Parameters
    ----------
    w : ndarray
        Weight matrix; the notebook uses shape (n_features, n_classes).
    b : ndarray
        Per-class bias, broadcast against each row of scores
        (the notebook uses shapes (n_classes,) and (1, n_classes)).
    x : ndarray
        Sample matrix with one row per sample, (n_samples, n_features).
    y : array-like
        One-hot targets, one row per sample, (n_samples, n_classes).

    Returns
    -------
    float
        ``-sum(y * log_softmax(x @ w + b)) / n_samples``.

    Raises
    ------
    ZeroDivisionError
        If ``x`` contains no samples.
    """
    m = x.shape[0]
    if m == 0:
        # Averaging over zero samples is undefined; fail loudly rather than
        # let NumPy return NaN with only a warning.
        raise ZeroDivisionError("cannot average the loss over zero samples")

    logits = np.dot(x, w) + b
    # Stable log-softmax via log-sum-exp. Taking np.log of the softmax
    # output instead yields log(0) = -inf as soon as a probability
    # underflows, and 0 * -inf from the one-hot mask makes the cost NaN.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=-1, keepdims=True))

    # The one-hot mask keeps only the log-probability of the true class.
    return -np.sum(y * log_probs) / m

# Compute gradient descent
def gradient_descent(w, b, x, y, alpha, max_steps=10000):
    """Fit softmax-regression parameters with full-batch gradient descent.

    Parameters
    ----------
    w : ndarray
        Initial weights; the notebook uses shape (n_features, n_classes).
    b : ndarray
        Initial per-class bias.
    x : ndarray
        Sample matrix, one row per sample.
    y : ndarray
        One-hot targets, one row per sample.
    alpha : float
        Learning rate applied to the averaged gradients.
    max_steps : int, optional
        Number of update steps to perform (default 10000).

    Returns
    -------
    (w, b) : tuple of ndarray
        The trained parameters. These are new arrays; the ``w`` and ``b``
        passed in are not modified.

    Side effects
    ------------
    Prints the current cost every 1000 steps.
    """
    m = x.shape[0]  # number of samples

    # Train on private copies so the caller's initial parameters survive
    # and re-running the call starts from the same point.
    w = np.array(w)
    b = np.array(b)

    for step in range(max_steps):
        # Debugging: report the cost of the parameters entering this step.
        # It is only evaluated on the steps that print it, which avoids a
        # redundant full pass over the data on every other step.
        if step % 1000 == 0:
            print(f"Step {step}: cost = {loss(w, b, x, y)}")

        # Accumulate per-sample gradients of the cross-entropy loss.
        dw = np.zeros_like(w)
        db = np.zeros_like(b)
        for i in range(m):
            y_pred = softmax(np.dot(x[i], w) + b)
            error = y_pred - y[i]  # d(loss)/d(scores) for softmax + cross-entropy
            dw += np.outer(x[i], error)
            db += error

        # Average over the batch, then take one descent step.
        dw /= m
        db /= m
        w -= alpha * dw
        b -= alpha * db

    return w, b

# Toy data set: six samples with 2 features each, two samples per class
x = np.array([
    [1.0, 2.0],
    [1.5, 1.8],
    [5.8, 8.0],
    [6.0, 9.0],
    [8.0, 1.0],
    [9.0, 2.5],
])
y = np.array([
    [1, 0, 0],
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 0, 1],
])

# Small random starting weights (seeded so the run is repeatable), zero bias
np.random.seed(42)
W = 0.01 * np.random.randn(2, 3)  # one column of weights per class
b = np.zeros(3)  # one bias per class

# Train, then show the fitted parameters
w, b = gradient_descent(W, b, x, y, alpha=0.01)

print("Final weights:", w)
print("Final bias:", b)

Step 0: cost = 1.1123483200063804
Step 1000: cost = 0.15031261345548627
Step 2000: cost = 0.08015340510930193
Step 3000: cost = 0.05331286763848229
Step 4000: cost = 0.03959979558876129
Step 5000: cost = 0.031379339121557104
Step 6000: cost = 0.025934384769552735
Step 7000: cost = 0.022074762092810105
Step 8000: cost = 0.019201658213319397
Step 9000: cost = 0.016982474517266546
Final weights: [[-0.77468941 -0.23098022  1.01573101]
 [ 0.33582767  1.22953153 -1.55481181]]
Final bias: [ 3.2648068  -2.59633404 -0.66847277]


In [4]:
# Scratch cell: a bare arithmetic expression; its value is not stored.
3 + 6


9

In [5]:
import sys

In [6]:
# Raise the interpreter's recursion ceiling to one million frames.
# NOTE(review): Python's documentation warns that a limit set too high can
# crash the interpreter instead of raising RecursionError. Presumably this
# is here to allow very deep recursion elsewhere in the notebook; confirm it
# is still needed and prefer the smallest value that works.
sys.setrecursionlimit(1_000_000)

In [None]:
def softmax(z):
    """Numerically stable softmax taken over all elements of ``z``.

    Subtracting the maximum first keeps every exponent <= 0, which prevents
    overflow without changing the normalised result.
    """
    peak = np.max(z)
    numerators = np.exp(z - peak)
    return numerators / np.sum(numerators)

def loss(w,b,x,y):
    """Average cross-entropy between one-hot targets and softmax predictions.

    Parameters
    ----------
    w : ndarray
        Weight matrix; the notebook uses shape (n_features, n_classes).
    b : ndarray
        Per-class bias, broadcast against each row of scores
        (the notebook uses shape (1, n_classes) in this cell).
    x : ndarray
        Sample matrix with one row per sample.
    y : array-like
        One-hot targets, one row per sample.

    Returns
    -------
    float
        ``-sum(y * log_softmax(x @ w + b)) / n_samples``.

    Raises
    ------
    ZeroDivisionError
        If ``x`` contains no samples.
    """
    m = x.shape[0]
    if m == 0:
        # Averaging over zero samples is undefined; raise instead of
        # returning NaN with only a NumPy warning.
        raise ZeroDivisionError("cannot average the loss over zero samples")

    scores = np.dot(x, w) + b
    # Compute log-softmax directly (log-sum-exp). Calling np.log on softmax
    # probabilities gives -inf once a probability underflows to 0, and the
    # one-hot mask's 0 * -inf then poisons the cost with NaN. A NaN cost
    # compares False against any threshold, so a caller that tests for
    # improvement would stop silently.
    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=-1, keepdims=True))

    # Only the true-class log-probability survives the one-hot mask.
    return -np.sum(y * log_probs) / m

def gradient_descent(w,b,x,y,init_cost,alpha,max_step=10000,step=0):
    """Run batch gradient descent until the cost stops improving.

    Each step averages the softmax cross-entropy gradients over all samples
    and moves the parameters by ``alpha`` times that average. Training
    continues while the cost keeps decreasing and ``step < max_step``.

    The steps run in a loop rather than by self-recursion. One Python stack
    frame per step (up to ``max_step + 1`` of them) would exceed the default
    recursion limit long before the default ``max_step`` is reached.

    Parameters
    ----------
    w, b : ndarray
        Starting weights and bias. They are copied, never modified.
    x : ndarray
        Sample matrix, one row per sample.
    y : ndarray
        One-hot targets, one row per sample.
    init_cost : float or None
        Cost of the starting parameters. When ``None`` it is computed with
        ``loss(w, b, x, y)``.
    alpha : float
        Learning rate.
    max_step : int, optional
        Step counter value at which training stops even if the cost is
        still decreasing (default 10000).
    step : int, optional
        Starting value of the step counter (default 0).

    Returns
    -------
    (new_w, new_b) : tuple of ndarray
        Parameters after the last update performed. When training stops
        because the cost did not decrease, these are the parameters
        produced by that final, non-improving update.
    """
    m = x.shape[0]
    best_cost = loss(w, b, x, y) if init_cost is None else init_cost

    # Private working copies; every later update is applied to these.
    new_w = w.copy()
    new_b = b.copy()

    while True:
        dw = np.zeros_like(new_w)
        db = np.zeros_like(new_b)
        for i in range(m):
            y_pred = softmax(np.dot(x[i], new_w) + new_b)
            error = y_pred - y[i]
            # Gradient for weights: (y_pred - y_true) * x
            dw += np.outer(x[i], error)
            # Gradient for bias: (y_pred - y_true)
            db += error
        dw /= m
        db /= m

        new_w -= alpha * dw
        new_b -= alpha * db
        new_cost = loss(new_w, new_b, x, y)

        # Stop on the first step that fails to lower the cost, or once the
        # step budget is used up.
        if not (new_cost < best_cost and step < max_step):
            return new_w, new_b

        best_cost = new_cost
        step += 1

# Toy data set: six 2-feature samples, two per class, with one-hot labels
x = np.array([
    [1.0, 2.0],
    [1.5, 1.8],
    [5.8, 8.0],
    [6.0, 9.0],
    [8.0, 1.0],
    [9.0, 2.5],
])
y = np.array([
    [1, 0, 0],
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 0, 1],
])

# Seeded small random weights and a zero row-vector bias
np.random.seed(42)  # for consistent results
W = 0.01 * np.random.randn(2, 3)  # (2 features, 3 classes)
b = np.zeros((1, 3))

# Left as the cell's final expression so the fitted (weights, bias) are displayed
gradient_descent(W, b, x, y, init_cost=None, alpha=0.01)