In [14]:
import numpy as np
import pandas as pd

# Toy training set: three scalar inputs and their targets.
X, Y = np.array([0.5, 2.5, 1.5]), np.array([0.2, 0.9, 0.6])

# Starting weight and bias.
w = -2.0
b = -2.0

# Optimiser settings.
lr = 0.3          # base step size
beta = 0.9        # decay rate of the squared-gradient moving average
eps = 1e-8        # added to the denominator of the update to avoid division by zero
iterations = 5    # number of update steps to run

# RMSProp running averages of squared gradients, one per parameter.
vw = vb = 0.0

# Sigmoid
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A value (or array of values) in [0, 1] with the same shape as ``z``.
        Scalar input gives a NumPy scalar, array input gives an array.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 the algebraically
    # equivalent exp(z) / (1 + exp(z)) is used instead.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result and leaves real arrays unchanged.
    return out[()]

# Per-iteration snapshots; converted into a DataFrame once the loop finishes.
results = []

for it in range(1, iterations + 1):
    # Forward pass: linear score through the sigmoid. Predictions are rounded
    # to 7 decimals before they feed the loss and the gradients.
    z = w * X + b
    y_pred = np.round(sigmoid(z), 7)

    # Mean squared error over the samples.
    loss = np.mean((Y - y_pred) ** 2)

    # Chain rule: per-sample d(loss)/dz, then averaged (weighted by X for w).
    dloss_dz = 2 * (y_pred - Y) * y_pred * (1 - y_pred)
    grad_w = np.mean(dloss_dz * X)
    grad_b = np.mean(dloss_dz)

    # Exponential moving average of the squared gradients.
    vw = beta * vw + (1 - beta) * grad_w ** 2
    vb = beta * vb + (1 - beta) * grad_b ** 2

    # Per-parameter step size: lr divided by the root of the running average.
    scale_w = lr / (np.sqrt(vw) + eps)
    scale_b = lr / (np.sqrt(vb) + eps)
    w -= scale_w * grad_w
    b -= scale_b * grad_b

    # Note: w and b are recorded after the update, whereas y_pred, loss and
    # the gradients were computed with the values from before it.
    results.append(dict(
        iter=it,
        w=w,
        b=b,
        grad_w=grad_w,
        grad_b=grad_b,
        vw=vw,
        vb=vb,
        y_pred=y_pred.tolist(),
        loss=loss,
    ))

# One row per iteration.
df = pd.DataFrame(results)

import numpy as np
# df = df.round(3)

In [11]:
# Render whatever `df` is currently bound to in the kernel as this cell's output.
# NOTE(review): this cell defines nothing itself, so it relies on an earlier
# cell having created `df` — confirm the intended run order.
df

Unnamed: 0,iter,w,b,grad_w,grad_b,vw,vb,y_pred,loss
0,1,-1.051321,-1.051321,-0.007606,-0.00777,6e-06,6e-06,"[0.0474259, 0.0009111, 0.0066929]",0.394551
1,2,-0.107661,-0.119116,-0.069839,-0.039034,0.000493,0.000158,"[0.1712235, 0.0246115, 0.067339]",0.350287
2,3,0.8001,0.701112,-0.219368,-0.064846,0.005256,0.000563,"[0.4568709, 0.4041342, 0.4303049]",0.113554
3,4,0.514959,-0.044786,0.06854,0.090536,0.0052,0.001326,"[0.7504778, 0.937107, 0.8700343]",0.125774
4,5,0.47341,-0.469416,0.009484,0.054679,0.004689,0.001492,"[0.5529738, 0.7760181, 0.6742898]",0.048494


In [13]:
import numpy as np

# Toy training set: three scalar inputs and their targets.
X, Y = np.array([0.5, 2.5, 1.5]), np.array([0.2, 0.9, 0.6])

# Starting weight and bias.
w = -2.0
b = -2.0

# Optimiser settings.
lr = 0.3          # base step size
eps = 1e-8        # added to the denominator of the update to avoid division by zero
iterations = 5    # number of update steps to run

# Adagrad running sums of squared gradients, one per parameter.
vw = vb = 0.0

# Sigmoid
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A value (or array of values) in [0, 1] with the same shape as ``z``.
        Scalar input gives a NumPy scalar, array input gives an array.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 the algebraically
    # equivalent exp(z) / (1 + exp(z)) is used instead.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result and leaves real arrays unchanged.
    return out[()]

import pandas as pd

# Per-iteration snapshots; converted into a DataFrame once the loop finishes.
results = []

for it in range(1, iterations + 1):
    # Forward pass: linear score through the sigmoid. Predictions are rounded
    # to 7 decimals before they feed the loss and the gradients.
    z = w * X + b
    y_pred = np.round(sigmoid(z), 7)

    # Mean squared error over the samples.
    loss = np.mean((Y - y_pred) ** 2)

    # Chain rule: per-sample d(loss)/dz, then averaged (weighted by X for w).
    dloss_dz = 2 * (y_pred - Y) * y_pred * (1 - y_pred)
    grad_w = np.mean(dloss_dz * X)
    grad_b = np.mean(dloss_dz)

    # Adagrad keeps an undecayed running sum of squared gradients.
    vw = vw + grad_w ** 2
    vb = vb + grad_b ** 2

    # Per-parameter step size: lr divided by the root of the accumulated sum.
    scale_w = lr / (np.sqrt(vw) + eps)
    scale_b = lr / (np.sqrt(vb) + eps)
    w -= scale_w * grad_w
    b -= scale_b * grad_b

    # Note: w and b are recorded after the update, whereas y_pred, loss and
    # the gradients were computed with the values from before it.
    results.append(dict(
        iter=it,
        w=w,
        b=b,
        grad_w=grad_w,
        grad_b=grad_b,
        vw=vw,
        vb=vb,
        y_pred=y_pred.tolist(),
        loss=loss,
    ))

# One row per iteration; the bare name on the last line displays the table.
df = pd.DataFrame(results)
df

Unnamed: 0,iter,w,b,grad_w,grad_b,vw,vb,y_pred,loss
0,1,-1.7,-1.7,-0.007606,-0.00777,5.8e-05,6e-05,"[0.0474259, 0.0009111, 0.0066929]",0.394551
1,2,-1.432955,-1.444204,-0.014859,-0.012681,0.000279,0.000221,"[0.0724264, 0.0025991, 0.0140636]",0.388308
2,3,-1.176351,-1.204411,-0.02756,-0.019782,0.001038,0.000613,"[0.1033373, 0.0065184, 0.0267623]",0.378752
3,4,-0.923347,-0.97058,-0.050567,-0.030792,0.003595,0.001561,"[0.1427558, 0.0155919, 0.048849]",0.363074
4,5,-0.672546,-0.738994,-0.091352,-0.047974,0.01194,0.003862,"[0.1927479, 0.0363006, 0.0866217]",0.336529


In [None]:
import numpy as np

# Two-sample training set: scalar inputs and their targets.
X, Y = np.array([0.5, 1.0]), np.array([0.2, 0.4])

# Starting weight and bias.
w = 0.1
b = 0.0

# Adam settings.
eta = 0.1        # learning rate
epsilon = 1e-8   # added to the denominator of the update to avoid division by zero
beta1 = 0.9      # decay rate of the first-moment (gradient) average
beta2 = 0.999    # decay rate of the second-moment (squared-gradient) average
iterations = 5   # number of update steps to run

# First (m_*) and second (v_*) moment estimates, all starting at zero.
m_w = v_w = 0
m_b = v_b = 0

# Activation function
def tanh(z):
    """Hyperbolic-tangent activation; a thin wrapper around ``np.tanh``."""
    activation = np.tanh(z)
    return activation

# Derivative of tanh
def tanh_derivative(z):
    """Derivative of tanh at ``z``, computed as 1 - tanh(z)^2."""
    t = np.tanh(z)
    return 1 - t ** 2

print("Initial w =", w, " b =", b)
print("===========================================")

for t in range(1, iterations + 1):
    # Running totals over the samples; divided by the sample count below.
    grad_w = grad_b = loss = 0

    for x, y in zip(X, Y):
        # Forward pass for one sample.
        z = w * x + b
        y_pred = tanh(z)
        print(f"x={x}, y={y}, z={z:.6f}, y_pred={y_pred:.6f}")

        # Half squared error, so its derivative w.r.t. y_pred is the residual.
        dL_dy = y_pred - y
        loss += 0.5 * dL_dy ** 2

        # Chain rule through tanh; dz/dw is x and dz/db is 1.
        dy_dz = tanh_derivative(z)
        dL_dz = dL_dy * dy_dz
        grad_w += dL_dz * x
        grad_b += dL_dz

    # Turn the totals into per-sample averages.
    n_samples = len(X)
    grad_w /= n_samples
    grad_b /= n_samples
    loss /= n_samples

    # Biased first (m) and second (v) moment estimates.
    m_w = beta1 * m_w + (1 - beta1) * grad_w
    m_b = beta1 * m_b + (1 - beta1) * grad_b
    v_w = beta2 * v_w + (1 - beta2) * (grad_w ** 2)
    v_b = beta2 * v_b + (1 - beta2) * (grad_b ** 2)

    # Bias correction: both moments start at zero, so early values are rescaled.
    first_corr = 1 - beta1 ** t
    second_corr = 1 - beta2 ** t
    m_w_hat = m_w / first_corr
    v_w_hat = v_w / second_corr
    m_b_hat = m_b / first_corr
    v_b_hat = v_b / second_corr

    # Only the bias-parameter estimates are echoed here.
    print("m_b_hat:")
    print(m_b_hat)
    print("v_b_hat:")
    print(v_b_hat)

    # Adam step using the corrected estimates.
    w -= eta * m_w_hat / (np.sqrt(v_w_hat) + epsilon)
    b -= eta * m_b_hat / (np.sqrt(v_b_hat) + epsilon)

    # Per-iteration report; loss and gradients are from before this update.
    print(f"Iteration {t}")
    print(f" Loss={loss:.6f}")
    print(f" grad_w={grad_w:.6f}, grad_b={grad_b:.6f}")
    print(f" m_w={m_w:.6f}, v_w={v_w:.6f}, m_b={m_b:.6f}, v_b={v_b:.6f}")
    print(f" w={w:.6f}, b={b:.6f}")
    print("===========================================")


In [1]:
import numpy as np
import pandas as pd

# Toy training set: three scalar inputs and their targets.
X, Y = np.array([0.5, 2.5, 1.5]), np.array([0.2, 0.9, 0.6])

# Starting weight and bias.
w = 1.5
b = 1.2

# Adam settings.
lr = 0.3          # base step size
eps = 1e-8        # added to the denominator of the update to avoid division by zero
iterations = 5    # number of update steps to run
beta1 = 0.9       # decay rate of the first-moment (gradient) average
beta2 = 0.999     # decay rate of the second-moment (squared-gradient) average

# First (m*) and second (v*) moment estimates, all starting at zero.
mw, mb = 0.0, 0.0
vw, vb = 0.0, 0.0

# Sigmoid function
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A value (or array of values) in [0, 1] with the same shape as ``z``.
        Scalar input gives a NumPy scalar, array input gives an array.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 the algebraically
    # equivalent exp(z) / (1 + exp(z)) is used instead.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result and leaves real arrays unchanged.
    return out[()]

# Per-iteration snapshots; converted into a DataFrame once the loop finishes.
results = []

for it in range(1, iterations + 1):
    # Forward pass: linear score through the sigmoid. Predictions are rounded
    # to 7 decimals before they feed the loss and the gradients.
    z = w * X + b
    y_pred = np.round(sigmoid(z), 7)

    # Mean squared error over the samples.
    loss = np.mean((Y - y_pred) ** 2)

    # Chain rule: per-sample d(loss)/dz, then averaged (weighted by X for w).
    dloss_dz = 2 * (y_pred - Y) * y_pred * (1 - y_pred)
    grad_w = np.mean(dloss_dz * X)
    grad_b = np.mean(dloss_dz)

    # Biased first (m) and second (v) moment estimates.
    mw = beta1 * mw + (1 - beta1) * grad_w
    mb = beta1 * mb + (1 - beta1) * grad_b
    vw = beta2 * vw + (1 - beta2) * (grad_w ** 2)
    vb = beta2 * vb + (1 - beta2) * (grad_b ** 2)

    # Bias correction: both moments start at zero, so early values are rescaled.
    first_corr = 1 - beta1 ** it
    second_corr = 1 - beta2 ** it
    mw_hat = mw / first_corr
    mb_hat = mb / first_corr
    vw_hat = vw / second_corr
    vb_hat = vb / second_corr

    # Adam step using the corrected estimates.
    w -= lr * mw_hat / (np.sqrt(vw_hat) + eps)
    b -= lr * mb_hat / (np.sqrt(vb_hat) + eps)

    # Note: w and b are recorded after the update, whereas y_pred, loss and
    # the gradients were computed with the values from before it. The logged
    # moments are the biased ones, not the corrected *_hat values.
    results.append(dict(
        iter=it,
        w=w,
        b=b,
        grad_w=grad_w,
        grad_b=grad_b,
        mw=mw,
        mb=mb,
        vw=vw,
        vb=vb,
        y_pred=y_pred.tolist(),
        loss=loss,
    ))

# One row per iteration; the bare name on the last line displays the table.
df = pd.DataFrame(results)
df


Unnamed: 0,iter,w,b,grad_w,grad_b,mw,mb,vw,vb,y_pred,loss
0,1,1.2,0.9,0.036644,0.056874,0.003664,0.005687,1e-06,3e-06,"[0.8754466, 0.9929664, 0.9692311]",0.200401
1,2,0.902129,0.600781,0.053188,0.075703,0.008617,0.012689,4e-06,9e-06,"[0.8175745, 0.9801597, 0.9370267]",0.167137
2,3,0.604746,0.301199,0.068509,0.090772,0.014606,0.020497,9e-06,1.7e-05,"[0.7411292, 0.9456333, 0.8758794]",0.123671
3,4,0.306247,0.000455,0.056027,0.08486,0.018748,0.026934,1.2e-05,2.4e-05,"[0.6464731, 0.8597317, 0.7700012]",0.07662
4,5,0.139693,-0.277714,-0.047362,0.02672,0.012137,0.026912,1.4e-05,2.5e-05,"[0.5383194, 0.6826707, 0.6129728]",0.053953
