### An Implementation Of Batch Gradient Descent With Early Stopping For Softmax Regression Without Using Scikit-learn

In [51]:
import numpy as np

In [52]:
def get_gradient_scalar(x_i, theta_matrix, output_class_k, true_outcome):
    '''return p_k^(i) hat - y_k^(i)'''
    
    matrix_product = x_i @ theta_matrix
    pki_hat = np.exp(matrix_product[output_class_k]) / np.sum(np.exp(matrix_product[output_class_k])) # probability
    
    yki = 1 if output_class_k == true_outcome else 0
    return pki_hat - yki

In [53]:
def get_theta_gradient(output_class_k, X, y, theta_matrix):
    theta_gradient = np.zeros((1, X.shape[1]))
    m = X.shape[0]
    for x_i, true_outcome in zip(X, y):
        theta_gradient += get_gradient_scalar(x_i, theta_matrix, output_class_k, true_outcome) * x_i
    
    return theta_gradient / m

In [57]:
def my_batch_GD(X, y, num_output_classes, alpha = 0.1, epochs = 10):
    theta_matrix = np.random.rand(X[0].size, num_output_classes)
    
    for e in range(epochs):
        delta_theta_matrix = np.array([]).reshape((0, X.shape[1]))
        for k in range(num_output_classes):
            delta_theta_matrix = np.vstack((delta_theta_matrix, np.array(get_theta_gradient(k, X, y, theta_matrix))))
        theta_matrix += alpha * np.transpose(delta_theta_matrix)
    return theta_matrix

In [64]:
print(my_batch_GD(np.array( [ [1,2,3], [4,5,6], [7,8,9], [10,11,12] ] ), np.array([0, 1, 1, 0]), 3))

[[2.80881247 3.12257036 5.88361768]
 [3.61153451 3.70904571 6.63699555]
 [4.62962559 3.82304101 7.60389623]]


In [65]:
def fit_and_make_prediction(X_train, y_train, X_test, num_output_classes):
    theta_matrix = my_batch_GD(X_train, y_train, num_output_classes)
    relative_weights = X_test @ theta_matrix
    return np.argmax(relative_weights, axis=1)

In [66]:
from sklearn import datasets
iris = datasets.load_iris()

In [74]:
X = iris["data"][:, (2,3)]
y = iris["target"]

In [76]:
# Reference regressor
from sklearn.linear_model import LogisticRegression

softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs", C=10, random_state=11)
softmax_reg.fit(X, y)

y_pred = softmax_reg.predict(X)
print(len(y_pred[y_pred == y]))
print(len(y_pred))

144
150


In [None]:
y_