In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import datasets

from sklearn.metrics import accuracy_score

In [2]:
# Load the scikit-learn wine data set, then pull out its 'data' entry as the
# feature matrix X and its 'target' entry as the label vector y.
wine = datasets.load_wine()
X, y = wine.data, wine.target

In [9]:
def softmax(Z):
    """
    Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    Z : array-like of shape (n_rows, n_classes)
        Unnormalised scores; each row is normalised independently.

    Returns
    -------
    numpy.ndarray of shape (n_rows, n_classes)
        Non-negative values whose rows each sum to 1.
    """
    Z = np.asarray(Z, dtype=float)
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum makes the largest exponent 0, so np.exp cannot overflow to
    # inf (which would otherwise yield inf/inf = nan for large scores).
    shifted = Z - Z.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)

def standard_scaler(X):
    """
    Standardise each column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array-like
        Data whose columns (axis 0 statistics) are standardised.

    Returns
    -------
    numpy.ndarray
        A new float array, the same shape as X. Columns with zero standard
        deviation are centred only, so they come back as all zeros.
    """
    X = np.asarray(X, dtype=float)
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # A constant column has std == 0; dividing by it would give 0/0 = nan.
    # Dividing by 1 instead leaves the (already centred) zeros in place.
    scale = np.where(std == 0, 1.0, std)
    return (X - mean) / scale

def make_I_matrix(y):
    """
    Build the one-hot (indicator) matrix for the label vector y.

    Column j corresponds to the j-th value of np.unique(y); entry (i, j) is 1
    when y[i] equals that value and 0 otherwise. The result has integer dtype
    and shape (len(y), number of distinct labels).
    """
    classes = np.unique(y)
    # Compare every label against every class in one broadcast operation:
    # (n, 1) == (k,) -> (n, k) boolean matrix, then cast to 0/1 integers.
    labels_as_column = np.asarray(y)[:, None]
    return (labels_as_column == classes).astype(int)

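We use gradient descent on the negative log-likelihood $-\log L(B)$ to find the parameter matrix $B$. With $I$ the one-hot matrix of the targets and $P$ the matrix of softmax probabilities, its gradient is

$\frac{\partial\,(-\log L(B))}{\partial B} = -X^T(I-P) = X^T(P-I)$

so each descent step is $B \leftarrow B + \eta\, X^T(I-P)$, where $\eta$ is the learning rate.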

In [18]:
class MulticlassLogisticRegression:
    """
    Multinomial (softmax) logistic regression fitted with full-batch
    gradient steps.

    After fit() the instance exposes:
      X, y       -- the (possibly standardised / intercept-augmented) design
                    matrix and the labels as a NumPy array
      N, D, K    -- number of rows, number of columns of X, number of classes
      classes    -- sorted unique labels; column j of I, B and P belongs to
                    classes[j]
      I          -- one-hot matrix of y
      B          -- fitted (D, K) coefficient matrix
      Z, P       -- final scores X @ B and their softmax probabilities
      yhat       -- in-sample predicted labels
    """

    def fit(self, X, y, n_iter, lr,
            standardize=True, has_intercept=False, seed=None):
        """
        Fit the coefficient matrix B.

        Parameters
        ----------
        X : array-like of shape (N, n_features)
            Feature matrix. It is copied; the caller's array is not modified.
        y : array-like of shape (N,)
            Class labels.
        n_iter : int
            Number of gradient steps.
        lr : float
            Step size applied to each gradient step.
        standardize : bool, default True
            Standardise the non-constant columns of X before fitting.
        has_intercept : bool, default False
            If False, a leading column of ones is added to X.
        seed : int or None, default None
            Seed for the random initial B. With None the values are drawn
            from NumPy's global random state (so np.random.seed controls
            them); otherwise a dedicated generator seeded with this value
            is used.
        """
        X = np.array(X, dtype=float)
        y = np.asarray(y)

        if standardize:
            # Only scale columns that actually vary. A constant column (for
            # example an intercept column of ones when has_intercept=True)
            # has zero standard deviation; scaling it would divide by zero
            # and fill the column with nan.
            varying = X.std(axis=0) > 0
            X[:, varying] = standard_scaler(X[:, varying])

        if not has_intercept:
            ones = np.ones((X.shape[0], 1))
            X = np.concatenate((ones, X), axis=1)

        self.X = X
        self.N, self.D = X.shape
        self.y = y
        # make_I_matrix orders its columns by np.unique(y); keep the same
        # ordering here so column indices can be mapped back to labels.
        self.classes = np.unique(y)
        self.K = len(self.classes)
        self.n_iter = n_iter
        self.lr = lr

        if seed is None:
            B = np.random.randn(self.D, self.K)
        else:
            B = np.random.default_rng(seed).standard_normal((self.D, self.K))

        self.I = make_I_matrix(self.y)
        for _ in range(n_iter):
            P = softmax(np.dot(self.X, B))
            # X^T (I - P) is the gradient of the log-likelihood, so adding
            # it is a descent step on the negative log-likelihood.
            gradient = np.dot(self.X.T, self.I - P)
            B += lr * gradient

        self.B = B
        self.Z = np.dot(self.X, B)
        self.P = softmax(self.Z)
        # argmax yields a column index; translate it to the actual label so
        # yhat is comparable with y even when labels are not 0..K-1.
        self.yhat = self.classes[self.P.argmax(axis=1)]

In [19]:
# The initial coefficients are drawn at random; seed NumPy's global random
# state so the fitted model (and the score below) is the same on every
# Restart & Run All.
np.random.seed(0)

mc_model = MulticlassLogisticRegression()
mc_model.fit(X, y, n_iter=10**4, lr=0.0001)

print("In-sample acc: %.3f" % (accuracy_score(mc_model.y, mc_model.yhat)))

In-sample acc: 1.000
