# Logistic Regression


Logistic regression is a classification model that assumes there exists a linear relationship between the observable signals (features) $x = (x_1, x_2, ..., x_k)$ and the log-odds of the outcome $y$. Mathematically, it is represented as $\hat{y}=\sigma (w^Tx) = \frac{1}{1 + e^{-w^Tx}}$, where $w$ is the weight vector of the model learned from the given training data.

![Logistic Regression](https://github.com/yxjiang/ml_playground/blob/master/hand_made_algorithm/imgs/logistic_regression.png?raw=true)

Cross entropy (log loss) is used to quantify how well the model fits the data, i.e. $C = -\sum_m y^{(m)} \log{\hat{y}^{(m)}}$, or $C = -\sum_m \left[ y^{(m)}\log \hat{y}^{(m)} + (1 - y^{(m)})\log{(1 - \hat{y}^{(m)})} \right]$ for the binary classification case.

In [67]:
import numpy as np

class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    The weights live in ``self.W`` with shape ``(ndim + 1, 1)``; row 0 is the
    bias term, the remaining rows are the per-feature weights.
    """

    def __init__(self, ndim, l2_weight):
        """Create a model with randomly initialised weights.

        Args:
            ndim: Number of input features (the bias is added internally).
            l2_weight: Coefficient of the L2 penalty used by ``train``.
        """
        self.W = np.random.randn(ndim + 1, 1)  # ndim + bias
        self.l2_weight = l2_weight

    @staticmethod
    def _with_bias(X):
        """Return ``X`` with a leading column of ones, dim: (n_batch, d+1)."""
        X = np.asarray(X)
        ones = np.ones((X.shape[0], 1))  # dim: (n_batch, 1)
        return np.concatenate((ones, X), axis=1)

    def _forward(self, X_with_bias):
        """Return predicted probabilities for bias-augmented inputs, dim: (n_batch, 1)."""
        return self.sigmoid_(X_with_bias.dot(self.W))

    def sigmoid_(self, y):
        """Return the element-wise logistic function ``1 / (1 + exp(-y))``.

        Only ``exp`` of non-positive values is evaluated, so large-magnitude
        logits cannot overflow:
            y >= 0:  1 / (1 + exp(-y))
            y <  0:  exp(y) / (1 + exp(y))
        """
        decay = np.exp(-np.abs(y))  # always in (0, 1]
        return np.where(np.asarray(y) >= 0, 1.0, decay) / (1.0 + decay)

    def predict(self, X):
        """Conduct prediction for the given batch of X.

        Args:
            X: Feature matrix, dim: (n_batch, d).

        Returns:
            Predicted probabilities, dim: (n_batch, 1).
        """
        return self._forward(self._with_bias(X))

    def train(self, X, y, lr):
        """Run one gradient-descent step on the batch and update ``self.W`` in place.

        Args:
            X: Feature matrix, dim: (n_batch, d).
            y: Target labels, either dim (n_batch,) or (n_batch, 1).
            lr: Learning rate applied to the summed (not averaged) gradient.

        Returns:
            The absolute value of the mean signed prediction error over the
            batch, computed before the update. NOTE: this is a cheap
            diagnostic, not the cross-entropy; errors of opposite sign cancel.

        Raises:
            ValueError: If ``y`` does not hold exactly one label per row of ``X``.
        """
        X_with_bias = self._with_bias(X)  # dim: (n_batch, d+1)
        n_samples = X_with_bias.shape[0]

        # Normalise labels to a column so 1-D and column-vector inputs behave
        # the same, and so a wrong-length y cannot silently broadcast.
        targets = np.asarray(y, dtype=float).reshape(-1, 1)
        if targets.shape[0] != n_samples:
            raise ValueError(
                'y holds %d labels but X holds %d samples'
                % (targets.shape[0], n_samples)
            )

        preds_diff = self._forward(X_with_bias) - targets  # dim: (n_batch, 1)

        # Gradient of the summed cross-entropy is X^T (y_hat - y). The L2 term
        # is counted once per sample (hence the n_samples factor) and is also
        # applied to the bias row.
        dW = X_with_bias.T.dot(preds_diff) + n_samples * self.l2_weight * self.W
        self.W -= lr * dW
        return abs(np.sum(preds_diff) / len(preds_diff))

In [81]:
# Toy problem: a sample is labelled 1.0 when its features sum to at least 1.0.
n_dim = 15
n_batch = 10
model = LogisticRegression(ndim=n_dim, l2_weight=0.01)

# Every iteration draws a fresh random batch and takes one training step;
# the value returned by train() is reported once every 100 iterations.
for it in range(1000):
    X = np.random.randn(n_batch, n_dim)
    row_totals = X.sum(axis=1)
    y = np.array(row_totals >= 1.0, dtype=float)
    loss = model.train(X, y, lr=0.05)
    if it % 100:
        continue
    print('loss: %.3f' % loss)

loss: 0.188
loss: 0.008
loss: 0.025
loss: 0.109
loss: 0.008
loss: 0.050
loss: 0.040
loss: 0.008
loss: 0.078
loss: 0.036
