# Logistic Regression


Logistic regression is a classification that assumes there exists linear relationship between the observable signals (features) $x = (x_1, x_2, ..., x_k)$ and the outcome $y$. Mathematically, it is represented as $\hat{y}=\sigma (w^Tx) = \frac{1}{1 + e^{w^Tx}}$, where w is the weight vector of the model learned against the given training data.

![Logistic Regression](https://github.com/yxjiang/ml_playground/blob/master/hand_made_algorithm/imgs/logistic_regression.png?raw=true)

Cross entropy (logloss) is leveraged to quantify the accuracy of the model, i.e. $C = -\sum_m y^{(m)} \log{\hat{y^{(m)}}}$, or $C = -\sum_m y^{(m)}\log \hat{y^{(m)}} - (1 - y)\log{1 - \hat{y^{(m)}}}$ for binary classification case.

In [64]:
import numpy as np

class LogisticRegression:
    
    def __init__(self, ndim, l2_weight):
        self.W = np.random.randn(ndim + 1, 1)  # ndim + bias
        self.l2_weight = l2_weight
        
    def sigmoid_(self, y):
        return 1 / (1 + np.exp(-y))
        
    def predict(self, X):
        """Conduct prediction for the given batch of X."""
        bias = np.ones((X.shape[0], 1))  # dim: (n_batch, 1)
        X = np.concatenate((bias, X), axis=1)  # dim: (n_batch, d+1)
        return self.sigmoid_(self.W.T.dot(X.T).T)  # dim: (n_batch, 1)
    
    def train(self, X, y, lr):
        outputs = self.predict(X)  # dim: (n_batch, 1)
        preds_diff = -(np.expand_dims(y, axis=1) - outputs)  # dim: (n_batch, 1)
        bias = np.ones((X.shape[0], 1))  # dim: (n_batch, 1)
        X_with_bias = np.concatenate((bias, X), axis=1)
        dW = np.sum(preds_diff * X_with_bias + self.l2_weight * self.W.T, axis=0)
        self.W -= lr * np.expand_dims(dW, axis=1)
        return abs(np.sum(preds_diff) / len(preds_diff))

In [65]:
n_dim, n_batch = 15, 10
model = LogisticRegression(ndim=n_dim, l2_weight=0.01)

for it in range(100):
    X = np.random.randn(n_batch, n_dim)
    y = np.array(np.sum(X, axis=1) >= 1.0, dtype=float)
    loss = model.train(X, y, lr=0.001)
    if it % 10 == 0:
        print('loss: %.3f' % loss)

[[1.00000000e+00 9.78491077e-01]
 [1.00000000e+00 9.99270822e-01]
 [0.00000000e+00 7.24929625e-02]
 [0.00000000e+00 3.48036274e-01]
 [0.00000000e+00 4.48700322e-04]
 [1.00000000e+00 2.26776883e-02]
 [0.00000000e+00 9.98851584e-01]
 [1.00000000e+00 5.08315535e-03]
 [1.00000000e+00 9.51030934e-01]
 [0.00000000e+00 1.85582938e-01]]
loss: 0.044
[[1.         0.0221626 ]
 [1.         0.47753335]
 [1.         0.58815829]
 [0.         0.99908571]
 [0.         0.06682527]
 [1.         0.82890758]
 [1.         0.14134524]
 [1.         0.09230863]
 [0.         0.08452156]
 [0.         0.39804332]]
[[1.         0.07087007]
 [1.         0.97936226]
 [0.         0.99174037]
 [1.         0.48549428]
 [1.         0.00345863]
 [0.         0.88267455]
 [1.         0.2263869 ]
 [0.         0.38389672]
 [1.         0.99993571]
 [0.         0.96021466]]
[[1.         0.83039486]
 [0.         0.69089454]
 [0.         0.00156239]
 [0.         0.86417563]
 [1.         0.34710274]
 [0.         0.00611636]
 [1. 