In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import matplotlib
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and newer
# releases no longer ship the old 'seaborn' name, so use whichever one this
# installation provides.
_seaborn_style = (
    'seaborn-v0_8'
    if 'seaborn-v0_8' in matplotlib.pyplot.style.available
    else 'seaborn'
)
matplotlib.pyplot.style.use(_seaborn_style)
matplotlib.rcParams['figure.figsize'] = (15, 5)

%matplotlib inline

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one (later cells rely on this to show several results at once).
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Print NumPy floats with 2 decimal places and without scientific notation.
np.set_printoptions(precision=2, suppress=True)

In [None]:
import math
import copy

import scipy.stats as stats

In [None]:
from sklearn import model_selection, metrics, datasets

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Model

##### $y = Xw + \epsilon$

$\begin{bmatrix}
y_0 \\
... \\
y_n \\
\end{bmatrix} = $
$\begin{bmatrix}
x_0^0 & x_0^1 &  ... & x_0^j \\
... \\
x_n^0 & x_n^1 &  ... & x_n^j
\end{bmatrix}$
$\begin{bmatrix}
w_0 \\
...\\
w_j
\end{bmatrix} +$
$\begin{bmatrix}
\epsilon_0 \\
...\\
\epsilon_n
\end{bmatrix}$

# Classification

### Link function: squeeze the real-valued regression output into $(0,1)$ (so it can be interpreted as a probability)

$P(y=1|X, w) = Sigmoid(Model(X, w))$

---

### Logistic function (sigmoid, the inverse of the logit)

### $Sigmoid(X, w)  = \sigma(X,w) = \frac{1}{1 + e^{-Xw}}$

In [None]:
def sigmoid(M):
    """Apply the logistic function 1 / (1 + exp(-M)) element-wise.

    M may be a scalar or a NumPy array; the result has the same shape as M
    and every value lies between 0 and 1.
    """
    exp_neg = np.exp(-M)
    return 1 / (1 + exp_neg)

In [None]:
# Plot the sigmoid on [-5, 5] to show how it squashes the real line into (0, 1).
xs = np.linspace(-5, 5, 100)
ys = sigmoid(xs)  # sigmoid works element-wise, so no per-point Python loop is needed
plt.plot(xs, ys)
plt.xlabel('x')
plt.ylabel('sigmoid(x)')
plt.title('Logistic (sigmoid) function');

In [None]:
# Toy design matrix: three rows (samples); the first column is all ones
# (the intercept / bias term).
X = np.array(
    [[1, 2, 5],
     [1, 5, 5],
     [1, 8, 5]],
    dtype=np.float64,
)

# One weight per column of X.
w = np.full(3, 0.1, dtype=np.float64)

# P(y = 1 | x) for every row: sigmoid of the linear score X.w
proba = sigmoid(np.dot(X, w))
proba

# Cost function

$Cost(w, X, y) = - \sum\left( y_i log(\sigma(w x_i)) + (1-y_i)log(\sigma(-w x_i)) \right)$

$-\left( y^T \log\sigma(Xw) + (1-y)^T \log\sigma(-Xw) \right)$

$\triangledown Cost = -\sum x_i \left( y_i - \sigma(w x_i) \right)$

$-X^T \left( y - \sigma(Xw) \right)$

In [None]:
# Toy data: three rows (samples); the leading column of ones is the intercept term.
X = np.array([
    [1, 2, 15,  8],
    [1, 5, 45, 12],
    [1, 8, 53, 33],
], dtype=np.float64)

y = np.array([0, 1, 0], dtype=np.float64)
w = np.array([0.1, 0.1, 0.1, 0.1], dtype=np.float64)


# Negative log-likelihood: the per-sample terms are summed (an L2 norm would
# give a different value with the wrong sign), so the cost is non-negative.
part1 = y * np.log(sigmoid(X.dot(w)))
part2 = (1-y) * np.log(sigmoid(-X.dot(w)))
cost = -np.sum(part1 + part2)
cost

# Gradient of the cost: -X^T (y - sigmoid(Xw)). The residual is taken against
# the predicted probability, not the raw linear score Xw.
grad = -X.T.dot(y - sigmoid(X.dot(w)))
grad

# Task - Cost function minimization

$Cost(w, X, y) \Rightarrow \underset{w}{min}$

In [None]:
def logistic(w, X, y):
    """Negative log-likelihood (cross-entropy) cost of logistic regression.

    Computes
        -sum_i [ y_i * log(sigma(x_i w)) + (1 - y_i) * log(sigma(-x_i w)) ]
    where sigma is the logistic function.

    Parameters
    ----------
    w : array, one weight per column of X.
    X : 2-D array, one row per sample.
    y : array of 0/1 labels, one per row of X.

    Returns
    -------
    Scalar cost; it is >= 0 and gets smaller as the fit improves.
    """
    z = np.dot(X, w)
    # log(sigma(z)) == -logaddexp(0, -z). Using logaddexp keeps the value finite
    # when the sigmoid saturates, where log(sigmoid(z)) would give -inf and
    # 0 * -inf would turn the whole cost into NaN.
    log_p1 = -np.logaddexp(0, -z)
    log_p0 = -np.logaddexp(0, z)
    # The per-sample terms are summed; taking an L2 norm here would yield a
    # non-positive number that is not the log-likelihood.
    return -np.sum(y * log_p1 + (1 - y) * log_p0)

In [None]:
def glogistic(w, X, y):
    """Gradient of the logistic cost with respect to w: -X^T (y - sigma(Xw)).

    Parameters
    ----------
    w : array, one weight per column of X.
    X : 2-D NumPy array, one row per sample.
    y : array of 0/1 labels, one per row of X.

    Returns
    -------
    Array with one entry per column of X.
    """
    z = np.dot(X, w)
    # sigma(z) written as exp(-log(1 + exp(-z))) so exp never overflows.
    proba = np.exp(-np.logaddexp(0, -z))
    # The residual must use the predicted probability; using the raw score z
    # would give the gradient of a linear least-squares model instead.
    return -np.dot(X.T, y - proba)

In [None]:
def ridge(w, l):
    """Gradient of the ridge penalty l * sum(w_i ** 2), skipping the intercept.

    w is a NumPy weight vector whose first entry is the intercept w0; it is
    not modified. Returns 2 * l * w with the first entry forced to 0.
    """
    penalized = w.copy()
    # The intercept term w0 is not penalized.
    penalized[0] = 0
    gradient = 2 * l * penalized
    return gradient

In [None]:
def minimize(X, y, cost, grad, reg, iterations, epsilon, alpha, reg_coef):
    """Fit weights by batch gradient descent on a regularised cost.

    Parameters
    ----------
    X : 2-D array of features, one row per sample (without an intercept column).
    y : array of targets, one per row of X.
    cost : callable cost(w, X, y) -> scalar, recorded after every step.
    grad : callable grad(w, X, y) -> gradient of the cost with respect to w.
    reg : callable reg(w, reg_coef) -> gradient of the regularisation term.
    iterations : maximum number of gradient steps.
    epsilon : stop as soon as a step moves w by less than this (L2 norm).
    alpha : learning rate.
    reg_coef : regularisation strength passed to reg.

    cost and grad receive X with a leading column of ones, and w with the
    intercept as its first entry.

    Returns
    -------
    (intercept, weights, error) : the intercept w[0], the remaining weights
    w[1:], and the list of cost values recorded after each step.
    """
    X = np.asarray(X, dtype=np.float64)
    # Prepend the intercept column: one 1 per sample (row), for any number of
    # samples and features rather than only a 3x3 input.
    X = np.column_stack([np.ones(X.shape[0]), X])

    # Initialize the weight vector (intercept first) at zero.
    w = np.zeros(X.shape[1], dtype=np.float64)

    error = []

    for _ in range(iterations):
        previous = w
        w = previous - alpha * (grad(previous, X, y) + reg(previous, reg_coef))
        # Converged: the step is too small to matter. The cost of this final
        # step is not recorded.
        if np.linalg.norm(w - previous) < epsilon:
            break
        error.append(cost(w, X, y))

    return w[0], w[1:], error

In [None]:
def predict(X, w, coef):
    """Return sigmoid(coef + X.w) for every row of X.

    Parameters
    ----------
    X : 2-D array of features, one row per sample (without an intercept column).
    w : array of feature weights, one per column of X.
    coef : scalar intercept.

    Returns
    -------
    1-D array with one value between 0 and 1 per row of X.
    """
    X = np.asarray(X, dtype=np.float64)
    # Prepend the intercept column: one 1 per sample (row), for any number of
    # samples and features rather than only a 3x3 input.
    X = np.column_stack([np.ones(X.shape[0]), X])
    # Put the intercept in front so it lines up with the column of ones.
    w = np.append(np.array(coef), w)

    return sigmoid(X.dot(w))

In [None]:
# Training features: a 3x3 matrix, one row per sample.
X = np.array(
    [[0.2, 0.15, 0.8],
     [0.5, 0.45, 0.12],
     [0.8, 0.53, 0.33]],
    dtype=np.float64,
)

# Binary label for each row of X.
y = np.array([0, 1, 0], dtype=np.float64)

In [None]:
# Fit by gradient descent using the logistic cost, its gradient and a ridge penalty.
coef, w, error = minimize(
    X, y, logistic, glogistic, ridge,
    iterations=5000,
    epsilon=0.0003,
    alpha=0.1,
    reg_coef=0.001,
)
coef
w

In [None]:
# Cost recorded after each gradient-descent step.
plt.plot(error)
plt.xlabel('iteration')
plt.ylabel('cost')
plt.title('Training cost per iteration');

In [None]:
# Predicted P(y = 1) for each training row, using the fitted intercept and weights.
predict(X, w, coef)

# Check against sklearn

In [None]:
# The model fitted above is a logistic classifier, so the sklearn counterpart is
# LogisticRegression (LinearRegression fits least squares and is not comparable).
from sklearn.linear_model import LogisticRegression

# sklearn minimises 0.5 * ||w||^2 + C * sum(log-loss), so a ridge penalty of
# reg_coef * ||w||^2 with reg_coef = 0.001 corresponds to C = 1 / (2 * reg_coef).
# NOTE(review): the gradient-descent fit stops early on its epsilon threshold, so
# expect similar rather than identical numbers.
reg = LogisticRegression(C=1 / (2 * 0.001)).fit(X, y)

reg.intercept_

reg.coef_
# Probability of class 1 for each row, comparable with predict(X, w, coef).
reg.predict_proba(X)[:, 1]