# Multi-class classification

In [206]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [207]:
# Read the MATLAB-format data file into a dict of its stored variables.
data = loadmat('ex3data1.mat')
# Echo the dict so its keys ('X', 'y' and the file-header entries) are visible.
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [208]:
# Check the dimensions of the example array and the label array before using them.
data['X'].shape, data['y'].shape

((5000, 400), (5000, 1))

In [209]:
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise.

    ``z`` may be a scalar or any NumPy array-like object that supports unary
    minus; the result has the same shape (and array subclass) as ``z``.
    """
    exp_neg = np.exp(-z)
    return 1 / (exp_neg + 1)

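## Regularized cost function
$J\left( \theta  \right)=\frac{1}{m}\sum\limits_{i=1}^{m}{\left[ -{{y}^{(i)}}\log \left( {{h}_{\theta }}\left( {{x}^{(i)}} \right) \right)-\left( 1-{{y}^{(i)}} \right)\log \left( 1-{{h}_{\theta }}\left( {{x}^{(i)}} \right) \right) \right]}+\frac{\lambda }{2m}\sum\limits_{j=1}^{n}{\theta _{j}^{2}}$

Here ${{h}_{\theta }}\left( x \right)=\text{sigmoid}\left( {{\theta }^{T}}x \right)$, and the bias parameter ${{\theta }_{0}}$ is excluded from the regularization sum.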

In [210]:
def reg_cost(theta, X, y, lamda):
    """Regularized logistic-regression cost.

    Returns the mean cross-entropy between ``y`` and ``sigmoid(X @ theta)``
    plus ``lamda / (2 * m)`` times the sum of the squared parameters, where
    ``m`` is the number of examples. ``theta[0]`` (the bias term) is left
    out of the penalty.

    Parameters
    ----------
    theta : array-like with n entries
        Parameter vector; any shape is accepted and flattened.
    X : array-like, shape (m, n)
        Design matrix, already including the bias column.
    y : array-like with m entries
        0/1 targets. Accepted as a column, a row or a flat vector.
    lamda : float
        Regularization strength.

    Returns
    -------
    float
        The scalar cost.

    Raises
    ------
    ValueError
        If the number of targets differs from the number of rows of ``X``.
    """
    theta = np.asarray(theta, dtype=float).reshape(-1)
    X = np.atleast_2d(np.asarray(X, dtype=float))
    # Flatten the targets so a 1-D ``y`` cannot broadcast against the
    # hypothesis into an (m, m) grid, and make them float so that ``-y``
    # does not wrap around for unsigned integer labels.
    y = np.asarray(y, dtype=float).reshape(-1)
    m = y.size
    if X.shape[0] != m:
        raise ValueError(
            "X has %d rows but y has %d entries" % (X.shape[0], m)
        )

    h = sigmoid(X @ theta)  # evaluated once and reused for both log terms
    cross_entropy = -y * np.log(h) - (1 - y) * np.log(1 - h)
    reg = (lamda / (2 * m)) * np.sum(theta[1:] ** 2)
    return np.sum(cross_entropy) / m + reg

In [211]:
def reg_gradient(theta, X, y, lamda):
    """Gradient of the regularized logistic-regression cost.

    Computes ``(1 / m) * (sigmoid(X @ theta) - y) @ X`` and adds
    ``(lamda / m) * theta`` to every component except the first (the bias
    term is not regularized). ``m`` is the number of examples.

    Parameters
    ----------
    theta : array-like with n entries
        Parameter vector; any shape is accepted and flattened. It is not
        modified.
    X : array-like, shape (m, n)
        Design matrix, already including the bias column.
    y : array-like with m entries
        0/1 targets. Accepted as a column, a row or a flat vector.
    lamda : float
        Regularization strength.

    Returns
    -------
    numpy.matrix, shape (1, n)
        The gradient as a row matrix (the return type this function has
        always had).

    Raises
    ------
    ValueError
        If the number of targets differs from the number of rows of ``X``.
    """
    theta = np.asarray(theta, dtype=float).reshape(-1)
    X = np.atleast_2d(np.asarray(X, dtype=float))
    # Flatten the targets so a 1-D ``y`` cannot broadcast the residual into
    # an (m, m) grid and produce a gradient of the wrong shape.
    y = np.asarray(y, dtype=float).reshape(-1)
    m = y.size
    if X.shape[0] != m:
        raise ValueError(
            "X has %d rows but y has %d entries" % (X.shape[0], m)
        )

    residual = sigmoid(X @ theta) - y
    data_term = (1 / m) * (residual @ X)
    # The product below is a new array, so zeroing its first entry leaves
    # the caller's ``theta`` untouched.
    penalty = (lamda / m) * theta
    penalty[0] = 0.0
    return np.matrix(data_term + penalty)

In [212]:
from scipy.optimize import minimize

def one_vs_all(X, y, num_labels, learning_rate):
    """Train one regularized logistic-regression classifier per label.

    For every label ``i`` in ``1..num_labels`` a binary problem
    (``y == i`` versus everything else) is minimised with SciPy's ``TNC``
    method, starting from an all-zero parameter vector.

    Parameters
    ----------
    X : array-like, shape (rows, features)
        Examples without a bias column; a column of ones is prepended here.
    y : array-like with ``rows`` entries
        Labels, as a column or a flat vector.
    num_labels : int
        Number of classifiers to train.
    learning_rate : float
        Despite its name, this value is forwarded to ``reg_cost`` and
        ``reg_gradient`` as their ``lamda`` argument, i.e. it is the
        regularization strength. The name is kept so existing callers that
        pass it by keyword keep working.

    Returns
    -------
    numpy.matrix, shape (num_labels, features + 1)
        Row ``i - 1`` holds the fitted parameters for label ``i``.
    """
    X = np.matrix(X)
    rows, features = X.shape
    all_theta = np.matrix(np.zeros((num_labels, features + 1)))
    X = np.insert(X, 0, values=np.ones(rows), axis=1)
    labels = np.asarray(y).reshape(-1)

    def flat_gradient(theta, *args):
        # minimize documents ``jac`` as returning an array of shape (n,);
        # reg_gradient returns a (1, n) matrix, so flatten it here.
        return np.asarray(reg_gradient(theta, *args), dtype=float).ravel()

    for i in range(1, num_labels + 1):
        theta = np.zeros(features + 1)
        # Vectorised 0/1 target for "is label i", as a column vector.
        y_i = (labels == i).astype(int).reshape(rows, 1)
        fmin = minimize(fun=reg_cost, x0=theta, args=(X, y_i, learning_rate),
                        method='TNC', jac=flat_gradient)
        all_theta[i - 1, :] = fmin.x
    return all_theta

In [215]:
# Pull the examples and labels out of the loaded dict and train the classifiers.
X = data['X']
y = data['y']
# one_vs_all trains one classifier for each label value 1..num_labels.
num_labels = 10
# Despite its name, one_vs_all forwards this value to reg_cost / reg_gradient
# as their `lamda` argument, i.e. it is the regularization strength.
learning_rate = 1
all_theta = one_vs_all(X, y, num_labels, learning_rate)

In [225]:
# Sanity check: one row of fitted parameters per label.
print(all_theta.shape)
def predict_all(X, all_theta):
    """Predict a label for every row of ``X`` from one-vs-all parameters.

    A bias column of ones is prepended to ``X``, every classifier's
    probability ``sigmoid(X @ all_theta.T)`` is evaluated, and the index of
    the largest probability in each row, plus one, is returned (row ``k``
    of ``all_theta`` therefore corresponds to label ``k + 1``).

    Parameters
    ----------
    X : array-like, shape (m, features)
        Examples without a bias column.
    all_theta : array-like, shape (num_labels, features + 1)
        One row of parameters per classifier. Plain arrays and
        ``numpy.matrix`` are both accepted.

    Returns
    -------
    numpy.matrix, shape (m, 1)
        Predicted labels as an integer column.
    """
    examples = np.atleast_2d(np.asarray(X, dtype=float))
    weights = np.atleast_2d(np.asarray(all_theta, dtype=float))
    # Take the row count from X itself instead of relying on a global.
    rows = examples.shape[0]
    examples = np.insert(examples, 0, values=np.ones(rows), axis=1)
    # Explicit matrix product: does not depend on either operand being an
    # np.matrix for ``*`` to mean matrix multiplication.
    probabilities = sigmoid(examples @ weights.T)
    h_argmax = np.argmax(probabilities, axis=1) + 1
    # Return an (m, 1) column matrix, the shape produced when all_theta is
    # the np.matrix built by one_vs_all.
    return np.matrix(h_argmax).T
# Predict a label for every example in X; the result is echoed as the cell output.
predict_all(X, all_theta)

(10, 401)
(5000, 10)
(5000, 1)
