# Multi-class Classification

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.io import loadmat
from scipy.optimize import minimize

from sklearn.linear_model import LogisticRegression

# Notebook-wide pandas display settings: plain-text reprs instead of HTML,
# no cap on the number of columns or sequence items shown, and up to 150
# rows before a frame is truncated.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = None
pd.options.display.max_rows = 150
pd.options.display.max_seq_items = None
 
%matplotlib inline

## Loading Dataset

In [None]:
# Read the MATLAB-format dataset into a dict keyed by variable name, then
# show which variables the file contains (displayed as the cell output).
data = loadmat(file_name='ex3data1.mat')
data.keys()

In [None]:
# Labels come straight from the file; the design matrix gets a leading
# column of ones so the intercept is learned as an ordinary parameter.
y = data['y']
X = np.hstack((np.ones((len(data['X']), 1)), data['X']))

print('X: {} (with intercept)'.format(X.shape))
print('y: {}'.format(y.shape))

## Visualizing the data

In [None]:
# Draw 20 random row indices (with replacement) and show those examples as
# one image strip. The intercept column is skipped; each row's pixels are
# regrouped into 20-wide chunks and transposed before plotting.
sample = np.random.choice(len(X), size=20)
plt.figure(figsize=(15, 10))
plt.imshow(np.transpose(np.reshape(X[sample, 1:], (-1, 20))), cmap="Greys")
plt.axis('off');

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

## Vectorizing Logistic Regression

In [None]:
def lrcostFunctionReg(theta, reg, X, y):
    """Return the L2-regularized logistic regression cost for ``theta``.

    Parameters
    ----------
    theta : array-like
        Model parameters, one per column of ``X``. Accepted as a flat
        vector or as a column vector; it is flattened internally.
    reg : float
        Regularization strength. The first parameter (the intercept) is
        not penalized.
    X : array-like
        Design matrix with one row per example.
    y : ndarray
        Binary (0/1) targets, one per row of ``X``; flattened internally.

    Returns
    -------
    float
        The scalar cost, or ``np.inf`` if the computation produced NaN so
        that an optimizer treats the point as unusable.
    """
    targets = np.asarray(y, dtype=float).ravel()
    params = np.asarray(theta, dtype=float).ravel()
    m = targets.size

    z = np.dot(X, params)

    # With h = sigmoid(z): -log(h) == logaddexp(0, -z) and
    # -log(1 - h) == logaddexp(0, z). Using these identities avoids
    # evaluating log(0) when the sigmoid saturates.
    data_term = (np.dot(targets, np.logaddexp(0, -z))
                 + np.dot(1 - targets, np.logaddexp(0, z))) / m

    # The intercept (first parameter) is excluded from the penalty.
    penalty = (reg / (2 * m)) * np.dot(params[1:], params[1:])

    J = data_term + penalty
    if np.isnan(J):
        return np.inf
    return float(J)

In [None]:
def lrgradientReg(theta, reg, X, y):
    """Return the gradient of the L2-regularized logistic regression cost.

    Parameters
    ----------
    theta : array-like
        Model parameters, one per column of ``X``. Accepted as a flat
        vector or as a column vector; it is flattened internally.
    reg : float
        Regularization strength. The first parameter (the intercept) is
        not penalized.
    X : array-like
        Design matrix with one row per example.
    y : ndarray
        Binary (0/1) targets, one per row of ``X``; flattened internally.

    Returns
    -------
    ndarray
        Flat vector of partial derivatives, one per parameter.
    """
    targets = np.asarray(y, dtype=float).ravel()
    params = np.asarray(theta, dtype=float).ravel()
    design = np.asarray(X)
    m = targets.size

    # Flattening both operands keeps the residual one-dimensional; mixing a
    # flat prediction with a column-vector target would broadcast to (m, m).
    residual = sigmoid(np.dot(design, params)) - targets

    grad = np.dot(design.T, residual) / m
    # The intercept (first parameter) is excluded from the penalty.
    grad[1:] += (reg / m) * params[1:]
    return grad

## One-vs-all Classification

In [None]:
def oneVsAll(features, classes, n_labels, reg):
    """Train one regularized logistic regression classifier per label.

    Parameters
    ----------
    features : ndarray
        Design matrix with one row per example.
    classes : ndarray
        Label of each example; labels are compared against 1..``n_labels``.
    n_labels : int
        Number of distinct labels, i.e. number of classifiers to train.
    reg : float
        Regularization strength forwarded to the cost and gradient.

    Returns
    -------
    ndarray
        Array of shape ``(n_labels, n_params)``; row ``c - 1`` holds the
        parameters fitted for label ``c``.
    """
    # Size everything from the argument rather than from a notebook global,
    # so the function works for any design matrix it is given.
    n_params = features.shape[1]

    # scipy.optimize.minimize expects a one-dimensional starting point.
    initial_theta = np.zeros(n_params)
    all_theta = np.zeros((n_labels, n_params))

    for c in range(1, n_labels + 1):
        # Binary targets for "label c versus everything else".
        binary_targets = (classes == c) * 1
        res = minimize(lrcostFunctionReg, initial_theta,
                       args=(reg, features, binary_targets), method=None,
                       jac=lrgradientReg, options={'maxiter': 50})
        all_theta[c - 1] = res.x
    return all_theta

In [None]:
# Fit the ten one-vs-all classifiers with regularization strength 0.1 and
# display the resulting parameter matrix as the cell output.
theta = oneVsAll(features=X, classes=y, n_labels=10, reg=0.1)
theta

In [None]:
def predictOneVsAll(all_theta, features):
    """Predict a label for each example from one-vs-all parameters.

    Parameters
    ----------
    all_theta : array-like
        One row of parameters per label; row ``k`` belongs to label
        ``k + 1``.
    features : array-like
        Design matrix with one row per example and one column per
        parameter.

    Returns
    -------
    ndarray
        Vector ``p`` with one predicted label (from 1 to the number of rows
        in ``all_theta``) per example.
    """
    # The sigmoid is strictly increasing, so the classifier with the largest
    # linear score is also the one with the largest probability; the argmax
    # can therefore be taken on the raw scores.
    scores = np.dot(features, np.asarray(all_theta).T)

    # argmax is zero-based while labels start at 1.
    p = np.argmax(scores, axis=1) + 1
    return p

In [None]:
# Share of training examples whose predicted label equals the true label.
pred = predictOneVsAll(theta, X)
print('Training set accuracy: {} %'.format((pred == y.ravel()).mean() * 100))

## Multiclass Logistic Regression with scikit-learn

In [None]:
# scikit-learn fits its own intercept, so the leading column of ones in X
# is left out when training.
clf = LogisticRegression(penalty='l2', C=10, solver='liblinear')
clf.fit(X[:, 1:], y.ravel())

In [None]:
# Score the scikit-learn model on the same intercept-free features it was
# trained on.
pred2 = clf.predict(X[:, 1:])
print('Training set accuracy: {} %'.format((pred2 == y.ravel()).mean() * 100))

# Neural Networks

## Model representation

In [None]:
# Load the pre-trained network parameters first, then unpack the two layer
# matrices stored under the 'Theta1' and 'Theta2' keys.
weights = loadmat('ex3weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

print('theta1: {}'.format(theta1.shape))
print('theta2: {}'.format(theta2.shape))

## Feedforward Propagation and Prediction

In [None]:
def predict(theta_1, theta_2, features):
    """Predict a label for each example with a two-layer neural network.

    Parameters
    ----------
    theta_1 : ndarray
        Weights mapping the input (including its bias column) to the hidden
        layer; one row per hidden unit.
    theta_2 : ndarray
        Weights mapping the hidden layer (including a bias unit) to the
        output layer; one row per label.
    features : ndarray
        Input matrix with one row per example; expected to already contain
        the bias column, as the notebook's ``X`` does.

    Returns
    -------
    ndarray
        Vector ``p`` with one predicted label (from 1 to the number of rows
        in ``theta_2``) per example.
    """
    # Hidden-layer activations, one row per example.
    hidden = sigmoid(np.dot(features, theta_1.T))

    # Prepend the bias unit, sized from the argument rather than from a
    # notebook global.
    hidden = np.hstack((np.ones((hidden.shape[0], 1)), hidden))

    # The output sigmoid is strictly increasing, so the argmax over the raw
    # output scores selects the same unit as the argmax over activations.
    output_scores = np.dot(hidden, theta_2.T)

    # argmax is zero-based while labels start at 1.
    p = np.argmax(output_scores, axis=1) + 1
    return p

In [None]:
# Share of training examples the pre-trained network labels correctly.
pred = predict(theta1, theta2, X)
print('Training set accuracy: {} %'.format((pred == y.ravel()).mean() * 100))