In [28]:
import numpy as np

from scipy.optimize import minimize
from scipy.io import loadmat

from bokeh.plotting import show, figure
from bokeh.io import output_notebook
from bokeh.layouts import column, row, gridplot
                 
from math import nan
                 
from time import time

In [3]:
output_notebook()

In [13]:
def normalize_data(X):
    tmp_X = np.copy(X)
    
    mu_X = X.mean(axis=0)
    std_X = X.std(axis=0)
    
    mu_X[0] = 0
    std_X[0] = 1
    
    tmp_X -= mu_X
    tmp_X /= std_X
    
    tmp_X[0] = 1
    
    return tmp_X, mu_X, std_X


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def hypothesis(X, Theta):
    return sigmoid(X.dot(Theta.transpose()))


def cost(Theta, X, y, lambd):
    
    m, n = X.shape
    
    Z = X.dot(Theta)
    
    return (-1/m) * (y.transpose().dot(np.log(sigmoid(Z))) + (1 - y).transpose().dot(sigmoid(1 - Z))   
                     + lambd*Theta[1:].transpose().dot(Theta[1:]))


def gradient(Theta, X, y, lambd):
    
    m, n = X.shape
    
    H = sigmoid(X.dot(Theta))
    
    tmp_Theta = np.copy(Theta)
    
    tmp_Theta[0] = 0
    
    return 1/m * (X.transpose().dot(H - y) + lambd*tmp_Theta)


def tmp_minimize_cost(X, y, lambd, method = 'BFGS'):
    m, n = X.shape
    
    Theta = np.zeros(n)

    return minimize(fun = cost, 
                       x0 = Theta, 
                       args = (X, y, lambd),
                       method = method,
                       jac = gradient)


def minimize_cost(X, y, lambd, K, method = 'BFGS'):
    
    m, n = X.shape
    
    if K < 3:
        result = tmp_minimize_cost(X, y, lambd, method)
        
        return result['x'], result['fun']
    
    else:
        
        All_theta = np.zeros((K, n))
        All_costs = np.zeros(K)
    
        for i in range(0, K):

            tmp_y = np.copy(y)

            good = np.where(tmp_y == i)
            bad = np.where(tmp_y != i)

            tmp_y[good] = 1
            tmp_y[bad] = 0

            result = tmp_minimize_cost(X, tmp_y, lambd, method)

            All_theta[i] = result['x']
            All_costs[i] = result['fun']
        
        return All_theta, All_costs

    
def tmp_grad_descent(X, y, alpha, lambd, iterations, get_plt = False):
    m, n = X.shape
    
    Theta = np.zeros(n)
    
    if get_plt:
        iter_x_data = np.arange(iterations)
        iter_y_data = np.zeros(iterations)
    
    for i in range(iterations):
        grad = gradient(Theta, X, y, lambd)
        Theta -= alpha*grad
        
        if get_plt:
            iter_y_data[i] = cost(Theta, X, y, lambd)
      
    if get_plt:
        return Theta, iter_x_data, iter_y_data
    
    else:
        return Theta

    
def grad_descent(X, y, alpha, lambd, K, iterations):
    
    if K < 3:
        Theta = tmp_grad_descent(X, y, alpha, lambd, iterations)
        return Theta, cost(Theta, X, y, lambd)
    
    else:
        
        All_theta = np.zeros((K, n))
        All_costs = np.zeros(K)
        
        for i in range(0, K):

            tmp_y = np.copy(y)

            good = np.where(tmp_y == i)
            bad = np.where(tmp_y != i)

            tmp_y[good] = 1
            tmp_y[bad] = 0

            All_theta[i] = tmp_grad_descent(X, tmp_y, alpha, lambd, iterations)
            All_costs[i] = cost(All_theta[i], X, y, lambd)
        
        return All_theta, All_costs
    

def predict(X, Theta):
    
    predictions = hypothesis(X, Theta)
    
    if len(predictions.shape) == 1:
        predictions[predictions >= 0.5] = 1
        predictions[predictions < 0.5] = 0
        
    else:
        predictions = np.argmax(predictions, axis=1)
    
    return predictions


def accuracy(y, predicted_y):
    
    correct = predicted_y[predicted_y == y]
    
    right = correct.size / y.size
    wrong = 1 - right
    
    return {'correct':right, 'error':wrong}

In [14]:
#Connect Four

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\connect-4.txt", delimiter=',', dtype=np.str)

# #b = 0, x = 1, o = 2, 
# #loss = 0, win = 1, draw =2
# raw_data[raw_data=='b'] = 0
# raw_data[raw_data=='x'] = 1
# raw_data[raw_data=='o'] = 2
# raw_data[raw_data=='loss'] = 0
# raw_data[raw_data=='win'] = 1
# raw_data[raw_data=='draw'] = 2

# raw_data = raw_data.astype(np.float32)
# raw_data = np.insert(raw_data, 0, 1, axis=1)

# X = raw_data[:, :-1]
# y = raw_data[:, -1]

# m, n = X.shape

# K = 3

# #X, mu_X, std_X = normalize_data(X)

# lambd = 1

In [15]:
# #Breast cancer

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\bsc.txt", delimiter=',')
# raw_data = np.nan_to_num(raw_data)
# raw_data = raw_data[:, 1:]
# raw_data[np.where(raw_data[:, -1] == 2), -1] = 0
# raw_data[np.where(raw_data[:, -1] == 4), -1] = 1
# raw_data = np.insert(raw_data, 0, 1, axis=1)

# X = raw_data[:, :-1]
# y = raw_data[:, -1]

# m, n = X.shape

# K = 2

# #X, mu_X, std_X = normalize_data(X)

# lambd = 1

In [16]:
# #Pen Digits

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\pendigits_tes.txt", delimiter=',')
# raw_data = np.insert(raw_data, 0, 1, axis=1)

# X = raw_data[:, :-1]
# y = raw_data[:, -1]

# m, n = X.shape

# K = 10

# #X, mu_X, std_X = normalize_data(X)

# lambd = 1

In [17]:
# #Opt Digits

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\optdigits_tes.txt", delimiter=',')
# raw_data = np.insert(raw_data, 0, 1, axis=1)

# X = raw_data[:, :-1]
# y = raw_data[:, -1]

# m, n = X.shape

# K = 10

# #X, mu_X, std_X = normalize_data(X)

# lambd = 1

In [18]:
#Andrew's handwritten digit recognition

contents1 = loadmat(r"C:\Users\Dell\Documents\CS\Machine Learning\Andrew's Assignements\machine-learning-ex4\ex4\ex4data1.mat")
tmp_X = contents1['X']
tmp_y = contents1['y']
tmp_y[tmp_y == 10] = 0
tmp_y = tmp_y.flatten()

raw_data = np.insert(tmp_X, 400, tmp_y, axis = 1)

np.random.shuffle(raw_data)

K = 10

all_examples = raw_data[:, :].shape[0]

examples = int(0.7 * all_examples)
test_examples = all_examples - examples

X = raw_data[:examples, :-1]
y = raw_data[:examples, -1]

test_X = raw_data[examples: examples + test_examples ,:-1]
test_y = raw_data[examples: examples + test_examples , -1]

m, n = X.shape #m - no of examples, n no of parameters

lambd = 3

In [19]:
%%time
All_theta, All_costs = minimize_cost(X, y, lambd, K, 'BFGS')

Wall time: 42.8 s


In [22]:
accuracy(test_y, predict(test_X, All_theta))

88.13333333333334

In [25]:
%%time
All_theta, All_costs = grad_descent(X, y, 1/m , lambd, K, 1000)

Wall time: 3min 36s


In [27]:
predictions = predict(X, All_theta)
accuracy(y, predictions)

71.77142857142857