In [1]:
import numpy as np

from scipy.optimize import minimize
from scipy.io import loadmat

from bokeh.plotting import show, figure
from bokeh.io import output_notebook
from bokeh.layouts import column, row, gridplot
                 
from math import nan, inf
                 
from time import time

In [2]:
# Configure Bokeh so that later show() calls render figures inline in the notebook.
output_notebook()

In [None]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    exp(-z) is passed through np.nan_to_num before the division, so an
    overflowing exponential (inf) is replaced by a very large finite
    number and the result stays strictly positive instead of becoming 0.
    """
    exp_neg_z = np.exp(-z)
    denominator = 1 + np.nan_to_num(exp_neg_z)
    return 1 / denominator


def get_nn_y(y, K):
    """One-hot encode a vector of class labels.

    Parameters
    ----------
    y : array_like
        Class labels with values in 0..K-1. Labels are truncated to integers;
        both a flat vector and an (m, 1) column are accepted.
    K : int
        Number of classes (columns of the result).

    Returns
    -------
    numpy.ndarray
        Array of shape (m, K) where row i has a 1 in column y[i] and
        0 elsewhere, with m = number of labels.

    Raises
    ------
    IndexError
        If a label is outside the valid column range.
    """
    # The builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
    labels = np.asarray(y).astype(int).ravel()

    m = labels.size

    nn_y = np.zeros((m, K))

    # Fancy indexing sets one entry per row in a single vectorised step.
    nn_y[np.arange(m), labels] = 1

    return nn_y


def get_theta_shapes(layers):
    """Return the weight-matrix shape for every pair of adjacent layers.

    For consecutive layer sizes (a, b) the shape is (b, a + 1); the extra
    column holds the bias weight. A list of len(layers) - 1 tuples is
    returned (empty when there are fewer than two layers).
    """
    return [(layers[k + 1], layers[k] + 1) for k in range(len(layers) - 1)]


def get_rand_thetas(layers):
    """Draw random initial weight matrices, one per pair of adjacent layers.

    Each matrix has the shape given by get_theta_shapes(layers) and is
    filled with uniform samples from [-eps, eps), where
    eps = sqrt(6 / (layers[k] + layers[k + 1])) for the k-th matrix.
    Uses the global NumPy random state.
    """
    thetas = []

    for k, (n_rows, n_cols) in enumerate(get_theta_shapes(layers)):

        bound = np.sqrt(6/(layers[k] + layers[k + 1]))

        # Rescale samples from [0, 1) to [-bound, bound).
        thetas.append(np.random.rand(n_rows, n_cols) * 2 * bound - bound)

    return thetas


def flatten_thetas(thetas):
    """Concatenate a list of weight matrices into a single 1-D vector.

    Parameters
    ----------
    thetas : sequence of array_like
        Weight matrices; each one is unrolled in row-major (C) order.

    Returns
    -------
    numpy.ndarray
        1-D array holding the entries of thetas[0], thetas[1], ... back to
        back. The result is at least float64, and an empty input yields an
        empty float64 array.
    """
    # The leading empty float64 array keeps two properties of the original
    # np.append loop: integer inputs are promoted to float, and an empty
    # list returns an empty array instead of raising.
    pieces = [np.array([])]
    pieces.extend(np.ravel(theta) for theta in thetas)

    # One concatenate instead of repeated np.append, which re-copied the
    # whole accumulated buffer on every iteration.
    return np.concatenate(pieces)


def deflatten_thetas(flat_thetas, layers):
    """Split a flat parameter vector back into per-layer weight matrices.

    The shapes come from get_theta_shapes(layers); consecutive chunks of
    flat_thetas are reshaped in order, so this is the inverse of
    flatten_thetas for matrices of those shapes.
    """
    thetas = []

    start = 0

    for shape in get_theta_shapes(layers):

        # Each matrix consumes rows * cols consecutive entries.
        stop = shape[0] * shape[1] + start

        thetas.append(np.reshape(flat_thetas[start:stop], shape))

        start = stop

    return thetas


def forward_prop(X, thetas):
    """Run a forward pass and return the values of every layer.

    The returned list has len(thetas) + 1 entries. Entry k (for every layer
    except the last) is that layer's values with a column of ones inserted
    at index 0 (bias unit); the final entry is the output layer without a
    bias column.
    """
    activations = []

    current = np.copy(X)

    for theta in thetas:

        # Prepend the bias unit, record the layer, then propagate.
        with_bias = np.insert(current, 0, 1, axis=1)

        activations.append(with_bias)

        current = sigmoid(with_bias.dot(theta.transpose()))

    activations.append(current)

    return activations
      

def nn_cost(flat_thetas, layers, X, y, lambd):
    """Regularised cross-entropy cost of the network.

    flat_thetas is unrolled with deflatten_thetas(flat_thetas, layers), the
    network output H is obtained from forward_prop, and the cost is

        -1/m * sum(y * log(H) + (1 - y) * log(1 - H))
        + lambd / (2m) * sum of squared weights, bias column excluded

    where m is the number of rows of X.
    """
    thetas = deflatten_thetas(flat_thetas, layers)

    m, _ = X.shape

    H = forward_prop(X, thetas)[-1]

    log_likelihood = y * np.log(H) + (1 - y) * np.log(1 - H)

    cost = (-1/m) * np.sum(log_likelihood)

    for theta in thetas:
        # Column 0 holds the bias weights, which are not regularised.
        cost += lambd/(2*m) * np.sum(np.square(theta[:, 1:]))

    return cost


def back_prop(flat_thetas, layers, X, y, lambd):
    """Gradient of nn_cost with respect to flat_thetas, via backpropagation.

    Returns the per-layer gradients flattened with flatten_thetas, in the
    same layout as flat_thetas. The regularisation term lambd * theta is
    added for every weight except the bias column.
    """
    thetas = deflatten_thetas(flat_thetas, layers)

    m, _ = X.shape

    n_layers = len(layers)

    activations = forward_prop(X, thetas)

    deltas = [None] * n_layers

    # Output-layer error.
    deltas[n_layers - 1] = activations[n_layers - 1] - y

    # Hidden layers, from the last hidden layer down to layer 1.
    for k in reversed(range(1, n_layers - 1)):

        sigmoid_grad = activations[k] * (1 - activations[k])

        propagated = deltas[k + 1].dot(thetas[k]) * sigmoid_grad

        # Drop the column that corresponds to the bias unit.
        deltas[k] = propagated[:, 1:]

    grads = []

    for k, theta in enumerate(thetas):

        # Copy of the weights with the bias column zeroed, so the bias
        # weights receive no regularisation gradient.
        reg_theta = np.copy(theta)

        reg_theta[:, 0] = 0

        grads.append(1/m * ((deltas[k + 1].transpose().dot(activations[k])) + lambd * reg_theta))

    return flatten_thetas(grads)


def back_prop_check(flat_thetas, flat_grad, layers, eps, X, y, lambd):
    """Mean absolute difference between flat_grad and a numerical gradient.

    For every parameter index the central difference
    (nn_cost(theta + eps) - nn_cost(theta - eps)) / (2 * eps) is evaluated
    and compared with the corresponding entry of flat_grad. The average of
    the absolute differences over all flat_grad.size entries is returned.
    """
    n_params = flat_grad.size

    total_abs_err = 0

    for k in range(n_params):

        # Perturb a single parameter in each direction on fresh copies.
        upper = np.copy(flat_thetas)
        lower = np.copy(flat_thetas)

        upper[k] += eps
        lower[k] -= eps

        cost_diff = nn_cost(upper, layers, X, y, lambd) - nn_cost(lower, layers, X, y, lambd)

        numeric_grad = 1/(2*eps) * cost_diff

        total_abs_err += abs(numeric_grad - flat_grad[k])

    return total_abs_err/n_params


def gradient_descent(flat_init_thetas, X, y, layers, lambd, alpha, iterations, get_plt_data = False):
    """Minimise nn_cost with fixed-step batch gradient descent.

    Parameters
    ----------
    flat_init_thetas : array_like
        Flat initial parameter vector. It is copied, never modified in place.
    X, y : numpy.ndarray
        Training inputs and targets, forwarded to back_prop / nn_cost.
    layers : sequence of int
        Layer sizes, forwarded to back_prop / nn_cost.
    lambd : float
        Regularisation strength.
    alpha : float
        Step size.
    iterations : int
        Number of gradient steps.
    get_plt_data : bool, optional
        When True, the cost after every step is recorded.

    Returns
    -------
    tuple
        (flat_thetas, final_cost) when get_plt_data is False, otherwise
        (flat_thetas, iter_x, iter_y), where iter_x is the step index and
        iter_y the cost after that step. The parameters are returned flat in
        both modes; use deflatten_thetas to recover the matrices.
    """
    # Work on a float copy: the previous in-place `-=` silently overwrote
    # the caller's initial vector.
    thetas = np.array(flat_init_thetas, dtype=float)

    if get_plt_data:
        iter_x = np.arange(iterations)
        iter_y = np.zeros(iterations)

    for i in range(iterations):
        grad = back_prop(thetas, layers, X, y, lambd)

        thetas -= alpha*grad

        if get_plt_data:
            iter_y[i] = nn_cost(thetas, layers, X, y, lambd)

    if get_plt_data:
        # This branch used to call an undefined `flatten` (NameError).
        # `thetas` is already flat, so it is returned as-is, matching the
        # other branch.
        return thetas, iter_x, iter_y

    return thetas, nn_cost(thetas, layers, X, y, lambd)


def train(X, y, layers, lambd, method, alpha = None, iterations = None):
    """Train a network from random initial weights.

    With method == 'GD' the local gradient_descent routine is used (alpha
    and iterations are forwarded to it); any other value is passed to
    scipy.optimize.minimize as its `method`, with nn_cost as the objective
    and back_prop as the jacobian.

    Returns (thetas, cost): the trained weight matrices as a list and the
    final cost value.
    """
    x0 = flatten_thetas(get_rand_thetas(layers))

    if method != 'GD':
        result = minimize(fun = nn_cost,
                          x0 = x0,
                          args = (layers, X, y, lambd),
                          method = method,
                          jac = back_prop)

        flat_trained, cost = result['x'], result['fun']

    else:
        flat_trained, cost = gradient_descent(x0, X, y, layers, lambd, alpha, iterations)

    return deflatten_thetas(flat_trained, layers), cost

    
def predict(X, thetas):
    """Return, for each row of X, the index of the largest output unit."""
    output_layer = forward_prop(X, thetas)[-1]

    return np.argmax(output_layer, axis=1)
                

def error(X, thetas, answers):
    """Fraction of rows of X whose predicted class differs from answers."""
    guessed = predict(X, thetas)

    # Keep only the predictions that disagree with the expected labels.
    mismatched = guessed[guessed != answers]

    return mismatched.size / answers.size


def accuracy(X, thetas, answers):
    """Classification accuracy in percent: 100 * (1 - error)."""
    error_rate = error(X, thetas, answers)
    return 100*(1 - error_rate)


def training_analysis(method, atr, atr_range,
                      X_train, y_train, nn_y_train,
                      X_val, y_val, 
                      no_of_inputs = None,
                      no_of_classes = None,
                      default_layers = None, 
                      default_lambd = None, 
                      default_alpha = None, 
                      default_iterations = None,
                      print_progress = False):
    """Train once per value of a swept attribute and collect metrics.

    Parameters
    ----------
    method : str
        Passed to train(): 'GD' or a scipy.optimize.minimize method name.
    atr : str
        Attribute being swept. One of:
        'examples'               - number of training rows used;
        'N_hidden_layers_sizes'  - size of each of N hidden layers, where N
                                   is the single leading digit of the string
                                   (needs no_of_inputs and no_of_classes);
        'lambd'                  - regularisation strength;
        'alpha'                  - step size, forwarded to train();
        'max_iterations'         - iteration count, forwarded to train().
    atr_range : iterable
        Values of the swept attribute, one training run per value.
    X_train, y_train, nn_y_train : numpy.ndarray
        Training inputs, integer class labels, and one-hot labels.
    X_val, y_val : numpy.ndarray
        Validation inputs and labels. In 'examples' mode they are ignored and
        the int(3/7 * i) training rows following the first i rows are used
        for validation instead.
    no_of_inputs, no_of_classes : int, optional
        Input / output layer sizes for the hidden-layer sweep.
    default_layers, default_lambd, default_alpha, default_iterations : optional
        Values used for every attribute that is not being swept.
    print_progress : bool, optional
        Print cost, elapsed time and validation error after each run.

    Returns
    -------
    (list, list)
        A list of (name, values) pairs - method, swept attribute, computation
        time in seconds, final cost, validation error, training error - and
        the list of trained thetas, one entry per swept value.

    Raises
    ------
    ValueError
        If atr is not one of the supported attribute names.
    """
    sweeps_examples = atr == 'examples'
    sweeps_hidden = atr[2:] == 'hidden_layers_sizes'

    # Fail early and clearly; previously an unknown atr fell through every
    # branch and failed later on the unbound `thetas`.
    if not (sweeps_examples or sweeps_hidden
            or atr in ('lambd', 'alpha', 'max_iterations')):
        raise ValueError("unknown atr: {!r}".format(atr))

    comp_time = []
    final_cost = [] 
    val_err = []
    train_err = []
    all_thetas = []

    for i in atr_range:

        # Start from the defaults and override only the swept attribute.
        X_fit, nn_y_fit = X_train, nn_y_train
        layers = default_layers
        lambd = default_lambd
        alpha = default_alpha
        iterations = default_iterations

        if sweeps_examples:
            X_fit, nn_y_fit = X_train[:i, :], nn_y_train[:i, :]
        elif sweeps_hidden:
            no_of_layers = int(atr[0])
            layers = [no_of_inputs] + [i] * no_of_layers + [no_of_classes]
        elif atr == 'lambd':
            lambd = i
        elif atr == 'alpha':
            alpha = i
        else:  # 'max_iterations'
            iterations = i

        time_start = time()
        thetas, cost = train(X_fit, nn_y_fit, layers, lambd, method, alpha, iterations)
        time_elapsed = time() - time_start

        if sweeps_examples:
            # Validate on the rows immediately after the i rows trained on.
            # Labels are sliced along the first axis only: the old
            # `y_train[:i, :]` raised IndexError for a 1-D label vector.
            val_end = i + int(3/7*i)
            tmp_X_train, tmp_y_train = X_train[:i, :], y_train[:i]
            tmp_X_val, tmp_y_val = X_train[i:val_end, :], y_train[i:val_end]
        else:
            tmp_X_train, tmp_y_train = X_train, y_train
            tmp_X_val, tmp_y_val = X_val, y_val

        train_err_measured = error(tmp_X_train, thetas, tmp_y_train)
        val_err_measured = error(tmp_X_val, thetas, tmp_y_val)

        final_cost.append(cost)
        comp_time.append(time_elapsed)
        val_err.append(val_err_measured)
        train_err.append(train_err_measured)
        all_thetas.append(thetas)

        if print_progress:
            # print() as a function: the Python 2 statement form is a
            # SyntaxError in Python 3.
            print("cost: {}, time: {}, val_err: {}".format(cost, time_elapsed, val_err_measured))

    return [('method', method),
            (atr,  atr_range), 
            ('computation_time[s]', comp_time), 
            ('final_cost', final_cost),  
            ('val_error', val_err),
            ('train_error', train_err)], all_thetas


def get_plots(plot_data_list, legend_ad = '', color = "blue"):
    """Build Bokeh figures from the (name, values) list of training_analysis.

    plot_data_list[0][1] (the method name) plus legend_ad is used as the
    legend of the per-metric lines; plot_data_list[1] supplies the x axis
    label and values. One figure is created for every entry from index 2 up
    to, but excluding, the last one. A separate error figure draws the last
    two entries together, the final one in green.

    Returns (plots, err_plot).
    """
    # NOTE(review): Bokeh deprecated the `legend=` glyph keyword in favour of
    # `legend_label=` (1.4) and later removed it - confirm against the
    # installed Bokeh version.
    series_legend = plot_data_list[0][1] + legend_ad

    x_label = plot_data_list[1][0]
    x_values = plot_data_list[1][1]

    plots = []

    for entry in plot_data_list[2:-1]:

        fig = figure(x_axis_label = x_label,
                     y_axis_label = entry[0])

        fig.line(x = x_values,
                 y = entry[1],
                 legend = series_legend,
                 color = color)

        plots.append(fig)

    err_plot = figure(x_axis_label = x_label,
                      y_axis_label = plot_data_list[-1][0])

    # Second-to-last entry in the caller's colour, last entry in green.
    for entry, line_color in ((plot_data_list[-2], color),
                              (plot_data_list[-1], 'green')):

        err_plot.line(x = x_values,
                      y = entry[1],
                      legend = entry[0],
                      color = line_color)

    return plots, err_plot


def add_to_plots(plots, plot_data_list, legend_ad = '', color = "firebrick"):
    """Overlay another run on figures previously built by get_plots.

    Entries of plot_data_list from index 2 up to, but excluding, the last
    one are drawn, in order, onto successive figures taken from plots. The
    legend is plot_data_list[0][1] + legend_ad. Nothing is returned; the
    figures are modified in place.
    """
    # NOTE(review): Bokeh deprecated the `legend=` glyph keyword in favour of
    # `legend_label=` (1.4) and later removed it - confirm against the
    # installed Bokeh version.
    series_legend = plot_data_list[0][1] + legend_ad

    x_values = plot_data_list[1][1]

    remaining_figures = iter(plots)

    for entry in plot_data_list[2:-1]:

        target = next(remaining_figures)

        target.line(x = x_values,
                    y = entry[1],
                    legend = series_legend,
                    color = color)
    

In [96]:
# raw_data layout: attributes in columns 0:-1, class label in column -1.
def extract_raw_data(raw_data):
    """Split raw_data row-wise into train / validation / test sets.

    The first int(0.6 * rows) rows form the training set, the next
    int(0.2 * rows) rows the validation set and the remainder the test set.
    The input is copied first, so the returned arrays never alias raw_data.

    Returns X_train, y_train, X_val, y_val, X_test, y_test, where X_* holds
    every column but the last and y_* holds the last column.
    """
    data = np.copy(raw_data)

    n_rows = data.shape[0]

    train_end = int(0.6 * n_rows)
    val_end = train_end + int(0.2 * n_rows)

    features = data[:, :-1]
    labels = data[:, -1]

    return (features[:train_end], labels[:train_end],
            features[train_end:val_end], labels[train_end:val_end],
            features[val_end:], labels[val_end:])

In [97]:
# #Connect Four

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\connect-4.txt", delimiter=',', dtype=np.str)

# #b = 0, x = 1, o = 2, 
# #loss = 0, win = 1, draw =2
# raw_data[raw_data=='b'] = 0
# raw_data[raw_data=='x'] = 1
# raw_data[raw_data=='o'] = 2
# raw_data[raw_data=='loss'] = 0
# raw_data[raw_data=='win'] = 1
# raw_data[raw_data=='draw'] = 2

# raw_data = raw_data.astype(np.float32)

# K = 3 #number of classes

In [99]:
# #Breast cancer

# raw_data = np.genfromtxt("C:\\Users\\Dell\\Documents\\CS\\Machine Learning\\Datasets\\bsc.txt", delimiter=',')
# raw_data = np.nan_to_num(raw_data)
# raw_data = raw_data[:, 1:]
# raw_data[np.where(raw_data[:, -1] == 2), -1] = 0
# raw_data[np.where(raw_data[:, -1] == 4), -1] = 1

# K = 2

In [100]:
# #Abalone age

# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\abalone_age.txt", delimiter=',', 
#                          usecols=(1, 2, 3, 4, 5, 6, 7, 8))
# raw_data2 = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\abalone_age.txt", delimiter=',', 
#                          usecols=(0), dtype=np.str)
# #I = 0, M = 1, F = 2
# raw_data2[raw_data2 == 'I'] = 0
# raw_data2[raw_data2 == 'M'] = 1
# raw_data2[raw_data2 == 'F'] = 2
# raw_data2 = raw_data2.astype(np.float64)
# raw_data = np.insert(raw_data, 0, raw_data2, axis=1)

# raw_data[:, -1] = raw_data[:, -1] - 1

# K = 29

In [101]:
# #Opt Digits
# raw_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\optdigits_tra.txt", delimiter=',')
# raw_test_data = np.genfromtxt(r"C:\Users\Dell\Documents\CS\Machine Learning\Datasets\optdigits_tes.txt", delimiter=',')
# np.random.shuffle(raw_data)
# np.random.shuffle(raw_test_data)
# K = 10

In [122]:
#Andrew's handwritten digit recognition

# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this file exists; consider a configurable data directory.
contents1 = loadmat(r"C:\Users\Dell\Documents\CS\Machine Learning\Andrew's Assignements\machine-learning-ex4\ex4\ex4data1.mat")
tmp_X = contents1['X']
tmp_y = contents1['y']
# Remap label 10 to 0 so that labels can be used directly as 0-based
# column indices when one-hot encoding.
tmp_y[tmp_y == 10] = 0
tmp_y = tmp_y.flatten()

# Append the labels as the LAST column, as extract_raw_data expects. The
# column index is taken from the data instead of the hard-coded 400, so the
# label cannot end up in the middle of a wider feature matrix.
raw_data = np.insert(tmp_X, tmp_X.shape[1], tmp_y, axis = 1)

# Shuffle rows in place. NOTE(review): no random seed is set, so the
# train/validation/test split differs on every run.
np.random.shuffle(raw_data)

K = 10  # number of classes

In [123]:
# Split the shuffled data 60/20/20 (row-wise) into train / validation / test.
X_train, y_train, X_val, y_val, X_test, y_test = extract_raw_data(raw_data)
# One-hot encoded training labels, used as the network's training targets.
nn_y_train = get_nn_y(y_train, K)
# m = number of training examples, n = number of input features.
m, n = X_train.shape

In [131]:
# Attribute to sweep. One of: 'examples', 'N_hidden_layers_sizes' (N = number
# of hidden layers, a single leading digit), 'lambd', 'alpha' (GD only),
# 'max_iterations' (GD only).
atr = 'examples'
atr_range = np.arange(100, 500)

# One training run per value in atr_range; the commented-out keyword arguments
# are only needed for the hidden-layer sweep or for the 'GD' method.
plot_list1, all_thetas1 = training_analysis('TNC', atr, atr_range,
                                            X_train, y_train, nn_y_train,
                                            X_val, y_val, 
#                                             no_of_inputs = n,
#                                             no_of_classes = K,
                                            default_layers = [n, 5, K], 
                                            default_lambd = 1, 
#                                             default_alpha = 1/m, 
#                                             default_iterations = 10000,
                                            print_progress = True)

# plot_list2, all_thetas2 = training_analysis('TNC', test_atr, test_atr_range,
#                                              test_X, test_y, X, y, orig_y,
#                                              default_layers = [n, 10, K], 
#                                              #default_lambd = 3, 
#                                              print_progress = False)

1.9610724223643847 0.3760099411010742 0.367
1.9522726083610344 0.28729939460754395 0.385
1.9402788668676685 0.30782008171081543 0.317
1.942131202984712 0.49381375312805176 0.345
1.924150110213393 0.3404045104980469 0.376
1.902197221904951 0.355823278427124 0.359
1.9024164722148966 0.4392874240875244 0.326
1.9467670041075085 0.3358933925628662 0.377
1.933653600204094 0.45220255851745605 0.397
1.886043575971206 0.5403292179107666 0.346
1.8812925028919514 0.4441835880279541 0.338
1.8930043321023677 0.3328862190246582 0.336
1.9034817621093167 0.41811227798461914 0.396
1.8882790573015646 0.4612996578216553 0.388
1.8498510296161674 0.5314140319824219 0.368
1.9216553569945232 0.5093550682067871 0.374
1.8951065012385895 0.6091177463531494 0.349
1.8488822327514614 0.4692418575286865 0.34




1.93319968314655 0.3018820285797119 0.387
1.8561374189819926 0.385753870010376 0.387
1.8117170854597417 0.29789185523986816 0.344
1.8286311057309028 0.6068770885467529 0.326
1.8207977839404565 0.3755943775177002 0.338
1.8246951079341973 0.2978031635284424 0.321
1.8327811182989358 0.3714871406555176 0.346
1.7858612413344108 0.5133669376373291 0.317
1.8321695578833759 0.4967663288116455 0.337
1.8689980717327854 0.3940470218658447 0.347
1.804040579011661 0.44217777252197266 0.327
1.7892410597574273 0.566434383392334 0.313
1.772121505809082 0.3752462863922119 0.327
1.7925984002149435 0.7487030029296875 0.304
1.7618157503735434 0.6160831451416016 0.321
1.7359325977702484 0.6448535919189453 0.317
1.7818516229925825 0.3885340690612793 0.364
1.7804852155960134 0.5669920444488525 0.365
1.7550658501256495 0.5243120193481445 0.324
1.7346477011055126 0.5621654987335205 0.297
1.7717267217819015 0.52413010597229 0.354
1.7329103922308675 0.5989701747894287 0.315
1.750862277717626 0.5625782012939453 0

1.3932689489077243 1.3179712295532227 0.251
1.3676844630104252 1.50545072555542 0.222
1.3714508246593662 1.210235834121704 0.227
1.38646710623003 1.727217435836792 0.225
1.4324304481494985 1.266580581665039 0.22
1.3768412605983091 2.0645110607147217 0.205
1.3635705079391038 2.4396238327026367 0.233
1.372283204799735 0.9287309646606445 0.232
1.353333488252996 1.3852877616882324 0.216
1.357631847539658 1.5300681591033936 0.215
1.3726319636971376 1.121906042098999 0.196
1.3776506695521367 2.119368553161621 0.188
1.3433731562415985 2.7836339473724365 0.234
1.351385940867256 1.7116928100585938 0.191
1.3650243965947804 2.8643736839294434 0.206
1.3728926511634425 2.502997875213623 0.218
1.3745485538523239 2.218855142593384 0.215
1.3545780762201023 2.4380455017089844 0.241
1.33197584826801 2.2650630474090576 0.229
1.3704457970030113 1.9020638465881348 0.225
1.3854123368425295 4.955258369445801 0.254
1.3823198153875929 2.889181613922119 0.23
1.3757417802469607 3.593186855316162 0.204
1.32994771

1.2396519914170714 4.76277494430542 0.179
1.231758576074546 1.8555078506469727 0.176


In [132]:
# Build the per-metric figures and the combined error figure for the first run.
plots, err_plot = get_plots(plot_list1, legend_ad = '')
# add_to_plots(plots, plot_list2, legend_ad = ' - hidden layer - 10 neurons')

In [133]:
# Display the per-metric figures side by side.
show(row(plots))

In [134]:
# Display the combined error figure returned by get_plots.
show(err_plot)

In [58]:
# plots2, err_plot2 = get_plots(plot_list2, legend_ad = ' - hidden layer - 10 neurons')

In [59]:
# show(row(plots2))

In [60]:
# show(err_plot2)

In [71]:
# Scratch check that int() parses a digit string - presumably left over from
# working on the leading-digit parsing in training_analysis; safe to delete.
a = '3'
int(a)

3