In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

# will be used to load MATLAB mat datafile format
from scipy.io import loadmat

# library written for this exercise providing additional functions for assignment submission, and others
import utils



In [2]:
import pandas as pd

# Load the codon-usage table. low_memory=False makes pandas parse the file in
# a single pass instead of inferring dtypes chunk by chunk.
df = pd.read_csv('codon_usage.csv', low_memory=False)

# Remove the rows labelled 486 and 5063, then coerce the 'UUC' column to a
# numeric dtype.
# NOTE(review): presumably those two rows hold malformed codon values -- confirm
# against the raw CSV.
df = df.drop(labels=[486, 5063], axis=0)
df['UUC'] = pd.to_numeric(df['UUC'], downcast="float")

# Keep only rows whose Kingdom code does not contain "plm". na=True treats a
# missing Kingdom as a match, so such rows are dropped exactly as the former
# `== False` comparison did. The explicit copy makes the column assignment
# below operate on an independent frame rather than on a slice of the original.
df = df[~df["Kingdom"].str.contains("plm", na=True)].copy()

# Integer class label for each remaining kingdom code.
kingdom_to_label = {"arc": 0, "bct": 1, "phg": 2, "pln": 3, "inv": 4, "vrt": 5,
                    "mam": 6, "rod": 7, "pri": 8, "vrl": 9}
df['Kingdom'] = df["Kingdom"].map(kingdom_to_label)

# Series.map yields NaN for any code missing from the mapping, which would turn
# the labels into floats and break the one-hot indexing in the cost function.
if df['Kingdom'].isna().any():
    raise ValueError("Found Kingdom codes that have no class label in kingdom_to_label")

# Labels and feature matrix as NumPy arrays. The features are the columns at
# positions 6..68 (63 columns).
# NOTE(review): position 5 is skipped -- confirm that leaving it out is intended.
y = df.loc[:, 'Kingdom'].to_numpy()
X = df.iloc[:, 6:69].to_numpy()

In [3]:
from sklearn.model_selection import train_test_split

# Target share of the full dataset for each partition.
train_ratio = 0.6
test_ratio = 0.2
validation_ratio = 0.2

# Fixed seed so the partitions (and every row count derived from them) are the
# same on each Restart & Run All.
split_seed = 42

# First carve the test set out of the full data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=split_seed)

# What is left is train + validation, so the validation share of the remainder
# is validation / (train + validation).
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train,
    test_size=validation_ratio / (train_ratio + validation_ratio),
    random_state=split_seed)

print(X_train.shape)
print(X_test.shape)
print(X_valid.shape)

(7804, 63)
(2602, 63)
(2602, 63)


In [4]:
from collections import Counter
from imblearn.combine import SMOTEENN 
# Class distribution of the training split before resampling.
counts_before = Counter(y_train)
print('Original dataset shape %s' % counts_before)

# Resample the training split with SMOTEENN; the seed is fixed so the resampler
# itself is repeatable for a given input.
sme = SMOTEENN(random_state=42)
X_res, y_res = sme.fit_resample(X_train, y_train)

# Class distribution after resampling.
counts_after = Counter(y_res)
print('Resampled dataset shape %s' % counts_after)

Original dataset shape Counter({1: 1778, 9: 1696, 3: 1508, 5: 1216, 4: 801, 6: 363, 7: 140, 2: 122, 8: 102, 0: 78})
Resampled dataset shape Counter({0: 1778, 8: 1777, 2: 1776, 7: 1771, 6: 1743, 5: 1645, 4: 1613, 3: 1580, 1: 1528, 9: 1521})


In [5]:
# From here on the training names refer to the resampled data.
X_train, y_train = X_res, y_res

## model

### build first layer

In [6]:
# Architecture of the first network.
num_labels = 10          # output units, one per class label
hidden_layer_size = 32   # hidden units
input_layer_size = 63    # input features per example


In [7]:
def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    """
    Randomly initialize the weights of one layer of a neural network.

    Random (rather than constant) starting values break the symmetry between
    units so that they can learn different features during training.

    Parameters
    ----------
    L_in : int
        Number of incoming connections.

    L_out : int
        Number of outgoing connections.

    epsilon_init : float, optional
        Half-width of the sampling interval: every weight is drawn from the
        uniform distribution on [-epsilon_init, epsilon_init).

    Returns
    -------
    W : array_like
        Weight matrix of shape (L_out, 1 + L_in). The extra first column
        holds the weights of the bias unit.
    """
    # np.random.rand samples from [0, 1); scale and shift it onto
    # [-epsilon_init, epsilon_init).
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init

In [8]:
print('Initializing Neural Network Parameters ...')

# Draw the hidden-layer weights first, then the output-layer weights.
initial_Theta1 = randInitializeWeights(L_in=input_layer_size, L_out=hidden_layer_size)
initial_Theta2 = randInitializeWeights(L_in=hidden_layer_size, L_out=num_labels)

# Flatten both matrices (row-major) into the single parameter vector that the
# optimizer works on: Theta1 entries first, Theta2 entries after.
initial_nn_params = np.concatenate((initial_Theta1.ravel(), initial_Theta2.ravel()))

Initializing Neural Network Parameters ...


In [9]:
def sigmoidGradient(z):
    """
    Evaluate the derivative of the sigmoid function element-wise at z.

    Uses the identity g'(z) = g(z) * (1 - g(z)), where g is the sigmoid
    implemented by `utils.sigmoid`.

    Parameters
    ----------
    z : array_like
        Scalar, vector or matrix at which the derivative is evaluated.
        NOTE(review): scalar support additionally relies on `utils.sigmoid`
        accepting scalars -- confirm in utils.py.

    Returns
    -------
    g : array_like
        Derivative of the sigmoid at each element of z; same shape as the
        value returned by `utils.sigmoid(z)`.
    """
    # Evaluate the sigmoid once and reuse it. No array is pre-allocated from
    # z.shape, so an input without a `.shape` attribute is not rejected here.
    s = utils.sigmoid(z)
    return s * (1 - s)

In [10]:
def nnCostFunction(nn_params,
                   input_layer_size,
                   hidden_layer_size,
                   num_labels,
                   X, y, lambda_=0.0):
    """
    Regularized cross-entropy cost and its gradient for a network with one
    hidden layer and sigmoid activations in both the hidden and output layer.

    Parameters
    ----------
    nn_params : array_like
        All network weights "unrolled" into one vector: the row-major
        entries of Theta1 (hidden_layer_size x (input_layer_size + 1))
        followed by those of Theta2 (num_labels x (hidden_layer_size + 1)).

    input_layer_size : int
        Number of features for the input layer.

    hidden_layer_size : int
        Number of hidden units.

    num_labels : int
        Number of classes, i.e. number of units in the output layer.

    X : array_like
        Input dataset. A matrix of shape (m x input_layer_size).

    y : array_like
        Integer class labels in 0..num_labels-1, one per row of X. They are
        used directly as row indices into an identity matrix to build the
        one-hot targets, so they must have an integer dtype.

    lambda_ : float, optional
        Regularization parameter. Bias weights (first column of each Theta)
        are not regularized.

    Returns
    -------
    J : float
        Value of the cost function at the given weights.

    grad : array_like
        Partial derivatives of J with respect to every weight, unrolled in
        the same order as nn_params.
    """
    # Recover the two weight matrices from the unrolled parameter vector.
    theta1_size = hidden_layer_size * (input_layer_size + 1)
    Theta1 = np.reshape(nn_params[:theta1_size],
                        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = np.reshape(nn_params[theta1_size:],
                        (num_labels, (hidden_layer_size + 1)))

    # Number of examples.
    m = y.size

    # ---- Forward pass ----
    # Prepend the bias column to the inputs.
    a1 = np.concatenate([np.ones((m, 1)), X], axis=1)

    # Hidden pre-activations; kept because backpropagation needs them again.
    z2 = a1.dot(Theta1.T)
    a2 = utils.sigmoid(z2)
    a2 = np.concatenate([np.ones((a2.shape[0], 1)), a2], axis=1)

    # Output activations, one column per class.
    a3 = utils.sigmoid(a2.dot(Theta2.T))

    # One-hot targets: row i of the identity matrix for label i.
    y_matrix = np.eye(num_labels)[y.reshape(-1)]

    # ---- Cost ----
    # L2 penalty on all non-bias weights.
    reg_term = (lambda_ / (2 * m)) * (np.sum(np.square(Theta1[:, 1:])) + np.sum(np.square(Theta2[:, 1:])))

    J = (-1 / m) * np.sum((np.log(a3) * y_matrix) + np.log(1 - a3) * (1 - y_matrix)) + reg_term

    # ---- Backpropagation ----
    delta_3 = a3 - y_matrix
    # Drop the bias column of the back-propagated error before applying the
    # sigmoid derivative of the hidden layer.
    delta_2 = delta_3.dot(Theta2)[:, 1:] * sigmoidGradient(z2)

    Delta1 = delta_2.T.dot(a1)
    Delta2 = delta_3.T.dot(a2)

    # Average over the examples, then add the regularization gradient to
    # every column except the bias column.
    Theta1_grad = (1 / m) * Delta1
    Theta1_grad[:, 1:] = Theta1_grad[:, 1:] + (lambda_ / m) * Theta1[:, 1:]

    Theta2_grad = (1 / m) * Delta2
    Theta2_grad[:, 1:] = Theta2_grad[:, 1:] + (lambda_ / m) * Theta2[:, 1:]

    # Unroll the gradients in the same order as nn_params.
    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel()])

    return J, grad

In [11]:
# Check the gradients returned by nnCostFunction with utils.checkNNGradients.
# NOTE(review): the recorded output shows two matching columns, presumably the
# numerical and the analytical gradient -- confirm in utils.py.
lambda_ = 1
utils.checkNNGradients(nnCostFunction, lambda_)

# Cost of the randomly initialized weights on the (resampled) training data.
# There is no reference value for it: it depends on the random initialization
# and on this dataset.
debug_J, _ = nnCostFunction(initial_nn_params, input_layer_size,
                            hidden_layer_size, num_labels, X_train, y_train, lambda_)

print('\n\nCost at initial parameters (w/ lambda = %f): %f ' % (lambda_, debug_J))

[[-0.00927825 -0.00927825]
 [-0.00559136 -0.00559136]
 [-0.02017486 -0.02017486]
 [-0.00585433 -0.00585433]
 [ 0.00889912  0.00889912]
 [ 0.01315402  0.01315402]
 [-0.01049831 -0.01049831]
 [-0.01910997 -0.01910997]
 [-0.00836011 -0.00836011]
 [ 0.01976123  0.01976123]
 [ 0.00811587  0.00811587]
 [-0.01515689 -0.01515689]
 [ 0.00762814  0.00762814]
 [ 0.00827936  0.00827936]
 [ 0.02014747  0.02014747]
 [ 0.00315079  0.00315079]
 [-0.00674798 -0.00674798]
 [-0.0109273  -0.0109273 ]
 [ 0.01262954  0.01262954]
 [ 0.01809234  0.01809234]
 [ 0.31454497  0.31454497]
 [ 0.14895477  0.14895477]
 [ 0.17770766  0.17770766]
 [ 0.14745891  0.14745891]
 [ 0.15953087  0.15953087]
 [ 0.14381027  0.14381027]
 [ 0.11105659  0.11105659]
 [ 0.03839516  0.03839516]
 [ 0.0775739   0.0775739 ]
 [ 0.03592373  0.03592373]
 [ 0.07350885  0.07350885]
 [ 0.03392626  0.03392626]
 [ 0.0974007   0.0974007 ]
 [ 0.04486928  0.04486928]
 [ 0.05899539  0.05899539]
 [ 0.03843063  0.03843063]
 [ 0.06015138  0.06015138]
 

In [12]:
# Upper limit on optimizer iterations for the first network.
options = {'maxiter': 500}

# Regularization strength used while training the first network.
lambda_ = 0.3


def costFunction(p):
    """Cost and gradient of the first network as a function of the unrolled weights only."""
    return nnCostFunction(p, input_layer_size,
                          hidden_layer_size,
                          num_labels, X_train, y_train, lambda_)


# jac=True tells scipy that costFunction returns the pair (cost, gradient).
res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)

# Optimized parameter vector.
nn_params = res.x

# Split the vector back into the two weight matrices.
theta1_size = hidden_layer_size * (input_layer_size + 1)
Theta1 = nn_params[:theta1_size].reshape(hidden_layer_size, input_layer_size + 1)
Theta2 = nn_params[theta1_size:].reshape(num_labels, hidden_layer_size + 1)

### build second layer

In [15]:
# Hidden-layer activations of the trained first network on the training set;
# they are the input features of the second stage.
print(Theta1)
print(Theta1.shape)
print(X_train)
print(X_train.shape)

# Prepend the bias column. The row count is taken from X_train itself rather
# than hard-coded, because it changes with the split and the resampling.
a1 = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], axis=1)
print(a1)
print(a1.shape)

a2 = utils.sigmoid(a1.dot(Theta1.T))
print(a2)
print(a2.shape)

[[ 0.26058534 -3.44892582  1.79080436 ...  1.88274598  1.24365585
   4.38013257]
 [-0.27378612  3.37179038  1.70888211 ...  3.0388796   0.82195142
  -2.92381017]
 [ 0.09866643 -0.39202636  4.41828259 ... -1.61131017  1.02866712
   2.0232074 ]
 ...
 [ 0.12952906  1.40453885 -0.73447447 ...  1.37917151 -0.72570072
   0.02056101]
 [-0.43346546  2.79007308  0.04965792 ...  0.16040485  0.05782365
  -0.53121348]
 [-0.29399809 -3.77389085  0.8864798  ... -0.43325231  1.29615034
   5.59418873]]
(32, 64)
[[0.02256    0.0384     0.01715    ... 0.00186    0.0013     0.00261   ]
 [0.01262    0.03824    0.01109    ... 0.00191    0.         0.        ]
 [0.01797    0.04105    0.01594    ... 0.00176    0.00074    0.00104   ]
 ...
 [0.02101646 0.01418457 0.01711674 ... 0.00065799 0.00135597 0.000329  ]
 [0.0126689  0.03187748 0.02008067 ... 0.0003823  0.00090413 0.00036496]
 [0.02036344 0.01830769 0.0182807  ... 0.00252    0.00114361 0.00045901]]
(16732, 63)
[[1.00000000e+00 2.25600004e-02 3.84000000e

In [16]:
# Architecture of the second-stage network.
num_labels_2 = 10          # output units, one per class label
hidden_layer_size_2 = 32   # hidden units
input_layer_size_2 = 32    # inputs per example


In [17]:
print('Initializing Neural Network Parameters ...')

# Draw the hidden-layer weights first, then the output-layer weights.
initial_Theta1_2 = randInitializeWeights(L_in=input_layer_size_2, L_out=hidden_layer_size_2)
initial_Theta2_2 = randInitializeWeights(L_in=hidden_layer_size_2, L_out=num_labels_2)

# Flatten both matrices (row-major) into one parameter vector for the optimizer.
initial_nn_params_2 = np.concatenate((initial_Theta1_2.ravel(), initial_Theta2_2.ravel()))

Initializing Neural Network Parameters ...


In [90]:
# Upper limit on optimizer iterations for the second-stage network.
options = {'maxiter': 600}

# Regularization strength used while training the second-stage network.
lambda_ = 0.3


def costFunction(p):
    """Cost and gradient of the second-stage network, which is trained on the activations a2."""
    return nnCostFunction(p, input_layer_size_2,
                          hidden_layer_size_2,
                          num_labels_2, a2, y_train, lambda_)


# jac=True tells scipy that costFunction returns the pair (cost, gradient).
res = optimize.minimize(costFunction,
                        initial_nn_params_2,
                        jac=True,
                        method='TNC',
                        options=options)

# Optimized parameter vector.
nn_params_2 = res.x

# Split the vector back into the two weight matrices.
theta1_2_size = hidden_layer_size_2 * (input_layer_size_2 + 1)
Theta1_2 = nn_params_2[:theta1_2_size].reshape(hidden_layer_size_2, input_layer_size_2 + 1)
Theta2_2 = nn_params_2[theta1_2_size:].reshape(num_labels_2, hidden_layer_size_2 + 1)

### build third layer

In [94]:
# Shapes of the first-stage activations and of the second-stage hidden weights
# that will be multiplied together in the next step.
for checked in (a2, Theta1_2):
    print(checked.shape)

(16732, 32)
(32, 33)


In [95]:
# Hidden-layer activations of the trained second-stage network; they are the
# input features of the third stage. The bias column's row count is taken from
# a2 itself rather than hard-coded, because it changes with the split and the
# resampling.
a1_3 = np.concatenate([np.ones((a2.shape[0], 1)), a2], axis=1)
a2_3 = utils.sigmoid(a1_3.dot(Theta1_2.T))

In [96]:
# Architecture of the third-stage network. These reuse the *_2 names and carry
# the same values as the second stage.
num_labels_2 = 10          # output units, one per class label
hidden_layer_size_2 = 32   # hidden units
input_layer_size_2 = 32    # inputs per example


In [97]:
print('Initializing Neural Network Parameters ...')

# Fresh random starting weights for the third stage. NOTE: this rebinds the
# initial_*_2 names that were used to start the second-stage training.
initial_Theta1_2 = randInitializeWeights(L_in=input_layer_size_2, L_out=hidden_layer_size_2)
initial_Theta2_2 = randInitializeWeights(L_in=hidden_layer_size_2, L_out=num_labels_2)

# Flatten both matrices (row-major) into one parameter vector for the optimizer.
initial_nn_params_2 = np.concatenate((initial_Theta1_2.ravel(), initial_Theta2_2.ravel()))

Initializing Neural Network Parameters ...


In [143]:
# Upper limit on optimizer iterations for the third-stage network.
options = {'maxiter': 500}

# Regularization strength used while training the third-stage network.
lambda_ = 0.8


def costFunction(p):
    """Cost and gradient of the third-stage network, which is trained on the activations a2_3."""
    return nnCostFunction(p, input_layer_size_2,
                          hidden_layer_size_2,
                          num_labels_2, a2_3, y_train, lambda_)


# jac=True tells scipy that costFunction returns the pair (cost, gradient).
res = optimize.minimize(costFunction,
                        initial_nn_params_2,
                        jac=True,
                        method='TNC',
                        options=options)

# Optimized parameter vector.
nn_params_3 = res.x

# Split the vector back into the two weight matrices.
theta1_3_size = hidden_layer_size_2 * (input_layer_size_2 + 1)
Theta1_3 = nn_params_3[:theta1_3_size].reshape(hidden_layer_size_2, input_layer_size_2 + 1)
Theta2_3 = nn_params_3[theta1_3_size:].reshape(num_labels_2, hidden_layer_size_2 + 1)

## evaluation 

### evaluate the first layer

In [13]:
# Accuracy of the first network on the (resampled) training set.
pred_train = utils.predict(Theta1, Theta2, X_train)
train_accuracy = np.mean(pred_train == y_train) * 100
print('Training Set Accuracy: %f' % train_accuracy)

# Accuracy of the same network on the untouched validation set.
pred_valid = utils.predict(Theta1, Theta2, X_valid)
valid_accuracy = np.mean(pred_valid == y_valid) * 100
print('Validation Set Accuracy: %f' % valid_accuracy)

from sklearn.metrics import classification_report

# Display names 'class 0' .. 'class 9' for the ten labels.
class_names = ['class %d' % k for k in range(10)]

# Per-class precision / recall / F1 for both sets.
print(classification_report(y_train, pred_train, target_names=class_names))
print(classification_report(y_valid, pred_valid, target_names=class_names))


Training Set Accuracy: 88.775998
Validation Set Accuracy: 81.322060
              precision    recall  f1-score   support

     class 0       0.96      1.00      0.98      1778
     class 1       0.92      0.89      0.90      1528
     class 2       0.94      0.97      0.95      1776
     class 3       0.89      0.87      0.88      1580
     class 4       0.84      0.81      0.83      1613
     class 5       0.96      0.90      0.93      1645
     class 6       0.87      0.79      0.83      1743
     class 7       0.79      0.88      0.83      1771
     class 8       0.87      0.87      0.87      1777
     class 9       0.85      0.89      0.87      1521

    accuracy                           0.89     16732
   macro avg       0.89      0.89      0.89     16732
weighted avg       0.89      0.89      0.89     16732

              precision    recall  f1-score   support

     class 0       0.45      0.96      0.61        26
     class 1       0.95      0.82      0.88       552
     class

### evaluate the second layer

In [91]:
# Accuracy of the second-stage network on the training set (its inputs are the
# first-stage hidden activations a2).
pred_train = utils.predict(Theta1_2, Theta2_2, a2)
print('Training Set Accuracy: %f' % (np.mean(pred_train == y_train) * 100))

# Push the validation set through the first-stage hidden layer. The bias
# column's row count is taken from X_valid instead of being hard-coded.
a1_v = np.concatenate([np.ones((X_valid.shape[0], 1)), X_valid], axis=1)
a2_v = utils.sigmoid(a1_v.dot(Theta1.T))
pred_valid = utils.predict(Theta1_2, Theta2_2, a2_v)
print('Validation Set Accuracy: %f' % (np.mean(pred_valid == y_valid) * 100))

from sklearn.metrics import classification_report

# Display names 'class 0' .. 'class 9' for the ten labels.
class_names = ['class %d' % k for k in range(10)]

# Per-class precision / recall / F1 for both sets.
print(classification_report(y_train, pred_train, target_names=class_names))
print(classification_report(y_valid, pred_valid, target_names=class_names))


Training Set Accuracy: 90.718384
Validation Set Accuracy: 83.243659
              precision    recall  f1-score   support

     class 0       0.97      1.00      0.98      1778
     class 1       0.93      0.91      0.92      1528
     class 2       0.94      0.97      0.96      1776
     class 3       0.90      0.89      0.89      1580
     class 4       0.88      0.86      0.87      1613
     class 5       0.97      0.94      0.95      1645
     class 6       0.87      0.82      0.84      1743
     class 7       0.82      0.88      0.85      1771
     class 8       0.90      0.89      0.90      1777
     class 9       0.90      0.90      0.90      1521

    accuracy                           0.91     16732
   macro avg       0.91      0.91      0.91     16732
weighted avg       0.91      0.91      0.91     16732

              precision    recall  f1-score   support

     class 0       0.50      0.96      0.66        26
     class 1       0.95      0.83      0.88       552
     class

### evaluate the third layer

In [144]:
# Accuracy of the third-stage network on the training set (its inputs are the
# second-stage hidden activations a2_3).
pred_train = utils.predict(Theta1_3, Theta2_3, a2_3)
print('Training Set Accuracy: %f' % (np.mean(pred_train == y_train) * 100))

# Push the validation activations a2_v through the second-stage hidden layer.
# The bias column's row count is taken from a2_v instead of being hard-coded.
a1_3v = np.concatenate([np.ones((a2_v.shape[0], 1)), a2_v], axis=1)
a2_3v = utils.sigmoid(a1_3v.dot(Theta1_2.T))
pred_valid = utils.predict(Theta1_3, Theta2_3, a2_3v)
print('Validation Set Accuracy: %f' % (np.mean(pred_valid == y_valid) * 100))

from sklearn.metrics import classification_report

# Display names 'class 0' .. 'class 9' for the ten labels.
class_names = ['class %d' % k for k in range(10)]

# Per-class precision / recall / F1 for both sets.
print(classification_report(y_train, pred_train, target_names=class_names))
print(classification_report(y_valid, pred_valid, target_names=class_names))


Training Set Accuracy: 92.523309
Validation Set Accuracy: 84.358186
              precision    recall  f1-score   support

     class 0       0.98      1.00      0.99      1778
     class 1       0.94      0.93      0.93      1528
     class 2       0.95      0.98      0.96      1776
     class 3       0.92      0.90      0.91      1580
     class 4       0.89      0.89      0.89      1613
     class 5       0.96      0.95      0.96      1645
     class 6       0.90      0.86      0.88      1743
     class 7       0.86      0.91      0.89      1771
     class 8       0.94      0.92      0.93      1777
     class 9       0.92      0.92      0.92      1521

    accuracy                           0.93     16732
   macro avg       0.93      0.92      0.92     16732
weighted avg       0.93      0.93      0.93     16732

              precision    recall  f1-score   support

     class 0       0.56      0.92      0.70        26
     class 1       0.95      0.85      0.89       552
     class

## Result

In [146]:
# Push the held-out test set through the hidden layers of stage one and stage
# two, then classify with the third-stage network. Row counts for the bias
# columns are taken from the arrays themselves instead of being hard-coded.
a1_t = np.concatenate([np.ones((X_test.shape[0], 1)), X_test], axis=1)
a2_t = utils.sigmoid(a1_t.dot(Theta1.T))
a1_3t = np.concatenate([np.ones((a2_t.shape[0], 1)), a2_t], axis=1)
a2_3t = utils.sigmoid(a1_3t.dot(Theta1_2.T))
pred_test = utils.predict(Theta1_3, Theta2_3, a2_3t)
print('Test Set Accuracy: %f' % (np.mean(pred_test == y_test) * 100))

from sklearn.metrics import classification_report

# Display names 'class 0' .. 'class 9' for the ten labels.
class_names = ['class %d' % k for k in range(10)]

# Print the human-readable text report. (With output_dict=True the call returns
# a dict, and printing that shows a raw dict instead of the table.)
print(classification_report(y_test, pred_test, target_names=class_names))


Test Set Accuracy: 84.396618
              precision    recall  f1-score   support

     class 0       0.38      0.73      0.50        22
     class 1       0.94      0.86      0.90       589
     class 2       0.49      0.81      0.61        52
     class 3       0.87      0.83      0.85       524
     class 4       0.67      0.75      0.71       259
     class 5       0.96      0.90      0.93       433
     class 6       0.79      0.70      0.74        94
     class 7       0.48      0.65      0.55        40
     class 8       0.58      0.80      0.67        41
     class 9       0.90      0.89      0.90       548

    accuracy                           0.84      2602
   macro avg       0.71      0.79      0.74      2602
weighted avg       0.86      0.84      0.85      2602



In [155]:

# Display names 'class 0' .. 'class 9' for the ten labels.
class_names = ['class %d' % k for k in range(10)]

# Test-set metrics as a nested dict, turned into a table with one row per
# report entry.
report = classification_report(y_test, pred_test,
                               target_names=class_names,
                               output_dict=True)

# NOTE: this rebinds `df`, which earlier held the codon-usage table.
df = pd.DataFrame(report).T


In [157]:
# Keep the index when exporting: after the transpose it holds the row labels
# of the report (class names and summary rows), and without them the metric
# rows in the spreadsheet cannot be told apart.
df.to_excel('data.xlsx', index=True)


In [147]:
def trainNN(nnCostFunction, X, y, lambda_=0, maxiter=100):
    """
    Fit the network weights to (X, y) and return them as one unrolled vector.

    The layer sizes (input_layer_size, hidden_layer_size, num_labels) and the
    starting point (initial_nn_params) are read from the notebook's globals.

    Parameters
    ----------
    nnCostFunction : callable
        Called as nnCostFunction(params, input_layer_size, hidden_layer_size,
        num_labels, X, y, lambda_); must return the pair (cost, gradient).

    X : array_like
        Training inputs.

    y : array_like
        Training labels.

    lambda_ : float, optional
        Regularization parameter passed on to nnCostFunction.

    maxiter : int, optional
        Value of the optimizer's 'maxiter' option.

    Returns
    -------
    array_like
        The optimized parameter vector (`x` of the scipy result).
    """
    def objective(p):
        # Cost and gradient as a function of the weights alone.
        return nnCostFunction(p, input_layer_size,
                              hidden_layer_size,
                              num_labels, X, y, lambda_)

    # jac=True: objective returns (cost, gradient) together.
    result = optimize.minimize(objective,
                               initial_nn_params,
                               jac=True,
                               method='TNC',
                               options={'maxiter': maxiter})
    return result.x


In [148]:
def learningCurve(X, y, Xval, yval, lambda_=0, step=1):
    """
    Training and validation cost as a function of the training-set size.

    For each size i in 1, 1 + step, 1 + 2*step, ... (up to the number of
    training examples) a network is trained from scratch on the first i
    examples with `trainNN`. Its unregularized cost (lambda_=0) is then
    measured on those i examples and on the complete validation set.

    Parameters
    ----------
    X : array_like
        Training inputs, one example per row.

    y : array_like
        Training labels; `y.size` is the number of training examples.

    Xval : array_like
        Validation inputs.

    yval : array_like
        Validation labels.

    lambda_ : float, optional
        Regularization parameter used for training only.

    step : int, optional
        Increment between consecutive training-set sizes. The default of 1
        evaluates every size, which means one full optimization per training
        example; use a larger step for big training sets.

    Returns
    -------
    error_train : array_like
        Training cost for each evaluated size, in increasing order of size.

    error_val : array_like
        Validation cost for each evaluated size.

    Raises
    ------
    ValueError
        If step is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Number of training examples
    m = y.size

    # Training-set sizes to evaluate; with step=1 this is 1..m.
    sizes = range(1, m + 1, step)

    error_train = np.zeros(len(sizes))
    error_val = np.zeros(len(sizes))

    for k, i in enumerate(sizes):
        # Train on the first i examples with the requested regularization.
        nn_params = trainNN(nnCostFunction, X[:i], y[:i], lambda_=lambda_)

        # Report plain (unregularized) cost so the two curves are comparable.
        error_train[k], _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                           num_labels, X[:i], y[:i], lambda_=0)

        error_val[k], _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                         num_labels, Xval, yval, lambda_=0)

    return error_train, error_val

In [None]:

# NOTE: learningCurve trains one network per training-set size, so this call
# is expensive on a large training set.
error_train, error_val = learningCurve(X_train, y_train, X_valid, y_valid, lambda_=0.3)

# The x-axis must have exactly one entry per point of the curves. Sizing it
# from `y` (the labels of the full dataset) gives a different length than the
# curves computed from the training split, and pyplot.plot then fails.
n_points = error_train.size
sizes = np.arange(1, n_points + 1)

pyplot.plot(sizes, error_train, sizes, error_val, lw=2)
pyplot.title('Learning curve for neural network')
pyplot.legend(['Train', 'Cross Validation'])
pyplot.xlabel('Number of training examples')
pyplot.ylabel('Error')
pyplot.axis([0, n_points, 0, 10])
