In [201]:
# Package imports
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
from sklearn.model_selection import train_test_split

In [203]:
np.random.seed(0)
X, y = sklearn.datasets.make_moons(200, noise=0.20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.43, random_state=42)

In [144]:
input_layer=3000
output_layer=10
nn_input_dim = 2 # input layer dimensionality
nn_output_dim = 2 # output layer dimensionality

#gradient decent
eta=0.01
reg_lambda=0.01

In [145]:
def initialize_weights(nn_input_dim, nn_hdim,nn_output_dim):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    return W1,b1,W2,b2

In [146]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

In [147]:
def softmax(z):
    exp_scores = np.exp(z)
    return (exp_scores / np.sum(exp_scores, axis=1, keepdims=True))

In [245]:
def feedforward(W1,b1,W2,b2,X):
    for i in X:
    # Forward propagation
        z1 = X.dot(W1) + b1
        #print(z1)
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    #print(probs)
    return z1,a1,z2,probs

In [264]:
def feedforward(W1,b1,W2,b2,X):
    # Forward propagation
    z1 = X.dot(W1) + b1
        #print(z1)
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    #print(probs)
    return z1,a1,z2,probs

In [271]:
# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(X,y,nn_hdim, num_passes, print_loss):
    # This is what we return at the end
    model = {}
    W1,b1,W2,b2=initialize_weights(nn_input_dim, nn_hdim,nn_output_dim)
    # Gradient descent. For each batch...
    for i in range(0,num_passes):
        #Forward propagation       
        z1,a1,z2,probs=feedforward(W1,b1,W2,b2,X)
        # Backpropagation
        delta3 = probs
        delta3[range(len(X)), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -eta * dW1
        b1 += -eta * db1
        W2 += -eta * dW2
        b2 += -eta * db2
        
        # Assign new parameters to the model
        model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        
        # Optionally print the loss.
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if print_loss and i % 1000 == 0:
          print("Loss after iteration %i: %f" %(i, calculate_loss(model,X,y)))  
    return model


In [197]:
def calculate_loss(model,X,y):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1,a1,z2,probs=feedforward(W1,b1,W2,b2,X)
    # Calculating the loss
    corect_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(corect_logprobs)
    # Add regulatization term to loss (optional)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./num_examples * data_loss

In [267]:
# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1,a1,z2,probs=feedforward(W1,b1,W2,b2,x)
    return np.argmax(probs,axis=1)
    

In [272]:
# Build a model with a 3-dimensional hidden layer
num_examples = len(X_train)
model = build_model(X_train,y_train,4,2000, False)

In [266]:
delta3=predict(model,X_train)
delta3[range(num_examples), y_train] -= 1

IndexError: too many indices for array

In [251]:
delta3

array([[ 0.03173218, -0.03173218],
       [ 0.1496922 , -0.1496922 ],
       [-0.07882916,  0.07882916],
       [-0.08546984,  0.08546984],
       [-0.09370515,  0.09370515],
       [ 0.85938742, -0.85938742],
       [ 0.24848691, -0.24848691],
       [ 0.67472456, -0.67472456],
       [ 0.09119246, -0.09119246],
       [ 0.67981446, -0.67981446],
       [-0.19028324,  0.19028324],
       [-0.59661492,  0.59661492],
       [ 0.07652409, -0.07652409],
       [-0.20781237,  0.20781237],
       [ 0.05173825, -0.05173825],
       [ 0.04472622, -0.04472622],
       [-0.09026164,  0.09026164],
       [-0.79645322,  0.79645322],
       [-0.13341202,  0.13341202],
       [-0.04919831,  0.04919831],
       [-0.10599154,  0.10599154],
       [ 0.04227617, -0.04227617],
       [-0.03378793,  0.03378793],
       [ 0.55244027, -0.55244027],
       [-0.11341456,  0.11341456],
       [-0.43359977,  0.43359977],
       [-0.08041381,  0.08041381],
       [-0.12226897,  0.12226897],
       [ 0.03229087,

In [274]:
y_pred = predict(model,X_test[0])
y_test[0]

1

In [276]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test[0], y_pred[0])

TypeError: Singleton array 1 cannot be considered a valid collection.

In [277]:
# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(X,y,nn_hdim, num_passes, print_loss):
    # This is what we return at the end
    model = {}
    W1,b1,W2,b2=initialize_weights(nn_input_dim, nn_hdim,nn_output_dim)
    # Gradient descent. For each batch...
    for i in range(0,num_passes):
        #Forward propagation       
        z1,a1,z2,probs=feedforward(W1,b1,W2,b2,X)
        # Backpropagation
        delta3 = probs
        delta3[range(len(X)), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -eta * dW1
        b1 += -eta * db1
        W2 += -eta * dW2
        b2 += -eta * db2
        
        # Assign new parameters to the model
        model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        
        # Optionally print the loss.
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if print_loss and i % 1000 == 0:
          print("Loss after iteration %i: %f" %(i, calculate_loss(model,X,y)))  
    return model


In [None]:
num_examples = len(X_train[0])
for i in X_train:
    model = build_model(X_train,y_train,4,2000, False)