In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

%matplotlib inline

In [4]:
df = pd.read_csv(r'.\train_data.csv')

In [5]:
y = pd.read_csv(r'.\train_labels.csv')
x = df

# Split the data in train_validate_test: 80:20 Train:Test
X_train_val, X_test, Y_train_val, Y_test = train_test_split(x, y, test_size=0.2, random_state=50)
# Split the data in train_validate_test: 90:10 Train:Validate
x_train, x_test, y_train, y_test = train_test_split(X_train_val, Y_train_val, test_size=0.1, random_state=50)

N = y_train.size

In [6]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def mean_squared_error(predictions, labels):
    N = labels.size
    mse = ((predictions - labels)**2).sum() / (2*N)
    
    return mse

def accuracy(predictions, labels):
    predicions_correct = predictions.max(axis=1) == labels.max(axis=1)
    accuracy = predicions_correct.mean()
    
    return accuracy

In [13]:
# hyperparameters
learning_rate = 0.5
epochs = 100

n_input = 784
n_hidden = 10
n_output = 4
bias = [np.zeros(n_hidden), np.zeros(n_output)]
np.random.seed(10)
weights_1 = np.random.normal(scale=0.5, size=(n_input, n_hidden))   # (4, 2)
weights_2 = np.random.normal(scale=0.5, size=(n_hidden, n_output))  # (2, 3)
print("OLD weights are:")
print("Weights from input to hidden layer:", weights_1)
print("Weights from hidden to outpu layer:", weights_2)
# training the neural net
monitoring = {"mean_squared_error": [], "accuracy": []}
for epoch in range(epochs):    
    
    # feedforward
    hidden_layer_inputs = np.dot(x_train, weights_1) + bias[-2]
    hidden_layer_outputs = sigmoid(hidden_layer_inputs)

    output_layer_inputs = np.dot(hidden_layer_outputs, weights_2) + bias[-1]
    output_layer_outputs = sigmoid(output_layer_inputs)
    
    
    # monitor training process
    mse = mean_squared_error(output_layer_outputs, y_train)
    acc = accuracy(output_layer_outputs, y_train)
    
    monitoring["mean_squared_error"].append(mse)
    monitoring["accuracy"].append(acc)
    
    
    # backpropagation
    output_layer_error = output_layer_outputs - y_train
    output_layer_delta = output_layer_error * output_layer_outputs * (1 - output_layer_outputs)

    hidden_layer_error = np.dot(output_layer_delta, weights_2.T)
    hidden_layer_delta = hidden_layer_error * hidden_layer_outputs * (1 - hidden_layer_outputs)

    
    # weight updates
    weights_2_update = np.dot(hidden_layer_outputs.T, output_layer_delta) / N
    weights_1_update = np.dot(x_train.T, hidden_layer_delta) / N

    weights_2 = weights_2 - learning_rate * weights_2_update
    weights_1 = weights_1 - learning_rate * weights_1_update
    #bias updates
    bias[-1] = bias[-1] - learning_rate*output_layer_delta/N
    bias[-2] = bias[-2] - learning_rate*hidden_layer_delta/N
print("Updated weights are:")
print("Weights from input to hidden layer:", weights_1)
print("Weights from hidden to outpu layer:", weights_2)
print("bias of output layer:",bias[-1])
monitoring_df = pd.DataFrame(monitoring)
monitoring_df

OLD weights are:
Weights from input to hidden layer: [[ 0.66579325  0.35763949 -0.77270015 ...  0.05427426  0.00214572
  -0.08730011]
 [ 0.21651309  0.60151869 -0.48253284 ...  0.06756844  0.7422685
  -0.53990244]
 [-0.98886414 -0.87168615  0.13303508 ...  0.69899819 -0.13562399
   0.30660209]
 ...
 [-0.65525474 -0.2581449   0.39199269 ... -0.1499362   0.2711711
  -0.26161749]
 [-0.23948924 -0.24653798  0.21128405 ... -0.887403    0.32889022
  -0.29837126]
 [ 0.38106452  0.29109196 -0.25335657 ...  0.09785153 -0.0341746
   0.05756914]]
Weights from hidden to outpu layer: [[ 0.49253976  0.16480257 -0.05729035 -0.2316216 ]
 [ 0.86029544  0.47654979  0.13243439 -0.32192288]
 [ 0.67355332 -0.39951134  1.12836133  0.36388273]
 [ 0.14082781  0.47503754 -0.40307529 -0.94068578]
 [-0.85054635  0.2064683  -0.83586409  0.16165919]
 [-0.07703012  0.4715005   0.20470836 -0.11303358]
 [-0.05926661  0.15629633 -1.14947972  0.09500192]
 [-0.10018552  0.11537668  0.5364384   0.06483497]
 [-0.7788787  

Unnamed: 0,mean_squared_error,accuracy
0,1.000000000000000000e+00 0.029909 0.00000...,0.0
1,1.000000000000000000e+00 0.029562 0.00000...,0.0
2,1.000000000000000000e+00 0.029227 0.00000...,0.0
3,1.000000000000000000e+00 0.028903 0.00000...,0.0
4,1.000000000000000000e+00 0.028589 0.00000...,0.0
...,...,...
95,1.000000000000000000e+00 0.019753 0.00000...,0.0
96,1.000000000000000000e+00 0.019731 0.00000...,0.0
97,1.000000000000000000e+00 0.019710 0.00000...,0.0
98,1.000000000000000000e+00 0.019689 0.00000...,0.0


In [10]:
# feedforward
hidden_layer_inputs = np.dot(x_test, weights_1)
hidden_layer_outputs = sigmoid(hidden_layer_inputs)

output_layer_inputs = np.dot(hidden_layer_outputs, weights_2)
output_layer_outputs = sigmoid(output_layer_inputs)
outs = output_layer_outputs
outs


array([[0.09745036, 0.39505523, 0.12022221, 0.21938366],
       [0.30844341, 0.26194403, 0.29105294, 0.39302231],
       [0.42827952, 0.57010408, 0.37181087, 0.25429089],
       ...,
       [0.19083613, 0.20461829, 0.42748631, 0.36911882],
       [0.15233156, 0.41596639, 0.29308195, 0.22814185],
       [0.27188167, 0.45277893, 0.38592456, 0.26675199]])

In [11]:
# encode the data
for i in range(len(outs)):
    max_Val = np.max(outs[i])
    
    for j in range(len(outs[i])):
        if outs[i][j] == max_Val:
            outs[i][j] = 1
        else:
            outs[i][j] = 0
        
outs

array([[0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.]])

In [12]:
print("Actual output{} predicted out {}".format(y_test, outs))
acc = accuracy(outs, y_test)
print("Accuracy: {}".format(acc))

Actual output       1.000000000000000000e+00  0.000000000000000000e+00  \
13904                       0.0                       0.0   
24324                       0.0                       0.0   
12055                       0.0                       0.0   
3849                        0.0                       1.0   
5327                        0.0                       0.0   
...                         ...                       ...   
11369                       0.0                       0.0   
22094                       0.0                       1.0   
8875                        0.0                       0.0   
2751                        0.0                       0.0   
16858                       0.0                       1.0   

       0.000000000000000000e+00.1  0.000000000000000000e+00.2  
13904                         1.0                         0.0  
24324                         0.0                         1.0  
12055                         0.0                         1.0 