### 1. Introduction to Neural Predictions

In [1]:
# Please make sure you have these libraries installed
# !pip install --no-index numpy
# !pip install --no-index torch
# !pip install --no-index matplotlib
# !pip install --no-index scikit-learn
# !pip install --no-index pandas

In [2]:
# Let's make a prediction

def neural_network(input, weight):
    """Predict a single output by scaling one input value by one weight."""
    return input * weight


# Dataset: for each time the team won, the number of toes was averaged
# (some players had one or more broken toes).
number_of_toes = [8.5, 9.5, 10, 9]
weight = 0.1

input = number_of_toes[0]  # 8.5 -- will this team win?
pred = neural_network(input, weight)

print(round(pred, 3))

0.85


In [3]:
# Given an average of 8.5 toes, the network predicts 0.85, i.e. an 85% chance that the team will win

![](pynb_pics/single_in.jpg)

In [4]:
# Multiple inputs

weights = [0.1, 0.2, 0]


def w_sum(a, b):
    """Return the weighted sum (dot product) of two equal-length sequences.

    Raises AssertionError when the two sequences differ in length.
    """
    assert len(a) == len(b)
    output = 0
    # Plain running total, added left to right, so the float rounding is the
    # same as accumulating element by element.
    for left, right in zip(a, b):
        output += left * right
    return output


def neural_network(input, weight):
    """Predict one output from several inputs.

    input  -- sequence of input values, one per feature
    weight -- sequence of weights, one per input value

    The prediction uses the `weight` argument that is passed in, not the
    module-level `weights` list, so the function works with any weight
    vector the caller supplies.
    """
    return w_sum(input, weight)


toes = [8.5, 9.5, 10, 9]  # Set 1 - number of average toes for the team
wlrec = [0.65, 0.8, 0.8, 0.9]  # Set 2 - win / loss ratio
nfans = [1200, 1300, 500, 1000]  # Set 3 - number of fans

input = [toes[0], wlrec[0], nfans[0]]
pred = neural_network(input, weights)

print(round(pred, 3))

0.98


In [5]:
# Given 8.5 toes, a 0.65 win/loss ratio and 1200 fans, there's a 98% chance of winning the match

![](pynb_pics/mult_inp.jpg)

In [6]:
# Multiple outputs: hurt, win, sad predictions

weights = [0.3, 0.2, 0.9]


def ele_mul(number, vector):
    """Multiply every element of `vector` by the scalar `number`.

    Returns a new list with one product per element of `vector`, whatever
    its length; `vector` itself is not modified.
    """
    return [number * value for value in vector]


def neural_network(input, weights):
    """Predict several outputs from one input value, one output per weight."""
    return ele_mul(input, weights)


wlrec = [0.65, 0.8, 0.8, 0.9]
input = wlrec[0]
pred = neural_network(input, weights)

print(pred)

[0.195, 0.13, 0.5850000000000001]


![](pynb_pics/mult_out.jpg)

In [7]:
# Combine multiple inputs and multiple outputs: hurt, win, sad predictions

# One row of weights per prediction; one column per input value.
weights = [[0.1, 0.1, -0.3],
           [0.1, 0.2, 0.0],
           [0.0, 1.3, 0.1]]


def w_sum(a, b):
    """Return the weighted sum (dot product) of two equal-length sequences.

    Raises AssertionError when the two sequences differ in length.
    """
    assert len(a) == len(b)
    output = 0
    for left, right in zip(a, b):
        output += left * right
    return output


def vect_mat_mul(vect, matrix):
    """Multiply a vector by a matrix given as a list of rows.

    Returns a list with one weighted sum per row of `matrix`. The output
    length follows the number of rows, not the length of `vect`, so the
    matrix does not have to be square. Each row must be as long as `vect`
    (enforced by the assertion in `w_sum`).
    """
    return [w_sum(vect, row) for row in matrix]


def neural_network(input, weights):
    """Predict one output per weight row from a vector of inputs."""
    return vect_mat_mul(input, weights)


toes = [8.5, 9.5, 9.9, 9.0]
wlrec = [0.65, 0.8, 0.8, 0.9]
nfans = [1200, 1300, 500, 1000]

input = [toes[0], wlrec[0], nfans[0]]

pred = neural_network(input, weights)
print(pred)

[-359.085, 0.9800000000000001, 120.845]


In [8]:
# Given 8.5 toes, a 0.65 win/loss ratio and 1200 fans, the predictions are hurt = -359.085 (no injuries), win = 0.98 (98% chance of winning) and sad = 120.845

![](pynb_pics/mult_inout.jpg)

### Predicting on predictions

In [9]:
# NN can be stacked. A network can take output of a network and feed it as input to another network.
# This results in 2 consecutive vector matrix multiplications - In image classification (later)

           #toes %win #fans
ih_wgt = [ [0.1, 0.2, -0.1],  # hid[0]
           [-0.1, 0.1, 0.9],  # hid[1]
           [0.1, 0.4, 0.1] ]  # hid[2]

         # hid[0] hid[1] hid[2]
hp_wgt = [ [0.3, 1.1, -0.3],  # hurt?
           [0.1, 0.2, 0.0],   # win?
           [0.0, 1.3, 0.1] ]  # sad?
weights = [ih_wgt, hp_wgt]


def w_sum(a, b):
    """Return the weighted sum (dot product) of two equal-length sequences.

    Raises AssertionError when the two sequences differ in length.
    """
    assert len(a) == len(b)
    output = 0
    for left, right in zip(a, b):
        output += left * right
    return output


def vect_mat_mul(vect, matrix):
    """Multiply a vector by a matrix given as a list of rows.

    Returns a list with one weighted sum per row of `matrix`. The output is
    sized by the number of rows instead of a fixed three slots, so layers of
    any width work. Each row must be as long as `vect` (enforced by the
    assertion in `w_sum`).
    """
    return [w_sum(vect, row) for row in matrix]


def neural_network(input, weights):
    """Run a two-layer network: input -> hidden -> prediction.

    weights -- pair of matrices: weights[0] maps the input to the hidden
               layer, weights[1] maps the hidden layer to the predictions.
    """
    hid = vect_mat_mul(input, weights[0])  # hidden layer from the input weights
    pred = vect_mat_mul(hid, weights[1])   # hid is the input for the final prediction
    return pred


toes = [8.5, 9.5, 9.9, 9.0]
wlrec = [0.65, 0.8, 0.8, 0.9]
nfans = [1200, 1300, 500, 1000]

input = [toes[0], wlrec[0], nfans[0]]
pred = neural_network(input, weights)

print(pred)

[1115.0975, 203.94099999999997, 1415.0905]


![](pynb_pics/mult_inout_stack.jpg)

In [10]:
# Using NumPy functions

import numpy as np
            #toes %win #fans
# Rows are written per hidden node and then transposed, so the matrix can be
# applied as `input.dot(ih_wgt)`.
ih_wgt = np.transpose(np.array([
    [0.1, 0.2, -0.1],   # hid[0]
    [-0.1, 0.1, 0.9],   # hid[1]
    [0.1, 0.4, 0.1],    # hid[2]
]))

# Columns before transposing: hid[0] hid[1] hid[2]
hp_wgt = np.transpose(np.array([
    [0.3, 1.1, -0.3],   # hurt?
    [0.1, 0.2, 0.0],    # win?
    [0.0, 1.3, 0.1],    # sad?
]))

weights = [ih_wgt, hp_wgt]


def neural_network(input, weights):
    """Run a two-layer network with two consecutive dot products.

    input   -- NumPy vector of input values
    weights -- sequence whose first item maps input -> hidden and whose
               second item maps hidden -> prediction
    """
    return input.dot(weights[0]).dot(weights[1])


toes = np.array([8.5, 9.5, 9.9, 9.0])
wlrec = np.array([0.65, 0.8, 0.8, 0.9])
nfans = np.array([1200, 1300, 500, 1000])

input = np.array([toes[0], wlrec[0], nfans[0]])
pred = neural_network(input, weights)

print(pred)

[1115.0975  203.941  1415.0905]


### 2. Introduction to neural learning

#### 2.1 Gradient descent

In [11]:
# Brief explanation of gradient descent
weight = 0.5
input = 0.5
goal_prediction = 0.8
# alpha - how much to move the weight in each iteration. It scales every
# update below. 1 reproduces the unscaled update; with input = 0.5 each step
# multiplies the miss by (1 - step_amount * 0.25), so values above 8 (such as
# 10) overshoot further every step and diverge.
step_amount = 1


def neural_network(input, weight):
    """Predict an output by scaling the input by the weight."""
    return input * weight


for iteration in range(20):
    prediction = neural_network(input, weight)
    error = (prediction - goal_prediction) ** 2
    # Raw miss (prediction - goal) scaled by the input: the sign gives the
    # direction and the magnitude gives the amount to move the weight.
    direction_and_amount = (prediction - goal_prediction) * input
    weight = weight - step_amount * direction_and_amount

    print("Error:" + str(error) + " Prediction:" + str(prediction))

Error:0.30250000000000005 Prediction:0.25
Error:0.17015625000000004 Prediction:0.3875
Error:0.095712890625 Prediction:0.49062500000000003
Error:0.05383850097656251 Prediction:0.56796875
Error:0.03028415679931642 Prediction:0.6259765625
Error:0.0170348381996155 Prediction:0.669482421875
Error:0.00958209648728372 Prediction:0.70211181640625
Error:0.005389929274097089 Prediction:0.7265838623046875
Error:0.0030318352166796153 Prediction:0.7449378967285156
Error:0.0017054073093822882 Prediction:0.7587034225463867
Error:0.0009592916115275371 Prediction:0.76902756690979
Error:0.0005396015314842384 Prediction:0.7767706751823426
Error:0.000303525861459885 Prediction:0.7825780063867569
Error:0.00017073329707118678 Prediction:0.7869335047900676
Error:9.603747960254256e-05 Prediction:0.7902001285925507
Error:5.402108227642978e-05 Prediction:0.7926500964444131
Error:3.038685878049206e-05 Prediction:0.7944875723333098
Error:1.7092608064027242e-05 Prediction:0.7958656792499823
Error:9.614592036015323

In [12]:
# Last step correctly approaches 0.8
# A single line of code to calculate both the direction and the amount that we should change our weight to reduce error
# Provides a superior form of learning that increments/decrements weight by a small amount to get as close to zero error as possible.

#### 2.2 Direction and amount - Explanation

In [13]:
# on the line direction_and_amount = (prediction - goal_prediction) * input
# the term: prediction - goal_prediction , represent the pure error
# Represents raw direction and amount that we missed - Let's call it Offset
# Positive - Predicted too high
# Negative - Predicted too low
# Weight adjustment: subtract the offset (scaled by the input) from the weight

#### 2.3 Iterations

In [14]:
def neural_network(input, weights):
    """Return the weighted sum of `input` and `weights` (a single prediction).

    One weight is read per input value, so `weights` must be at least as
    long as `input`.
    """
    out = 0
    for i in range(len(input)):
        out += input[i] * weights[i]
    return out


def ele_mul(scalar, vector):
    """Multiply every element of `vector` by `scalar`.

    Returns a new list with one product per element of `vector`, whatever
    its length.
    """
    return [value * scalar for value in vector]


toes = [8.5, 9.5, 9.9, 9.0]
wlrec = [0.65, 0.8, 0.8, 0.9]
nfans = [1.2, 1.3, 0.5, 1.0]
win_or_lose_binary = [1, 1, 0, 1]
true = win_or_lose_binary[0]
alpha = 0.01
weights = [0.1, 0.2, -.1]
input = [toes[0], wlrec[0], nfans[0]]

# The loop variable is `iteration` rather than `iter`, so the builtin `iter`
# is not shadowed for the cells that run afterwards.
for iteration in range(3):
    pred = neural_network(input, weights)
    error = (pred - true) ** 2
    delta = pred - true
    # One weight delta per input: the shared delta scaled by that input.
    weight_deltas = ele_mul(delta, input)
    # weight_deltas[0] = 0
    print("Iteration:" + str(iteration + 1))
    print("Pred:" + str(pred))
    print("Error:" + str(error))
    print("Delta:" + str(delta))
    print("Weights:" + str(weights))
    print("Weight_Deltas:")
    print(str(weight_deltas))
    print()
    # Update in place after printing, so each report shows the weights that
    # produced that iteration's prediction.
    for i in range(len(weights)):
        weights[i] -= alpha * weight_deltas[i]

Iteration:1
Pred:0.8600000000000001
Error:0.01959999999999997
Delta:-0.1399999999999999
Weights:[0.1, 0.2, -0.1]
Weight_Deltas:
[-1.189999999999999, -0.09099999999999994, -0.16799999999999987]

Iteration:2
Pred:0.9637574999999999
Error:0.0013135188062500048
Delta:-0.036242500000000066
Weights:[0.1119, 0.20091, -0.09832]
Weight_Deltas:
[-0.30806125000000056, -0.023557625000000044, -0.04349100000000008]

Iteration:3
Pred:0.9906177228125002
Error:8.802712522307997e-05
Delta:-0.009382277187499843
Weights:[0.11498061250000001, 0.20114557625, -0.09788509000000001]
Weight_Deltas:
[-0.07974935609374867, -0.006098480171874899, -0.011258732624999811]



In [15]:
# What happens for several iterations ?
# More iterations will allow our network to learn
# Slopes are reflected by the weight_delta values
# Over the iterations, the slope decreases as we approach the bottom of the parabola

![](pynb_pics/grad_desc.jpg)

### 3. Building a DNN

In [16]:
# streetlights is the dataset of the observations made
# walk_vs_stop is the outcome observed for each pattern.  It's what we want to predict.
# Here we are training the dataset on the outputs and the data patterns observed.

In [17]:
import numpy as np
weights = np.array([0.5, 0.48, -0.7])
alpha = 0.1  # Learning rate: weight = weight - alpha * derivative

# One row per observed light pattern.
streetlights = np.array([
    [1, 0, 1],
    [0, 1, 1],
    [0, 0, 1],
    [1, 1, 1],
    [0, 1, 1],
    [1, 0, 1],
])
# One row per pattern, as a column vector.
walk_vs_stop = np.array([[0], [1], [0], [1], [1], [0]])

for iteration in range(40):
    error_for_all_lights = 0
    for row_index in range(len(walk_vs_stop)):
        input = streetlights[row_index]
        # A one-element row, so error and delta below are 1-element arrays.
        goal_prediction = walk_vs_stop[row_index]
        prediction = input.dot(weights)
        delta = prediction - goal_prediction
        error = (goal_prediction - prediction) ** 2
        error_for_all_lights += error
        # Shared error measure (delta) multiplied by each respective input.
        weights = weights - (alpha * (input * delta))
        print("Weights:", weights, sep="")
        print("Prediction:", prediction, sep="")
    print("Error:", error_for_all_lights, "\n", sep="")

Weights:[ 0.52  0.48 -0.68]
Prediction:-0.19999999999999996
Weights:[ 0.52  0.6  -0.56]
Prediction:-0.19999999999999996
Weights:[ 0.52   0.6   -0.504]
Prediction:-0.5599999999999999
Weights:[ 0.5584  0.6384 -0.4656]
Prediction:0.6160000000000001
Weights:[ 0.5584   0.72112 -0.38288]
Prediction:0.17279999999999995
Weights:[ 0.540848  0.72112  -0.400432]
Prediction:0.17552
Error:[2.65612311]

Weights:[ 0.5268064  0.72112   -0.4144736]
Prediction:0.14041599999999999
Weights:[ 0.5268064   0.79045536 -0.34513824]
Prediction:0.3066464
Weights:[ 0.5268064   0.79045536 -0.31062442]
Prediction:-0.34513824
Weights:[ 0.52614267  0.78979163 -0.31128815]
Prediction:1.006637344
Weights:[ 0.52614267  0.84194128 -0.2591385 ]
Prediction:0.4785034751999999
Weights:[ 0.49944225  0.84194128 -0.28583891]
Prediction:0.26700416768
Error:[0.96287018]

Weights:[ 0.47808192  0.84194128 -0.30719925]
Prediction:0.213603334144
Weights:[ 0.47808192  0.88846708 -0.26067345]
Prediction:0.5347420299776
Weights:[ 0.4780

In [18]:
# We observe that the highest weight means there is a correlation with the second parameter in each pattern
# streetlights = np.array( [[ 1, 0, 1 ],
#                          [ 0, 1, 1 ],
#                          [ 0, 0, 1 ],
#                          [ 1, 1, 1 ],
#                          [ 0, 1, 1 ],
#                          [ 1, 0, 1 ] ] )
# In this case, it's the second one which is far greater than the 2 others!
# This is caused by the up or down pressures on the weights during gradient descent 
# On average more up pressure on middle param weight and more down pressure on other 2

#### 3.1 Up and Down pressure

In [19]:
# This explains why generalization is desired to ensure that the model works well with data

In [20]:
# Neural networks learn by error attribution.
# This refers to the process of assigning and understanding the contribution of each weight to the overall error
# or loss of the network. It's a crucial aspect of training and optimizing a NN.  The purpose is to identify
# how changes in the parameters of the network affect the error, which helps in adjusting the model to improve its performance.

# In the above code:
# weights = weights - (alpha * (input * delta)) 
# Weight updated.  Shared error measure and multiplying by each respective input
# Given a Shared error, the network needs to figure out which weights contributed so they can be adjusted

#### 3.2 Backpropagation and introducing non-linearity

In [21]:
# Neural networks would be restricted to modeling only linear relationships between inputs and outputs without an 
# activation function.
# The activation function decides whether a neuron should be activated or not by calculating the weighted sum and further 
# adding non-linearity to it. The purpose of the activation function is to introduce non-linearity into the output of a neuron.

![](pynb_pics/act_fct.jpg)

In [22]:
# Backpropagation consists of adjusting the weights of the previous layer according to the delta (error) calculated 
# during an iteration

![](pynb_pics/back_prop.jpg)

### 4.0 A Deep Neural Network Comes to Life

In [23]:
# 1 iteration

# Initialize weights and data
import numpy as np
np.random.seed(1)


def relu(x):
    """Keep the positive entries of `x` and zero out the rest.

    Written as a mask multiplication, so zeroed negative entries come out
    as -0.0.
    """
    return (x > 0) * x


def relu2deriv(output):
    """Return True (1) where `output` is more than 0, False (0) otherwise."""
    return output > 0


alpha = 0.2
hidden_size = 3
streetlights = np.array([
    [1, 0, 1],
    [0, 1, 1],
    [0, 0, 1],
    [1, 1, 1],
])
walk_vs_stop = np.array([[1, 1, 0, 0]]).T

# Initial weights: 2 * random - 1, drawn for layer 0->1 first, then 1->2.
weights_0_1 = 2 * np.random.random((3, hidden_size)) - 1
weights_1_2 = 2 * np.random.random((hidden_size, 1)) - 1

# Forward pass on the first sample, then the output error and delta.
layer_0 = streetlights[0:1]
layer_1 = relu(layer_0.dot(weights_0_1))
layer_2 = layer_1.dot(weights_1_2)
layer_2_error = np.sum((layer_2 - walk_vs_stop[0:1]) ** 2)
layer_2_delta = walk_vs_stop[0:1] - layer_2

# Backpropagate from layer 2 to layer 1, using the weights before they are updated.
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

# Apply the weight deltas in place.
weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

print("Error:", layer_2_error, sep="")
print("Layer 0:", layer_0, sep="")
print("Layer 1:", layer_1, sep="")
print("Layer 2:", layer_2, sep="")
print("Layer 1 Delta:", layer_1_delta, sep="")
print("Layer 2 Delta:", layer_2_delta, sep="")
print("Weights 1_2:", weights_1_2, sep="")
print("Weights 0_1:", weights_0_1, sep="")


Error:1.0430445982842722
Layer 0:[[1 0 1]]
Layer 1:[[-0.          0.13177044 -0.        ]]
Layer 2:[[-0.02129555]]
Layer 1 Delta:[[ 0.         -0.16505257  0.        ]]
Layer 2 Delta:[[1.02129555]]
Weights 1_2:[[ 0.07763347]
 [-0.13469566]
 [ 0.370439  ]]
Weights 0_1:[[-0.16595599  0.40763847 -0.99977125]
 [-0.39533485 -0.70648822 -0.81532281]
 [-0.62747958 -0.34188906 -0.20646505]]


In [24]:
# Draw a fresh 3 x hidden_size weight matrix (random values scaled by 2, minus 1)
# and show it.
# NOTE(review): this cell relies on `np` and `hidden_size` left in the kernel
# by an earlier cell -- confirm it is run after them.
weights_0_1 = np.random.random((3, hidden_size)) * 2 - 1
print(weights_0_1)

[[-0.5910955   0.75623487 -0.94522481]
 [ 0.34093502 -0.1653904   0.11737966]
 [-0.71922612 -0.60379702  0.60148914]]


In [25]:
import numpy as np
np.random.seed(1)


def relu(x):
    """Keep the positive entries of `x` and zero out the rest (as -0.0 for negatives)."""
    return (x > 0) * x


def relu2deriv(output):
    """Return True (1) where `output` is more than 0, False (0) otherwise."""
    return output > 0


alpha = 0.2
hidden_size = 4
streetlights = np.array([
    [1, 0, 1],
    [0, 1, 1],
    [0, 0, 1],
    [1, 1, 1],
])
walk_vs_stop = np.array([[1, 1, 0, 0]]).T

# Initial weights: 2 * random - 1, drawn for layer 0->1 first, then 1->2.
weights_0_1 = 2 * np.random.random((3, hidden_size)) - 1
weights_1_2 = 2 * np.random.random((hidden_size, 1)) - 1

for iteration in range(10):
    layer_2_error = 0
    for i in range(len(streetlights)):
        # Forward pass for one sample (kept 2-D by slicing i:i+1).
        layer_0 = streetlights[i:i + 1]
        layer_1 = relu(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)

        # Running squared error for this pass, and the output delta.
        layer_2_error += np.sum((layer_2 - walk_vs_stop[i:i + 1]) ** 2)
        layer_2_delta = walk_vs_stop[i:i + 1] - layer_2

        # Backpropagate with the weights as they were before this update.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

        # if(iteration == 0):
        # Reported after every sample; "Error" is the running total so far.
        print("Error:", layer_2_error, sep="")
        print("Layer 0:", layer_0, sep="")
        print("Layer 1:", layer_1, sep="")
        print("Layer 2:", layer_2, sep="")
        print("Layer 1 Delta:", layer_1_delta, sep="")
        print("Layer 2 Delta:", layer_2_delta, sep="")
    print("Iteration:", iteration, "\n", sep="")

Error:0.3697329913497495
Layer 0:[[1 0 1]]
Layer 1:[[-0.          0.51828245 -0.         -0.        ]]
Layer 2:[[0.39194327]]
Layer 1 Delta:[[-0.          0.45983371 -0.          0.        ]]
Layer 2 Delta:[[0.60805673]]
Error:1.3281972624432705
Layer 0:[[0 1 1]]
Layer 1:[[-0.         -0.         -0.          0.06156045]]
Layer 2:[[0.02098811]]
Layer 1 Delta:[[-0.          0.         -0.          0.33377944]]
Layer 2 Delta:[[0.97901189]]
Error:1.4142058374213209
Layer 0:[[0 0 1]]
Layer 1:[[-0.          0.16960021 -0.          0.43719489]]
Layer 2:[[0.29327219]]
Layer 1 Delta:[[ 0.         -0.24026731  0.         -0.10352177]]
Layer 2 Delta:[[-0.29327219]]
Error:1.4142058374213209
Layer 0:[[1 1 1]]
Layer 1:[[-0. -0. -0. -0.]]
Layer 2:[[0.]]
Layer 1 Delta:[[0. 0. 0. 0.]]
Layer 2 Delta:[[0.]]
Iteration:0

Error:0.21497184958404528
Layer 0:[[1 0 1]]
Layer 1:[[-0.          0.65416247 -0.          0.02115568]]
Layer 2:[[0.53634943]]
Layer 1 Delta:[[-0.          0.37523987 -0.          0.1517

### 5.0 A Deep Neural Network with PyTorch