In [1]:
from math import exp

# Print the neurons 
def print_arr(arr):
    """Print the stored fields of every neuron in a layer.

    Neurons are numbered from 1 in the order they appear in ``arr``. Every
    key/value pair of a neuron dict is printed on its own line, except the
    ``'delta'`` entry, which is skipped.

    Args:
        arr: Iterable of neuron dicts (for example one layer of the network).
    """
    for number, neuron in enumerate(arr, start=1):
        print("\nNeuron ", number)
        for field, value in neuron.items():
            # The back-propagation delta is internal bookkeeping; do not show it.
            if field == 'delta':
                continue
            print(field, " ", value)
        print()

# Generating the dictionary to store the neuron
def gen(arr):
    """Wrap each weight list in its own neuron dictionary.

    Args:
        arr: Iterable of weight lists, one per neuron.

    Returns:
        list: One ``{'weights': weight_list}`` dict per entry of ``arr``, in
        the same order. The weight lists are stored by reference, not copied.
    """
    return [{'weights': weight_list} for weight_list in arr]

# This function does the initialization of the network
def create_network():
    """Build the fixed two-layer network, print its starting weights, return it.

    Every neuron is a dict created by ``gen``; its ``'weights'`` list holds one
    weight per incoming link followed by the bias as the last entry.

    Returns:
        list: ``[hidden_layer, output_layer]``. The hidden layer has three
        neurons with 3 entries each (2 links + bias); the output layer has two
        neurons with 4 entries each (3 links + bias).
    """
    # Two incoming links plus a bias for every hidden neuron.
    hidden_weights = [
        [0.1, -0.2, 0.1],
        [0, 0.2, 0.2],
        [0.3, -0.4, 0.5],
    ]
    # Three incoming links (one per hidden neuron) plus a bias per output neuron.
    output_weights = [
        [-0.4, 0.1, 0.6, -0.1],
        [0.2, -0.1, -0.2, 0.6],
    ]
    hidden_layer = gen(hidden_weights)
    output_layer = gen(output_weights)
    network = [hidden_layer, output_layer]

    # Show the starting state of both layers before any training happens.
    print("Initial weights \n")
    print("======= Hidden layer =======")
    print_arr(hidden_layer)
    print("\n======= Output layer =======")
    print_arr(output_layer)
    print()
    return network
 
# This would return the weighted sum of the links to a specific neuron 
# added with the bias corresponding to that neuron which acts as the linear 
# component of the inputs
def linear_component(weights, inputs):
    """Return the weighted sum of ``inputs`` plus the neuron's bias.

    Args:
        weights: Sequence whose last entry is the bias; every earlier entry is
            the weight applied to the input at the same index.
        inputs: Sequence of input values. Only the first ``len(weights) - 1``
            entries are read, so any trailing extras are ignored.

    Returns:
        The sum of ``weights[i] * inputs[i]`` over the non-bias weights, with
        the bias added last.
    """
    bias = weights[-1]
    total = 0
    # Accumulate term by term, in index order, before adding the bias.
    for position, weight in enumerate(weights[:-1]):
        total += weight * inputs[position]
    return total + bias

# Sigmoid function implementation, which is the non-linear
# component applied to the neuron's input
def non_linear_component(activation):
    """Apply the logistic sigmoid ``1 / (1 + e**(-activation))``.

    The exponential is only ever taken of a non-positive number, so strongly
    negative activations (below roughly -709) return a value at or near 0.0
    instead of raising ``OverflowError`` from ``exp``.

    Args:
        activation: The neuron's net input (a real number).

    Returns:
        float: The sigmoid of ``activation``, between 0.0 and 1.0 inclusive.
    """
    if activation >= 0:
        # exp(-activation) <= 1 here, so it cannot overflow.
        return 1.0 / (1.0 + exp(-activation))
    # Algebraically identical form e^a / (1 + e^a); exp(activation) < 1 here.
    scaled = exp(activation)
    return scaled / (1.0 + scaled)
 

def forward_propagation(network, row):
    """Push one input row through every layer and return the final outputs.

    As a side effect, each neuron's result is stored under its ``'output'``
    key so that later steps can read it.

    Args:
        network: List of layers; each layer is a list of neuron dicts that
            have a ``'weights'`` entry.
        row: Input values for the first layer. Extra trailing entries are
            harmless because ``linear_component`` only reads as many inputs
            as the neuron has non-bias weights.

    Returns:
        The list of outputs of the last layer, or ``row`` itself when the
        network has no layers.
    """
    signal = row
    for layer in network:
        layer_outputs = []
        for neuron in layer:
            # Linear part: weighted sum of the incoming signal plus the bias.
            net_input = linear_component(neuron['weights'], signal)
            # Non-linear part: squash the net input and remember it on the neuron.
            neuron['output'] = non_linear_component(net_input)
            layer_outputs.append(neuron['output'])
        # The outputs of this layer are the inputs of the next one.
        signal = layer_outputs
    return signal

# In theory this is the partial derivative of the output of a specific
# neuron with respect to its net input
def transfer_derivative(output):
    """Return ``output * (1 - output)``.

    This is the slope of the sigmoid written in terms of the sigmoid's own
    output value.

    Args:
        output: A neuron's stored output.

    Returns:
        The product of ``output`` and ``1.0 - output``.
    """
    complement = 1.0 - output
    return output * complement
 
# Once the output is generated by forward propagation and error is found
# we have to minimize that error
# This is achieved by backward propagation where the weights of the links are 
# adjusted in such a manner that the resultant error is minimized 
def backward_propagate_error(network, expected):
    """Compute and store the ``'delta'`` of every neuron, last layer first.

    Requires every neuron to already carry an ``'output'`` value (as set by
    ``forward_propagation``). Nothing is returned; the result is written to
    each neuron's ``'delta'`` key.

    Args:
        network: List of layers; each layer is a list of neuron dicts.
        expected: Target values, indexed like the neurons of the last layer.
    """
    last = len(network) - 1
    # Walk from the output layer back to the first layer so that the deltas
    # of the following layer already exist when an earlier layer needs them.
    for depth in range(last, -1, -1):
        layer = network[depth]
        if depth == last:
            # Output layer: the error is simply obtained minus expected.
            errors = [
                neuron['output'] - expected[position]
                for position, neuron in enumerate(layer)
            ]
        else:
            # Earlier layer: collect the deltas of the following layer, each
            # scaled by the weight on the link coming from this neuron.
            following = network[depth + 1]
            errors = []
            for position in range(len(layer)):
                propagated = 0.0
                for successor in following:
                    propagated += successor['weights'][position] * successor['delta']
                errors.append(propagated)
        # Chain rule: scale each error by the slope of the activation function.
        for neuron, error in zip(layer, errors):
            neuron['delta'] = error * transfer_derivative(neuron['output'])
   
# This function is responsible for updating the weights of every layer
# using the error derivatives (deltas) computed by back propagation
def updation_of_weights(network, row, learning_rate):
    """Apply one gradient-descent step to every weight and bias, in place.

    Each neuron must already hold a ``'delta'``, and every neuron outside the
    last layer must hold an ``'output'``.

    Args:
        network: List of layers; each layer is a list of neuron dicts.
        row: One training row; everything except its last entry is used as
            the input of the first layer.
        learning_rate: Step size multiplied into every update.
    """
    for depth, layer in enumerate(network):
        if depth == 0:
            # The last entry of the row is the expected output, not an input.
            feeds = row[:-1]
        else:
            # Later layers are fed by the outputs of the layer before them.
            feeds = [previous['output'] for previous in network[depth - 1]]
        for neuron in layer:
            weights = neuron['weights']
            # new weight = old weight - learning rate * delta * input on that link
            for position, value in enumerate(feeds):
                weights[position] -= learning_rate * neuron['delta'] * value
            # The bias is the last entry and behaves like a link with input 1.
            weights[-1] -= learning_rate * neuron['delta']
 
# Now that the algorithm is in place we have to train the neural network with the dataset
# provided in the question
def neural_network_training(network, training_dataset, learning_rate, tot_epochs, tot_outputs):
    """Train the network in place, updating the weights after every row.

    After each epoch one line is printed with the epoch number, the learning
    rate and the squared error summed over all rows of that epoch.

    Args:
        network: List of layers as returned by ``create_network``.
        training_dataset: Iterable of rows; the last entry of each row is the
            index of the output neuron that should produce 1.
        learning_rate: Step size passed on to ``updation_of_weights``.
        tot_epochs: Number of passes over ``training_dataset``.
        tot_outputs: Length of the one-hot target vector built for each row.
    """
    for epoch_number in range(1, tot_epochs + 1):
        error_sum = 0
        for row in training_dataset:
            # Outputs produced by the current weights for this row.
            outputs = forward_propagation(network, row)
            # One-hot target: 1 at the index stored at the end of the row.
            expected = [0] * tot_outputs
            expected[row[-1]] = 1
            # Squared error, summed over every output neuron.
            error_sum += sum(
                (expected[k] - outputs[k]) ** 2 for k in range(len(expected))
            )
            # Compute the deltas, then move the weights against the gradient.
            backward_propagate_error(network, expected)
            updation_of_weights(network, row, learning_rate)
        print('Epoch=%d, Learning Rate=%.3f, Error=%.3f' % (epoch_number, learning_rate, error_sum))
 
# Training rows as given in the question: [input_1, input_2, expected_class]
dataset = [
    [0.6, 0.3, 1],
    [0.2, 0.3, 0],
]
# Number of distinct expected classes found in the last column
n_outputs = len({row[-1] for row in dataset})
# Build the network with its fixed starting weights (this also prints them)
network = create_network()
# Train the network on the dataset
neural_network_training(
    network,
    dataset,
    learning_rate=0.1,
    tot_epochs=5000,
    tot_outputs=n_outputs,
)

# Report the weights reached after training, layer by layer
print("\nFinal weights \n")
print("======= Hidden layer =======")
print_arr(network[0])
print("\n======= Output layer =======")
print_arr(network[1])

Initial weights 


Neuron  1
weights   [0.1, -0.2, 0.1]


Neuron  2
weights   [0, 0.2, 0.2]


Neuron  3
weights   [0.3, -0.4, 0.5]



Neuron  1
weights   [-0.4, 0.1, 0.6, -0.1]


Neuron  2
weights   [0.2, -0.1, -0.2, 0.6]


Epoch=1, Learning Rate=0.100, Error=1.050
Epoch=2, Learning Rate=0.100, Error=1.049
Epoch=3, Learning Rate=0.100, Error=1.047
Epoch=4, Learning Rate=0.100, Error=1.046
Epoch=5, Learning Rate=0.100, Error=1.045
Epoch=6, Learning Rate=0.100, Error=1.044
Epoch=7, Learning Rate=0.100, Error=1.042
Epoch=8, Learning Rate=0.100, Error=1.041
Epoch=9, Learning Rate=0.100, Error=1.040
Epoch=10, Learning Rate=0.100, Error=1.039
Epoch=11, Learning Rate=0.100, Error=1.038
Epoch=12, Learning Rate=0.100, Error=1.037
Epoch=13, Learning Rate=0.100, Error=1.036
Epoch=14, Learning Rate=0.100, Error=1.035
Epoch=15, Learning Rate=0.100, Error=1.035
Epoch=16, Learning Rate=0.100, Error=1.034
Epoch=17, Learning Rate=0.100, Error=1.033
Epoch=18, Learning Rate=0.100, Error=1.032
Epoch=19, L