# Looking at the Math Behind a Neural Network:
A primer on the basic linear algebra (matrix multiplication, bias addition, and a sigmoid activation) that goes on under the hood of a network's forward pass.

In [1]:
import torch

In [2]:
def activation(x):
    """Apply the logistic sigmoid to ``x`` element-wise.

    Computes ``1 / (1 + exp(-x))`` using ``torch.exp``.

    Args:
        x: A ``torch.Tensor`` of pre-activation values.

    Returns:
        A tensor holding the sigmoid of each element of ``x``.
    """
    denominator = torch.exp(-x) + 1
    return 1 / denominator

In [3]:
# Single layer network: random inputs for one sample with five features.
torch.manual_seed(7)  # fixed seed so the random draws below are reproducible
features = torch.randn(1, 5)          # input row vector, shape (1, 5)
weights = torch.randn_like(features)  # one weight per feature, same shape as features
bias = torch.randn(1, 1)              # single bias term kept as a (1, 1) tensor

In [13]:
# Forward pass: multiply the features by the transposed weights, add the bias,
# then squash the result with the sigmoid activation.
output = activation(features.mm(weights.t()) + bias)
output

tensor([[0.1595]])

In [29]:
# Two layer network (hidden + output; the input layer is not counted):
torch.manual_seed(7)  # fixed seed so the random draws below are reproducible

features = torch.randn(1, 3)

# layer sizes:
n_input = features.size(1)  # one input unit per feature column
n_hidden = 2
n_output = 1

# randomly initialised weight matrices and bias row vectors
# (draw order matters: it determines which random values each tensor gets):
W1 = torch.randn(n_input, n_hidden)
W2 = torch.randn(n_hidden, n_output)
b1 = torch.randn(1, n_hidden)
b2 = torch.randn(1, n_output)

# feed forward: hidden-layer activations first, then the output layer
a1 = activation(features @ W1 + b1)
output = activation(a1 @ W2 + b2)
output

tensor([[0.3171]])

In [None]:

### Generate some data
torch.manual_seed(7)  # Seed the RNG so every run draws the same values

# One sample made of 3 values drawn from a standard normal distribution
features = torch.randn(1, 3)

# Layer sizes for the network
n_input = features.shape[-1]    # Input units: one per feature column
n_hidden = 2                    # Hidden units
n_output = 1                    # Output units

# Input -> hidden weights
W1 = torch.randn((n_input, n_hidden))
# Hidden -> output weights
W2 = torch.randn((n_hidden, n_output))

# Bias row vectors for the hidden and output layers
B1 = torch.randn(1, n_hidden)
B2 = torch.randn(1, n_output)