In [2]:
import numpy as np
import torch

A simple hypothesis function is the dot (inner) product of an input vector and a weight vector, plus a bias term:

$$
h = \begin{bmatrix}
x_1 \, x_2 \cdots  x_n
\end{bmatrix}
\cdot 
\begin{bmatrix}
           w_1 \\
           w_2 \\
           \vdots \\
           w_n
\end{bmatrix} + b
$$

$$
y = sigmoid(h)
$$


In [3]:
def sigmoid_activation(h):
    """Apply the logistic sigmoid element-wise.

    Mathematically this is ``1 / (1 + exp(-h))``. The built-in
    ``torch.sigmoid`` is used instead of the hand-written formula because
    ``exp(-h)`` overflows to ``inf`` for large negative inputs, which turns
    the gradient of the hand-written version into NaN.

    Args:
        h: torch.Tensor of any shape.

    Returns:
        torch.Tensor with the same shape as ``h`` and values in [0, 1].
    """
    return torch.sigmoid(h)

In [46]:
### Generate some synthetic data
torch.manual_seed(13)  # fix the RNG so the sampled values are reproducible

n_features = 10
m_examples = 1

# X: one row per example and one column per feature, sampled from a standard Gaussian
X = torch.randn((m_examples, n_features))
# W: Gaussian weights with the same shape as X
# (randn_like samples a normal distribution; rand_like would sample uniformly from [0, 1))
W = torch.randn_like(X)
# b: a single bias value stored as a (1, 1) tensor so it broadcasts over the output
b = torch.randn((1, 1))


In [47]:
# There are two different ways in PyTorch to compute the product of X and W.
# 1. Element-wise `*` followed by a sum. This is risky: if the shapes of X and W are
#    not consistent, `*` may silently broadcast them instead of raising an error.
y = sigmoid_activation((X * W).sum() + b)

# 2. torch.mm or torch.matmul, which raise an error when the inner dimensions do not
#    match. W has the same shape as X here, so the call below fails on purpose; the
#    error is caught and printed so the notebook still runs from top to bottom.
try:
    y = sigmoid_activation(torch.mm(X, W) + b)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: size mismatch, m1: [1 x 10], m2: [1 x 10] at /Users/soumith/miniconda2/conda-bld/pytorch_1532624435833/work/aten/src/TH/generic/THTensorMath.cpp:2070

In [None]:
# W has the same shape as X, so it has to be turned into a column vector before the
# matrix product. PyTorch offers three ways to change a tensor's shape:
# 1. W.reshape(rows, columns) returns a view of W when that is possible and otherwise
#    copies the data into a new tensor, so it is not guaranteed to be copy-free.
# 2. W.resize_(rows, columns) resizes W in place. If the new shape holds fewer elements
#    than the old one, the extra elements are dropped from the tensor; if it holds
#    more, the new elements are uninitialized.
# 3. W.view(rows, columns) returns a new tensor with the requested shape that shares
#    its data with W (no copy) and raises an error if the shape is not compatible.

y = sigmoid_activation(X.mm(W.view(n_features, m_examples)) + b)

$$
h_{(m,hidden)} = sigmoid(X_{(m,n)} \cdot W1_{(n,hidden)} + b1_{(m,hidden)}) 
$$

$$
output_{(m,output\_dim)} = sigmoid(h_{(m,hidden)} \cdot W2_{(hidden,output\_dim)} + b2_{(m,output\_dim)}) 
$$



In [49]:
# A small network: 3 input features, one hidden layer with 2 units, and 1 output.
# NOTE: despite its name, num_hidden_layers is used below as the width of the
# hidden layer (number of hidden units), not as a count of layers.
# W1 maps the inputs to the hidden layer:  (num_features, num_hidden_layers)
# W2 maps the hidden layer to the output:  (num_hidden_layers, n_output_dimensions)

num_features, num_hidden_layers = 3, 2
m_examples, n_output_dimensions = 10, 1

X = torch.randn((m_examples, num_features))
W1 = torch.randn((num_features, num_hidden_layers))
W2 = torch.randn((num_hidden_layers, n_output_dimensions))
# The biases are sampled with one row per example, matching the shape of each layer's output.
b1 = torch.randn((m_examples, num_hidden_layers))
b2 = torch.randn((m_examples, n_output_dimensions))


In [50]:
# Forward pass: hidden activations first, then the network output.
h = sigmoid_activation(X @ W1 + b1)
output = sigmoid_activation(h @ W2 + b2)

# Sanity check: one row per example and one column per output dimension.
assert output.size(0) == m_examples
assert output.size(1) == n_output_dimensions