In [3]:
import torch
import torch.nn as nn
import pprint

# one shared pretty-printer instance; later cells display values through pp.pprint(...)
pp = pprint.PrettyPrinter()

In [1]:
# a plain nested Python list (2 rows of 3 ints) -- no tensors involved yet
list_of_lists = [[1,2,3], [4,5,6]]
print(list_of_lists)

[[1, 2, 3], [4, 5, 6]]


In [5]:
# initialize a 3x2 tensor from a nested Python list;
# dtype=torch.float32 stores the integer literals as floats (printed as 0., 1., ...)
data = torch.tensor([[0,1], 
                     [2,3], 
                     [4,5]], dtype=torch.float32)

print(data)

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])


In [6]:
# constant-filled tensors: the arguments give the shape (2 rows, 5 columns)
zeros = torch.zeros(2,5)
ones = torch.ones(2,5)
print(zeros)
print(ones)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


In [7]:
# integer range starting at 1 and stopping before 10 (the end value is exclusive)
rr = torch.arange(1,10)
print(rr)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])


In [8]:
# adding a Python scalar applies it to every element of the tensor
rr + 2

tensor([ 3,  4,  5,  6,  7,  8,  9, 10, 11])

In [9]:
# matrix multiplication: A is 3x2 and B is 2x4, so C = A @ B is 3x4
A = torch.tensor([[1,2], [2,3], [4,5]])
B = torch.tensor([[1,2,3,4],[5,6,7,8]])
C = A @ B # or equivalently A.matmul(B)
print(f"A = {A}")
print(f"B = {B}")
print(f"C = A@B = {C}")



A = tensor([[1, 2],
        [2, 3],
        [4, 5]])
B = tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])
C = A@B = tensor([[11, 14, 17, 20],
        [17, 22, 27, 32],
        [29, 38, 47, 56]])


In [10]:
# a 1-D tensor (vector) with 3 elements
v = torch.tensor([1,2,3])
print(v.shape)

torch.Size([3])


In [11]:
# matrix-vector product: a 2x3 matrix times a length-3 vector gives a length-2 vector
torch.tensor([[1,2,3], [4,5,6]]) @ v

tensor([14, 32])

In [12]:
# reshaping tensors
rr =  torch.arange(1,16)  # 15 elements: 1..15
print(f"rr = {rr}")
print(f"Shape before reshaping: {rr.shape}")
rr = rr.view(5,3)  # the same 15 elements arranged as 5 rows x 3 columns
print(f"rr = {rr}")
print(f"Shape after reshaping: {rr.shape}")


rr = tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
Shape before reshaping: torch.Size([15])
rr = tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12],
        [13, 14, 15]])
Shape after reshaping: torch.Size([5, 3])


In [14]:
import numpy as np

# convert numpy array to torch tensor
# NOTE(review): per the PyTorch docs torch.tensor() copies the array's data
# (torch.from_numpy would share it) -- confirm if aliasing matters here
arr = np.array([[1,0,5]])
data = torch.tensor(arr)
print(data)

# convert torch tensor to numpy array
# NOTE(review): per the PyTorch docs .numpy() shares memory with the tensor -- confirm
new_arr = data.numpy()
print(new_arr)


tensor([[1, 0, 5]])
[[1 0 5]]


In [19]:
# vectorized operations
data = torch.arange(1, 36, dtype=torch.float32).reshape(5, 7)  # values 1..35 as 5 rows x 7 columns
print(data)

# dim=0 reduces across the 5 rows -> one value per column (7 values)
print(f"sum over columns: {data.sum(dim=0)}")
# dim=1 reduces across the 7 columns -> one value per row (5 values)
print(f"sum over rows: {data.sum(dim=1)}")
print(f"average over rows: {data.mean(dim=1)}")
# without a dim argument every element is reduced into a single value
print(f"sum over all elements of tensor: {data.sum()}")

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19., 20., 21.],
        [22., 23., 24., 25., 26., 27., 28.],
        [29., 30., 31., 32., 33., 34., 35.]])
sum over columns: tensor([ 75.,  80.,  85.,  90.,  95., 100., 105.])
sum over rows: tensor([ 28.,  77., 126., 175., 224.])
average over rows: tensor([ 4., 11., 18., 25., 32.])
sum over all elements of tensor: 630.0


In [22]:
# Quiz: compute the per-row and per-column averages of a 2x3 tensor
A = torch.tensor([[1,2.2, 9.6], [4, -7.2, 6.3]])
print(f"A = \n{A}, \nshape = {A.shape}")
print()
# dim=1 averages the 3 entries of each row -> result has one value per row (shape [2])
row_avg = A.mean(dim=1)
print(f"Average over rows: \n{row_avg}, \nshape = {row_avg.shape}")
# dim=0 averages the 2 entries of each column -> result has one value per column (shape [3])
col_avg = A.mean(dim=0)
print(f"Average over cols: \n{col_avg}, \nshape = {col_avg.shape}")

A = 
tensor([[ 1.0000,  2.2000,  9.6000],
        [ 4.0000, -7.2000,  6.3000]]), 
shape = torch.Size([2, 3])

Average over rows: 
tensor([4.2667, 1.0333]), 
shape = torch.Size([2])
Average over cols: 
tensor([ 2.5000, -2.5000,  7.9500]), 
shape = torch.Size([3])


In [23]:
# tensor slicing
# the values 0..14 laid out as 5 rows x 3 columns; used by the indexing examples below
matr = torch.arange(15).view(5,3)
print(matr)
print(matr.shape)

tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]])
torch.Size([5, 3])


In [24]:
# a single index selects along the first dimension: row 0
matr[0]

tensor([0, 1, 2])

In [25]:
# the same row written explicitly: row 0, all columns
matr[0,:]

tensor([0, 1, 2])

In [26]:
# all rows, column 0 -> the first column as a 1-D tensor
matr[:,0]

tensor([ 0,  3,  6,  9, 12])

In [27]:
# slice of rows 0, 1 and 2 (the end index 3 is exclusive)
matr[0:3]

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [29]:
# all rows, columns 0 and 1 (the end index 2 is exclusive)
matr[:, 0:2]

tensor([[ 0,  1],
        [ 3,  4],
        [ 6,  7],
        [ 9, 10],
        [12, 13]])

In [31]:
# chained indexing: take rows 0..2 first, then row 2 of that intermediate result
matr[0:3][2]

tensor([6, 7, 8])

In [36]:
# index with a list to pick rows 0, 2, and 4
matr[[0,2,4]]

tensor([[ 0,  1,  2],
        [ 6,  7,  8],
        [12, 13, 14]])

In [37]:
# access the 0th and 1st rows, each twice (indices in the list may repeat)
matr[[0,0,1,1]]

tensor([[0, 1, 2],
        [0, 1, 2],
        [3, 4, 5],
        [3, 4, 5]])

In [39]:
# convert a single tensor element into a python scalar value (here the plain int 0)
matr[0,0].item()

0

In [40]:
# Exercise: extract the first column and the first row of a 2x3 tensor
B = torch.tensor([[1, 2.2, 9.6], [4, -7.2, 6.3]])
print(B)
# all rows, column 0
print(f"first column = {B[:,0]}")
# row 0, all columns
print(f"first row = {B[0]}")


tensor([[ 1.0000,  2.2000,  9.6000],
        [ 4.0000, -7.2000,  6.3000]])
first column = tensor([1., 4.])
first row = tensor([1.0000, 2.2000, 9.6000])


### AutoGrad

In [42]:
# set gradient tracking for this tensor
x = torch.tensor([2.0], requires_grad=True)

# no backward pass has run yet, so no gradient is stored on x (prints None)
pp.pprint(x.grad)

None


In [43]:
# compute function of x: y = 3 * x^2
y = 3 * x * x

# backpropagate the gradients of y
y.backward()

# show the upstream/backpropagated gradient at x: dy/dx = 6 * x = 12 for x = 2
pp.pprint(x.grad)


tensor([12.])


In [44]:
# running backprop from a different tensor
z = 3 * x * x
z.backward() 
# note that the different backpropagated gradients have accumulated at x:
# 12 (from y) + 12 (from z) = 24, because x.grad was not reset in between
pp.pprint(x.grad)

tensor([24.])


### Neural Network Module

In [47]:
# linear layer

# create inputs (named `inputs` so the Python builtin `input` is not shadowed in the kernel)
inputs = torch.ones(2,3,4) # 2 batches, 3 instances per batch, 4 features per instance
print(inputs)
print(inputs.shape)

# make linear layer that will transform input of shape (N, *, H_in) into output of shape (N, *, H_out), `*` can be any arbitrary number of intermediate dimensions
H_in = 4 # size of the last dimension of the input (e.g. number of features in an instance)
H_out = 2 # size of last dimension of output (e.g. number of hidden units/features for each instance)
linear = nn.Linear(H_in, H_out)

# compute output = x @ W.T + b, where W (shape H_out x H_in) and b (shape H_out) are parameters of the linear layer
linear_output = linear(inputs)
print(linear_output)
print(linear_output.shape)



tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
torch.Size([2, 3, 4])
tensor([[[ 0.6883, -0.0623],
         [ 0.6883, -0.0623],
         [ 0.6883, -0.0623]],

        [[ 0.6883, -0.0623],
         [ 0.6883, -0.0623],
         [ 0.6883, -0.0623]]], grad_fn=<ViewBackward0>)
torch.Size([2, 3, 2])


In [48]:
# show parameters of linear layer: the weight matrix comes first, then the bias vector
list(linear.parameters())

[Parameter containing:
 tensor([[ 0.0595,  0.1507,  0.2668, -0.2181],
         [ 0.2509,  0.0537, -0.3941,  0.1925]], requires_grad=True),
 Parameter containing:
 tensor([ 0.4293, -0.1652], requires_grad=True)]

In [49]:
# applying activation function
sigmoid = nn.Sigmoid()

# the activation is applied to every element, so output has the same shape as linear_output
output = sigmoid(linear_output)
print(output)

tensor([[[0.6656, 0.4844],
         [0.6656, 0.4844],
         [0.6656, 0.4844]],

        [[0.6656, 0.4844],
         [0.6656, 0.4844],
         [0.6656, 0.4844]]], grad_fn=<SigmoidBackward0>)


In [50]:
# sequential block of layers: calling the block runs its layers in the order listed
H_in = 4
H_out = 2
block = nn.Sequential(
    nn.Linear(H_in,H_out),
    nn.Sigmoid()
)

# named `inputs` so the Python builtin `input` is not shadowed in the kernel
inputs = torch.ones(2,3,4)
output = block(inputs)
print(output)

tensor([[[0.2660, 0.5987],
         [0.2660, 0.5987],
         [0.2660, 0.5987]],

        [[0.2660, 0.5987],
         [0.2660, 0.5987],
         [0.2660, 0.5987]]], grad_fn=<SigmoidBackward0>)


### Custom Module

In [56]:
# creating custom layers/modules as child classes extending from the nn.Module class

# a single hidden-layer perceptron model
class MultiLayerPerceptron(nn.Module):
    """Perceptron with one hidden layer whose output size equals its input size.

    Data flows through: Linear(input_size -> hidden_size) -> ReLU
    -> Linear(hidden_size -> input_size) -> Sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Store the sizes and create the layers.

        Args:
            input_size: size of the last dimension of the input (and of the output).
            hidden_size: number of units in the hidden layer.
        """
        # call the base class constructor first
        super().__init__()

        self.input_size, self.hidden_size = input_size, hidden_size

        # layers are created in the same order in which forward() applies them
        self.linear = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, input_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Forward pass of the custom module: both linear layers with their activations."""
        hidden = self.relu(self.linear(x))
        return self.sigmoid(self.linear2(hidden))

In [57]:
# sample input: 2 instances with 5 features each
# (named `inputs` so the Python builtin `input` is not shadowed in the kernel)
inputs = torch.randn(2, 5)

# instantiate the model (with 3 hidden units)
model = MultiLayerPerceptron(5, 3)

# forward pass: calling the model on the inputs produces one 5-value output per instance
output = model(inputs)
print(output)

tensor([[0.2877, 0.3811, 0.5304, 0.3834, 0.6573],
        [0.2345, 0.3950, 0.5593, 0.3409, 0.6938]], grad_fn=<SigmoidBackward0>)


In [60]:
# inspecting the parameters of our model
# named_parameters() yields (name, parameter) pairs such as 'linear.weight'
list(model.named_parameters())

# list(model.parameters()) # does not show name of layer each parameter belongs to

[('linear.weight',
  Parameter containing:
  tensor([[-0.0899,  0.0124, -0.2547,  0.2632,  0.3785],
          [ 0.2410,  0.1906, -0.3415,  0.2595,  0.4015],
          [-0.3060,  0.3868, -0.1156,  0.0063, -0.3740]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([0.0698, 0.4089, 0.4340], requires_grad=True)),
 ('linear2.weight',
  Parameter containing:
  tensor([[-0.4442,  0.0930, -0.5207],
          [ 0.1719, -0.1546,  0.1177],
          [-0.1057,  0.0786,  0.2135],
          [-0.1152, -0.1813, -0.3345],
          [-0.0785,  0.2821,  0.2971]], requires_grad=True)),
 ('linear2.bias',
  Parameter containing:
  tensor([-0.5736, -0.5603, -0.0146, -0.2613,  0.4611], requires_grad=True))]

### Optimizers

In [61]:
import torch.optim as optim

In [62]:
# define some dummy data of 1s and add some noise
# NOTE(review): no random seed is set in the visible cells, so these values presumably differ between runs
y = torch.ones(10, 5) # ground truth targets (all ones)
x = y + torch.randn_like(y) # inputs are the ground truth with noise added (i.e. the task of our model is to predict the noise-free input: "denoising")
print(x)

tensor([[-0.5637, -0.3139, -1.2994,  0.1776,  0.6535],
        [ 1.0292,  2.2749, -1.0866, -1.5438, -0.8153],
        [-0.2285,  1.4981,  3.0316,  2.2977,  0.3427],
        [ 0.0385,  3.3077,  0.5022,  0.6523,  0.1872],
        [ 2.1836,  0.9063, -0.5379,  1.4352,  0.5254],
        [-0.4631,  1.5313,  2.4692,  0.6737, -0.0400],
        [ 0.5188,  1.3091,  0.8802,  1.6411,  1.0992],
        [ 1.5983, -1.5868, -0.5141,  1.6641,  1.4023],
        [ 0.7643, -0.2611,  0.6698,  0.2939,  1.8008],
        [ 1.1789,  1.0759, -1.0149,  0.9886,  2.1318]])


In [63]:
# instantiate the model
model = MultiLayerPerceptron(5, 3)

# instantiate an Adam optimizer, pass in the parameters which need to be updated and specify the learning rate
adam = optim.Adam(model.parameters(), lr=1.e-1)

# define loss function, use predefined binary cross-entropy loss module
# (the model ends in a sigmoid, and the targets y are all 1s)
loss_function = nn.BCELoss()

# forward pass through our model
y_pred = model(x)
# compute loss (no optimizer step has been taken yet, so this is the untrained model's loss)
loss = loss_function(y_pred, y)
print(loss.item())

0.6354938745498657


#### Training the model to achieve a smaller loss

In [64]:
# number of training epochs
n_epochs = 10

# training loop: every iteration is one full pass over the same batch (x, y)
for epoch in range(n_epochs):

    # always reset gradients to zero before doing backward pass in every iteration
    # (otherwise gradients from successive backward() calls accumulate)
    adam.zero_grad()

    # get model predictions
    y_pred = model(x)

    # compute loss
    loss = loss_function(y_pred, y)

    # print stats (this is the loss before the current iteration's weight update)
    print(f"Iteration # {epoch}, training loss: {loss}")

    # compute gradients
    loss.backward()

    # take a step to optimize the weights
    adam.step()


Iteration # 0, training loss: 0.6354938745498657
Iteration # 1, training loss: 0.5609316825866699
Iteration # 2, training loss: 0.4782762825489044
Iteration # 3, training loss: 0.37945666909217834
Iteration # 4, training loss: 0.278301477432251
Iteration # 5, training loss: 0.18868480622768402
Iteration # 6, training loss: 0.12010882049798965
Iteration # 7, training loss: 0.07230803370475769
Iteration # 8, training loss: 0.04266883805394173
Iteration # 9, training loss: 0.025564804673194885


In [65]:
# show parameters after training (same order as before: linear weight, linear bias, linear2 weight, linear2 bias)
list(model.parameters()) 

[Parameter containing:
 tensor([[ 0.6975,  0.8236,  0.4145,  0.7207,  0.6389],
         [ 0.4801, -0.8034, -1.1236,  0.5743,  1.0574],
         [-0.8147, -0.4146, -0.0251, -0.1509, -0.2488]], requires_grad=True),
 Parameter containing:
 tensor([ 1.0623,  0.5875, -0.3840], requires_grad=True),
 Parameter containing:
 tensor([[1.3533, 1.1272, 0.0325],
         [0.6256, 1.1075, 0.3937],
         [1.1118, 1.0860, 0.3375],
         [0.5975, 0.5584, 0.2360],
         [1.0359, 1.1111, 0.7857]], requires_grad=True),
 Parameter containing:
 tensor([1.1711, 1.2947, 0.6222, 0.8151, 1.2717], requires_grad=True)]

In [66]:
# let's check and see if the predictions are similar to the ground truth y (which contains all 1s)
# NOTE(review): this is inference only; wrapping it in torch.no_grad() would presumably avoid tracking gradients -- confirm
y_pred = model(x)
print(y_pred)

tensor([[0.9927, 0.9910, 0.9847, 0.9341, 0.9924],
        [0.9643, 0.9069, 0.9143, 0.8524, 0.9477],
        [0.9998, 0.9900, 0.9985, 0.9814, 0.9988],
        [0.9994, 0.9849, 0.9968, 0.9726, 0.9976],
        [1.0000, 0.9993, 0.9998, 0.9940, 0.9999],
        [0.9972, 0.9699, 0.9890, 0.9477, 0.9925],
        [0.9998, 0.9949, 0.9990, 0.9846, 0.9993],
        [1.0000, 0.9999, 0.9999, 0.9964, 1.0000],
        [0.9997, 0.9974, 0.9987, 0.9821, 0.9992],
        [1.0000, 0.9998, 1.0000, 0.9971, 1.0000]], grad_fn=<SigmoidBackward0>)


In [68]:
# create some test data and check how model performs on it (test data drawn from same distribution as training data)
# fresh noise is sampled here, so x_test differs from the training inputs x
x_test = y + torch.randn_like(y)
print(x_test)

# predictions on inputs the model was not trained on
y_pred_test = model(x_test)
print(y_pred_test)

tensor([[-0.1212, -0.0696,  3.1144,  0.7191, -0.2049],
        [ 0.9153,  1.2836,  1.5781,  1.4026,  0.2676],
        [-0.1416,  0.7168,  1.0615,  1.3932,  0.6284],
        [ 1.5002,  0.2637,  2.3183,  0.0832,  0.2265],
        [ 0.0519,  2.4155,  1.7536,  2.3153,  1.2661],
        [ 0.5324,  1.5999, -0.7706, -0.0296,  0.9372],
        [ 1.1400,  0.3082, -0.3737,  1.0469,  0.8481],
        [ 1.2724,  2.8108, -1.2418,  1.3649,  0.0582],
        [ 1.8439,  0.5680, -0.9852, -1.0735,  2.0157],
        [ 1.2354,  0.2178,  2.3096, -0.2292,  1.3061]])
tensor([[0.9909, 0.9488, 0.9710, 0.9144, 0.9814],
        [0.9994, 0.9848, 0.9968, 0.9724, 0.9976],
        [0.9976, 0.9749, 0.9904, 0.9511, 0.9935],
        [0.9973, 0.9701, 0.9891, 0.9479, 0.9925],
        [0.9999, 0.9947, 0.9995, 0.9898, 0.9996],
        [0.9989, 0.9912, 0.9959, 0.9675, 0.9974],
        [0.9998, 0.9984, 0.9993, 0.9869, 0.9996],
        [0.9999, 0.9963, 0.9993, 0.9869, 0.9995],
        [0.9999, 0.9992, 0.9996, 0.9900, 0.9998],

Note that the model has learned how to filter out the noise from the input: even on unseen noisy test inputs, its predictions are close to the all-ones ground truth.