In [1]:
import torch

In [2]:
torch.__version__

'2.0.1+cpu'

## Tensors

### Typical tensor operations

In [4]:
# Build a 2x2 float tensor from a nested Python list.
# torch.tensor infers the dtype from the data, so float literals give the
# default floating-point dtype (float32).
tensor_array = torch.tensor([[1.0, 2.0], [4.0, 5.0]])
tensor_array

tensor([[1., 2.],
        [4., 5.]])

In [5]:
# Allocate a 3x3 tensor without initializing its memory.
# The elements can be read, but they hold whatever values were already in that
# memory, so they must be written before being used in a computation.
tensor_uninitialized = torch.empty(3, 3)
torch.numel(tensor_uninitialized)

9

In [6]:
# initialize a 2x3 float tensor with random values drawn uniformly from [0, 1)
# (no seed is set, so the values differ on every run)
tensor_initialized = torch.rand(2, 3)
tensor_initialized

tensor([[0.3708, 0.3674, 0.3466],
        [0.2919, 0.3940, 0.0095]])

In [12]:
# Cast a tensor to a different data type after it has been created.
# Converting float -> int32 truncates toward zero, which is why most of the
# standard-normal samples end up as 0.
tensor_int = torch.randn(5, 3).to(torch.int32)
tensor_int

tensor([[ 0,  0,  0],
        [ 0, -1,  1],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  1]], dtype=torch.int32)

In [13]:
# define a long (int64) tensor
# torch.tensor with an explicit dtype replaces the legacy LongTensor
# constructor, which had to implicitly convert the float literals to integers
tensor_long = torch.tensor([1, 2, 3], dtype=torch.long)
tensor_long

tensor([1, 2, 3])

In [16]:
# Define a byte (uint8) tensor with the legacy ByteTensor constructor.
# uint8 only holds 0..255, so the negative literal -5 wraps around modulo 256
# and is stored as 251 (visible in the output of this cell).
tensor_byte = torch.ByteTensor([0, 127, 1, -5])
tensor_byte

tensor([  0, 127,   1, 251], dtype=torch.uint8)

In [17]:
# initialize tensor with 1s
tensor_ones = torch.ones(10)
tensor_ones

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [18]:
# initialize tensor with 0s
tensor_zeros = torch.zeros(10)
tensor_zeros

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [19]:
# initialize a 3x3 identity matrix (1s on the main diagonal, 0s elsewhere)
tensor_eye = torch.eye(3)
tensor_eye

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [20]:
# Collect the index of every non-zero entry; each output row is one
# (row, column) position in tensor_eye.
non_zero = tensor_eye.nonzero()
non_zero

tensor([[0, 0],
        [1, 1],
        [2, 2]])

In [22]:
# create tensor with all 1s and the same shape as another tensor
# the output tensor has the same shape as tensor_eye with all 1s
tensor_ones_shape_eye = torch.ones_like(tensor_eye)
tensor_ones_shape_eye

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [23]:
# In-place operations modify the tensor they are called on.
# By convention, every in-place method name ends with a trailing underscore.
initial_tensor = torch.rand(3, 3)
initial_tensor.fill_(3) # overwrite every element with the value 3

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])

In [24]:
# An out-of-place operation leaves initial_tensor untouched and hands back
# a freshly allocated tensor holding the result.
new_tensor = initial_tensor + 4
new_tensor

tensor([[7., 7., 7.],
        [7., 7., 7.],
        [7., 7., 7.]])

In [25]:
initial_tensor

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])

In [27]:
# the in-place add_ operator modifies initial_tensor itself
# (each run of this cell adds 5 again, so the result depends on how often it ran)
initial_tensor.add_(5)
initial_tensor

tensor([[8., 8., 8.],
        [8., 8., 8.],
        [8., 8., 8.]])

### Tensor operations with numpy

In [28]:
import numpy as np

In [29]:
numpy_arr = np.array([1, 2, 3])
numpy_arr

array([1, 2, 3])

In [30]:
# Convert the NumPy array to a tensor.
# torch.from_numpy does not copy: the tensor shares memory with numpy_arr
# and keeps the array's integer dtype.
# NOTE(review): the generic name `tensor` is rebound later in the autograd demo.
tensor = torch.from_numpy(numpy_arr)
tensor

tensor([1, 2, 3], dtype=torch.int32)

In [31]:
# convert tensor to numpy array
numpy_from_tensor = tensor.numpy()
numpy_from_tensor

array([1, 2, 3])

In [32]:
# The tensor and the NumPy array share the same underlying memory,
# so a change made through one is visible through the other.
numpy_arr[1] = 4
numpy_arr

array([1, 4, 3])

In [33]:
tensor

tensor([1, 4, 3], dtype=torch.int32)

### Access tensor elements


In [35]:
# by dimension indices
initial_tensor = torch.rand(2, 3)
initial_tensor

tensor([[0.1482, 0.1976, 0.5693],
        [0.2040, 0.3582, 0.7984]])

In [36]:
# select by element index
initial_tensor[0, 2] 

tensor(0.5693)

In [37]:
# select subset of elemens by slicing
initial_tensor[:, 1:]

tensor([[0.1976, 0.5693],
        [0.3582, 0.7984]])

In [41]:
# Create a view that shares the same memory as the original tensor.
# The view has a different shape: here a flat 1-D tensor of 6 elements.
resized_tensor = initial_tensor.view(6)
resized_tensor.shape

torch.Size([6])

In [43]:
initial_tensor[0, 2] = 0.111

In [44]:
# resized_tensor is a flattened 1-D view of initial_tensor, so the write to
# initial_tensor[0, 2] is visible here at index 2
resized_tensor

tensor([0.1482, 0.1976, 0.1110, 0.2040, 0.3582, 0.7984])

In [45]:
# create another view of the same data, this time with shape (3, 2)
resized_tensor = initial_tensor.view(3, 2)
resized_tensor

tensor([[0.1482, 0.1976],
        [0.1110, 0.2040],
        [0.3582, 0.7984]])

In [46]:
# pass -1 to let PyTorch infer that dimension from the number of elements
# (6 elements / 2 columns -> 3 rows)
resized_matrix = initial_tensor.view(-1, 2)
resized_matrix

tensor([[0.1482, 0.1976],
        [0.1110, 0.2040],
        [0.3582, 0.7984]])

### Getting tensor dimensions with size() and the shape attribute

In [38]:
initial_tensor.size()

torch.Size([2, 3])

In [40]:
initial_tensor.shape

torch.Size([2, 3])

### Sort tensor values and sorted index

In [47]:
initial_tensor


tensor([[0.1482, 0.1976, 0.1110],
        [0.2040, 0.3582, 0.7984]])

In [48]:
# Sort within each row, i.e. along dim=1 (the last dimension of this 2-D tensor).
# The call yields the sorted values and the original position of each value.
sorted_tensor, sorted_indices = initial_tensor.sort(dim=1)

In [49]:
sorted_tensor

tensor([[0.1110, 0.1482, 0.1976],
        [0.2040, 0.3582, 0.7984]])

In [50]:
sorted_indices

tensor([[2, 0, 1],
        [0, 1, 2]])

In [51]:
# Sort within each column, i.e. along dim=0.
# The call yields the sorted values and the original row of each value.
sorted_tensor, sorted_tensor_indices = initial_tensor.sort(dim=0)

In [52]:
sorted_tensor

tensor([[0.1482, 0.1976, 0.1110],
        [0.2040, 0.3582, 0.7984]])

In [53]:
sorted_tensor_indices

tensor([[0, 0, 0],
        [1, 1, 1]])

### Computation Graph
* in pytorch, it is dynamic
* everything you set up in deep learning is a computation graph
* tensorflow runs in "Define, then run" philosophy
  + you first build a graph that specifies the operations and the data
  + then you run the graph by executing the graph to get the final result
* pytorch follows the "define by run" philosophy
  + the graph is built while the code runs, so you can execute it as you build it
  + build and execute the graph in one go - execute as you build

### Summary of Pytorch
* PyTorch is a deep learning framework
* More tightly integrated with Python than TensorFlow
* Can use other Python libraries and the standard Python debugger
* Supports dynamic computation graphs: the graph is rebuilt on every forward pass, so it can change from one iteration to the next
* Uses a forward pass for prediction, backward pass to update weights

## Simple Neural Networks

### Autograd
* PyTorch uses the Autograd library for backpropagation during training
* Autograd relies on reverse-mode automatic differentiation
* Conceptually similar to autodiff in TensorFlow
* NLL as loss function and LogSoftMax as output layer

### Metric used to train model
* Mean Squared Error (MSE) is the metric to be minimized during training of a regression model
* More specifically, MSE is the mean of the squared losses, where each loss is the difference between the predicted and the actual value

### Calculate Gradients
* Symbolic differentiation
  + conceptually simple but hard to implement
* numeric differentiation
  + easy to implement but won't scale
* automatic differentiation
  + conceptually difficult but easy to implement
  + Pytorch, TensorFlow and other packages rely on automatic differentiation
  + relies on applying the chain rule to every elementary operation in the computation, which gives gradients that are exact up to floating-point precision rather than approximations

### Autograd
* the PyTorch package for calculating gradients for backpropagation
* Autograd is a core package for automatic differentiation
* It remembers all executed operations in the forward phase and replays them in the backward phase

#### Properties of tensors
* every tensor has requires_grad property to define if the tensor will be tracked for backpropagation
* you set requires_grad by requires_grad_() function
* every tensor has a grad property, which is a tensor that accumulates the gradient of the computations w.r.t. this tensor after the backward pass
* every tensor has grad_fn, that defines the gradient function
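* grad stays None until backward() is called on a graph that contains the tensor; grad_fn is None for tensors you create directly (leaf tensors) and is set as soon as a tensor is produced by a tracked operation
* If any tensor in a computation has requires_grad = True, the computed output will have requires_grad = True as well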
* to stop autograd from tracking history on newly created tensors, use the `with torch.no_grad():` context manager
  + results computed inside this context manager are not tracked: their requires_grad is False

In [97]:
# demo code of autograd
# Build the input with torch.tensor and float literals: the result is a float32
# tensor with requires_grad=False, which the following cells switch on explicitly.
tensor = torch.tensor([[3.0, 4.0],
                       [7.0, 5.0]])
tensor

tensor([[3., 4.],
        [7., 5.]])

In [98]:
# requires_grad defines if the computation by this
# tensor will be tracked by autograd package
tensor.requires_grad

False

In [99]:
# Call requires_grad_() to switch requires_grad to True in place,
# so that computations on this tensor are tracked for gradient calculation.
tensor.requires_grad_()
tensor.requires_grad

True

In [100]:
print(tensor.grad)

None


In [101]:
print(tensor.grad_fn)

None


In [102]:
out = (tensor * tensor).mean()

In [103]:
out.requires_grad

True

In [104]:
# out is the result of an operation (a non-leaf tensor), so its .grad is not
# populated and this prints None. Reading .grad on a non-leaf tensor also makes
# PyTorch emit a UserWarning; call out.retain_grad() before backward() if the
# gradient of out itself is needed.
print(out.grad)

None


  print(out.grad)


In [105]:
print(tensor.grad)

None


In [106]:
# out has a gradient function because it is the result of a computation;
# grad_fn records the operation (here a mean) that produced it
print(out.grad_fn)

<MeanBackward0 object at 0x00000207C816A950>


In [107]:
# Run the backward pass starting from out.
# backward() computes d(out)/d(tensor) and accumulates it into tensor.grad,
# because tensor is a leaf tensor with requires_grad=True.
# out itself is a non-leaf tensor, so out.grad stays None (and reading it warns).
out.backward()
print(tensor.grad)
print(out.grad)

tensor([[1.5000, 2.0000],
        [3.5000, 2.5000]])
None


  print(out.grad)


In [108]:
print(tensor.grad)

tensor([[1.5000, 2.0000],
        [3.5000, 2.5000]])


In [109]:
# since tensor has requires_grad as true, any computed output will have requires_grad as true
new_tensor = tensor * tensor
print(new_tensor.requires_grad)

True


In [112]:
# Operations executed inside torch.no_grad() are not recorded by autograd.
with torch.no_grad():
    # new_tensor is computed inside the context manager, so it is not tracked:
    # its requires_grad is False even though the input tensor requires grad
    new_tensor = tensor * tensor
    print(f'new_tensor = {new_tensor}')
    print(f'requires_grad for tensor = {tensor.requires_grad}')
    print(f'requires_grad for new_tensor = {new_tensor.requires_grad}')
    

new_tensor = tensor([[ 9., 16.],
        [49., 25.]])
requires_grad for tensor = True
requires_grad for new_tensor = False


### Reverse auto-differentiation
* backpropagation is implemented using a technique called reverse auto-differentiation
  + it calculates the gradients used to update the model parameters
* reverse-mode auto-differentiation is used in tensorflow and pytorch
* two passes are required in each training step
  + forward step to calculate loss
  + backward step to calculate gradients and update parameters

### Demonstration of Linear Model using Autograd