In [1]:
#usually used to import all packages
import torch 

## Training a neural network 
One epoch is one full pass of the neural network over every batch of the training set.
ReLU stands for Rectified Linear Unit and is a type of activation function: $\mathrm{ReLU}(x) = \max(0, x)$.

### Training steps
 - Take a batch of samples and targets 
 - Forward pass  (to obtain predictions)
 - Calculate the loss (the mismatch between targets and predictions)
 - backward pass (to compute the gradients of the loss)
 - Update the weights (using the optimizer): $w \leftarrow w - \eta \, \nabla_w L$, where $\eta$ is the learning rate and $\nabla_w L$ is the gradient of the loss w.r.t. $w$

In [2]:
# Allocate a tensor with 4 rows and 3 columns, every entry set to zero.
w = torch.zeros((4, 3))
# Two ways to query the dimensions: the size() method and the shape attribute.
# Only the last expression of the cell is displayed.
w.size()
w.shape

torch.Size([4, 3])

In [4]:
# Build a 4x3 tensor whose entries are sampled from a normal distribution.
w = torch.randn((4, 3))
w

tensor([[-0.7142,  1.4934,  0.8451],
        [ 0.1996, -0.1079, -0.6758],
        [-2.0869, -1.3709, -1.0126],
        [-1.3892,  0.4781,  0.7836]])

In [5]:
# Create a new random tensor that takes its dimensions from the existing
# tensor `w` (so the result is also 4x3).
t = torch.randn_like(w) 
t

tensor([[ 0.1925, -0.4855,  0.7450],
        [-2.0761,  1.2648, -0.2728],
        [-1.3845,  0.2081, -0.1592],
        [-0.6917, -0.4981, -1.9001]])

In PyTorch, any operation that mutates a tensor in place has a name ending with an underscore (e.g. `fill_`).

In [6]:
# Overwrite every element of w with 1, in place (note the trailing underscore in fill_).
w.fill_(1)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [12]:
# Reshape: view() presents the same 12 elements of w as 2 rows x 6 columns.
t = w.view(2, 6)
# A bare name in the middle of a cell is evaluated but not displayed;
# only the cell's last expression is shown.
t
# Passing -1 for one dimension lets PyTorch infer it from the element count
# (3 rows -> 4 columns here).
w.view(3, -1)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

When converting a torch tensor to a NumPy array, remember that if the tensor is on the CPU, the tensor and the NumPy array share the same underlying memory, so changing one will change the other.

In [14]:
# Convert the tensor to a NumPy array and show the type of the result.
# NOTE(review): the saved output below is the array itself rather than a type,
# so it appears to predate the `type(...)` wrapper; re-run the cell to refresh it.
type(w.numpy())

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]], dtype=float32)

# using autograd 

In [20]:

# Random 4x3 tensor that autograd will track: requires_grad=True asks PyTorch
# to record operations on w so gradients can be computed for it later.
w = torch.randn((4, 3), requires_grad=True)
w

tensor([[-0.7179, -1.3929, -0.7599],
        [-0.8235,  0.7512,  1.0649],
        [-0.5350,  1.1465, -1.4948],
        [-1.0698,  0.5639,  0.1092]], requires_grad=True)

In [21]:
# Element-wise exponential of w; because w requires grad, the result carries a
# grad_fn linking it back to w.
y = w.exp()
y

tensor([[0.4878, 0.2483, 0.4677],
        [0.4389, 2.1195, 2.9005],
        [0.5857, 3.1472, 0.2243],
        [0.3431, 1.7574, 1.1154]], grad_fn=<ExpBackward0>)

In [22]:
# grad_fn is the autograd node that produced y (here the backward function of exp).
y.grad_fn

<ExpBackward0 at 0x1b767483430>

In [29]:
# Reduce y to a single scalar (the average of all its elements) so that
# backward() can later be called on it without extra arguments.
outp = torch.mean(y)
outp

tensor(1.1530, grad_fn=<MeanBackward0>)

In [30]:
# Inspect the gradient stored on w.
# NOTE(review): this cell sits before `outp.backward()`, so on a fresh
# Restart & Run All w.grad is expected to be None here; the saved output shows
# values only because the cell was executed (In [30]) after backward (In [26]).
w.grad

tensor([[0.0406, 0.0207, 0.0390],
        [0.0366, 0.1766, 0.2417],
        [0.0488, 0.2623, 0.0187],
        [0.0286, 0.1465, 0.0929]])

In [26]:
# Backpropagate from the scalar outp; this fills in w.grad (shown in the next cell).
outp.backward()

In [27]:
# Gradient of outp = mean(exp(w)) w.r.t. w: each entry is exp(w) / 12,
# i.e. the matching entry of y divided by the 12 elements being averaged.
w.grad

tensor([[0.0406, 0.0207, 0.0390],
        [0.0366, 0.1766, 0.2417],
        [0.0488, 0.2623, 0.0187],
        [0.0286, 0.1465, 0.0929]])

Optimizers
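Remember to zero out the gradients before each backward pass (i.e. for every batch, not just once per epoch) using `optimizer.zero_grad()`, because PyTorch accumulates gradients across `backward()` calls.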

CPU to GPU
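 **Remember**: the model and its inputs must be on the same device. To test on the CPU, move the model with `model.to('cpu')`; samples and labels are created on the CPU by default, so they only need an explicit `.to(device)` when running on the GPU.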
 
 

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# A torch.device has no .to() method; .to() belongs to tensors and modules.
# To change placement manually, call .to() on the data (or model) itself.
sample = torch.ones(2, 2).to(device)  # follows the automatic choice above
sample = sample.to('cpu')             # manual override: back on the CPU
sample.device