Pytorch tutorial

In [3]:
import torch

In [4]:
#number
t1=torch.tensor(4.)
t1

tensor(4.)

In [5]:
t1.dtype

torch.float32

In [6]:
#vector
t2=torch.tensor([1.,2,3,4])
t2

tensor([1., 2., 3., 4.])

In [7]:
t2.dtype

torch.float32

In [8]:
#matrix
t3=torch.tensor([[5,6],
                 [7,8],
                 [9,10]])
t3

tensor([[ 5,  6],
        [ 7,  8],
        [ 9, 10]])

In [9]:
# 3-dimentional array
t4=torch.tensor([
    [
        [1,2,3],[4,5,6],
        [7,8,9]
    ]
])
t4

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

tensors can have any number of dimensions and different lengths along each dimension.We can inspect the length along each dimension using the .shape property of a tensor.

In [10]:
t1.shape

torch.Size([])

In [11]:
t2.shape


torch.Size([4])

In [12]:
t3.shape

torch.Size([3, 2])

In [13]:
t4.shape

torch.Size([1, 3, 3])

note that is not possible to create improper shape

In [15]:
t5=torch.tensor([[1,2,3],[4]])
t5

ValueError: expected sequence of length 3 at dim 1 (got 1)

Tensor operation ad gradients

In [18]:
#Create tensor
x=torch.tensor(3.)

w=torch.tensor(4.,requires_grad=True)
b=torch.tensor(5.,requires_grad=True)
x,w,b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

We've created three tensor x,w and b,all numbers wand b have an additional parameter require_grad set to True.We'll see what it does in just a moment.
Let's create a new tensor y by combininig these tensors.

In [19]:
#Arithmetic operations
y = w*x + b
y

tensor(17., grad_fn=<AddBackward0>)

In [20]:
#derivative of y
y.backward()

In [21]:
#display gradients
print('dy/dx:',x.grad)
print('dy/dw:',w.grad)
print('dy/db:',b.grad)

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)


Tensor Functions

In [22]:
#create a tensor with a died value for every element
t6=torch.full((3,2),42)
t6

tensor([[42, 42],
        [42, 42],
        [42, 42]])

In [23]:
# concatenate two tensors with compatible shapes
t7=torch.cat((t3,t6))
t7

tensor([[ 5,  6],
        [ 7,  8],
        [ 9, 10],
        [42, 42],
        [42, 42],
        [42, 42]])

In [24]:
#compute the sin of each element
t8=torch.sin(t7)
t8

tensor([[-0.9589, -0.2794],
        [ 0.6570,  0.9894],
        [ 0.4121, -0.5440],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165]])

Interoperatibilty with numpy

In [25]:
import numpy as np

x=np.array([[1,2],[3,4.]])
x

array([[1., 2.],
       [3., 4.]])

In [26]:
#convert numpy array to a torch tensor

y=torch.from_numpy(x)
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [27]:
#verify the datatype
x.dtype,y.dtype

(dtype('float64'), torch.float64)

In [28]:
#convert a torch tensor to a numpy array

z=y.numpy()
z

array([[1., 2.],
       [3., 4.]])

interoperability between pytorch and numpy is essential beccause most datasets you;ll work with will liley be prepocessed as Numpy arrays.

You mi wonder why we need a library like Pytorch at all since Numpy already provides data structures and utilities for working with multi - dimensional numeric data. Trere are two main reasons:

1.Autograd:The ability to automatically compute gradients for tensor perations is essential for training deep learning models.
2.GPU support: While woking with massive datasets and large models,PyTorch tensor operations can be performed efficiently using a Graphics Processing  Unit(GPU).Computations that might typically take hours can be completes within minutes using GPUs.

We'll leverage both these features of PyTorch extensively in this tutorial series;

In [29]:
import numpy as np
import torch

Training Data




In [30]:
# Input (temp,rainfall,humidity)
inputs=np.array([[73,67,43],
                [91,88,64],
               [87,134,58],
               [102,43,37],
               [69,96,70]],dtype='float32'
               )

In [31]:
# Target (apples,oranges)
targets=np.array([[56,70],
                  [81,101],
                  [119,133],
                  [22,37],
                  [103,119]],dtype='float32')

In [32]:
#convert arrray to tenso
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)
inputs,targets

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 69.,  96.,  70.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

Linear regration model from scratch

In [33]:
# weights and biases
#randn give random data from  normal gragh
# yield_apple=w11*temp+w12*rainfall+w13*humidity+b1
#yield_orange=w21*temp +w22*rainfall+w23* humidity +b2

w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)

In [34]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

@ vector matrix multiplicaton
.t() for transpose of tensor


In [35]:
inputs@ w.t() +b

tensor([[ -25.2761,  -89.7255],
        [ -16.0941, -135.3451],
        [ -31.1899,  -92.9622],
        [ -61.1443,  -96.3974],
        [  15.6164, -139.7357]], grad_fn=<AddBackward0>)

In [36]:
def model(x):
  return x @ w.t()+b

In [37]:
#Generate predictions
preds=model(inputs)
print(preds)

tensor([[ -25.2761,  -89.7255],
        [ -16.0941, -135.3451],
        [ -31.1899,  -92.9622],
        [ -61.1443,  -96.3974],
        [  15.6164, -139.7357]], grad_fn=<AddBackward0>)


In [38]:
#Compare with targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [39]:
diff=preds-targets
torch.sum(diff*diff)/diff.numel()

tensor(27030.8164, grad_fn=<DivBackward0>)

LOSS function  

In [40]:
#MSE Loss
def mse(t1,t2):
  diff=t1-t2
  return torch.sum(diff*diff)/diff.numel()

In [41]:
# compute loss
loss=mse(preds,targets)
loss

tensor(27030.8164, grad_fn=<DivBackward0>)

Compute gradients

In [42]:
loss.backward()

In [43]:
print(w)
print(w.grad)

tensor([[-1.0875, -0.2160,  1.5854],
        [-0.2764,  0.5705, -2.5059]], requires_grad=True)
tensor([[ -8469.0850,  -9215.8506,  -5522.6191],
        [-16857.0742, -18470.7266, -11629.4590]])


In [44]:
w-w.grad*1e-5

tensor([[-1.0029, -0.1238,  1.6406],
        [-0.1078,  0.7552, -2.3896]], grad_fn=<SubBackward0>)

In [45]:
print(b)
print(b.grad)

tensor([ 0.4125, -0.0152], requires_grad=True)
tensor([ -99.8176, -202.8332])


In [46]:
with torch.no_grad():
  w-=w.grad*1e-5
  b-=b.grad*1e-5

In [47]:
w,b

(tensor([[-1.0029, -0.1238,  1.6406],
         [-0.1078,  0.7552, -2.3896]], requires_grad=True),
 tensor([ 0.4135, -0.0132], requires_grad=True))

In [48]:
pred=model(inputs)
loss=mse(pred,targets)
loss

tensor(18401.5625, grad_fn=<DivBackward0>)

In [49]:
# adjust weight & reset gradients
with torch.no_grad():
  w-=w.grad*1e-5
  b-=b.grad*1e-5
  w.grad.zero_()
  b.grad.zero_()

In [50]:
w,b

(tensor([[-0.9182, -0.0316,  1.6958],
         [ 0.0607,  0.9399, -2.2733]], requires_grad=True),
 tensor([ 0.4145, -0.0112], requires_grad=True))

In [51]:
pred=model(inputs)
loss=mse(pred,targets)
loss

tensor(11469.4316, grad_fn=<DivBackward0>)

Train for multiple epochs
To reduce the loss further ,we can repeat the process of adjusting the weights and biases using the gradients multiple times;Each iteration is called an epoch

In [52]:
#Train for 100 epochs
for i in range(100):
  preds=model(inputs)
  loss=mse(preds,targets)
  loss.backward()
  with torch.no_grad():
    w-=w.grad*1e-5
    b-=b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()

In [53]:
#lets verify the result
pred=model(inputs)
loss=mse(pred,targets)
loss

tensor(441.2385, grad_fn=<DivBackward0>)

Linear regression using Pytorch built-ins

torch.nn package from PyTorch ,which contains utility classes for building neural networks

In [54]:
import torch.nn as nn

In [55]:
#input(temp,raifall,humidity)
inputs=np.array([
    [73,67,43],
    [91,88,64],
    [87,134,58],
    [102,43,37],
    [91,87,65],
    [69,96,70],
    [74,66,43],
    [88,134,59],
    [101,44,37],
    [68,96,71],
    [92,87,64],
    [87,135,57],
    [103,43,36],
    [68,97,70],
    [73,66,44]
],dtype='float32')

In [56]:
#targets1
targets=np.array([[56,70],
                  [81,101],
                  [119,133],
                  [22,37],
                  [103,119],
                  [57,69],
                  [80,102],
                  [118,132],
                  [21,38],
                  [104,118],
                  [57,69],
                  [82,100],
                  [118,134],
                  [20,38],
                  [102,120]],dtype='float32')

In [57]:
inputs.shape,targets.shape


((15, 3), (15, 2))

In [58]:
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)

Dataset and Dataloader

In [59]:
from torch.utils.data import TensorDataset

In [60]:
#Define datasets
train_ds=TensorDataset(inputs,targets)
train_ds[0:8]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 91.,  87.,  65.],
         [ 69.,  96.,  70.],
         [ 74.,  66.,  43.],
         [ 88., 134.,  59.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.],
         [ 57.,  69.],
         [ 80., 102.],
         [118., 132.]]))

In [61]:
from torch.utils.data import DataLoader

In [62]:
#define data loader
batch_size=5
train_dl=DataLoader(train_ds,batch_size,shuffle=True)


In [63]:
# we can use the data loader in a for loop .lets look at an exampple
for xb,yb in train_dl:
  print(xb)
  print(yb)
  break

tensor([[ 91.,  87.,  65.],
        [103.,  43.,  36.],
        [ 74.,  66.,  43.],
        [ 68.,  96.,  71.],
        [ 73.,  67.,  43.]])
tensor([[103., 119.],
        [118., 134.],
        [ 80., 102.],
        [104., 118.],
        [ 56.,  70.]])


nn.linear

In [64]:
#define model
model=nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[ 0.4371, -0.3823, -0.0723],
        [ 0.2295, -0.2042, -0.0506]], requires_grad=True)
Parameter containing:
tensor([-0.4050,  0.5601], requires_grad=True)


In [65]:
#Parameter
list(model.parameters())

[Parameter containing:
 tensor([[ 0.4371, -0.3823, -0.0723],
         [ 0.2295, -0.2042, -0.0506]], requires_grad=True),
 Parameter containing:
 tensor([-0.4050,  0.5601], requires_grad=True)]

In [66]:
#generate prediction
pred=model(inputs)
pred


tensor([[  2.7770,   1.4588],
        [  1.0969,   0.2394],
        [-17.8047,  -9.7676],
        [ 25.0631,  13.3193],
        [  1.4069,   0.3930],
        [-12.0121,  -6.7474],
        [  3.5965,   1.8925],
        [-17.4400,  -9.5887],
        [ 24.2437,  12.8856],
        [-12.5215,  -7.0275],
        [  1.9164,   0.6732],
        [-18.1147,  -9.9211],
        [ 25.5725,  13.5995],
        [-12.8315,  -7.1811],
        [  3.0870,   1.6123]], grad_fn=<AddmmBackward0>)

In [67]:
#loss function
#import nn.functional
import torch.nn.functional as F


In [68]:
#Define loss function
loss_fn=F.mse_loss

In [69]:
pred=model(inputs)
loss=loss_fn(pred,targets)

optimizer

In [70]:
#Define optimizer
opt=torch.optim.SGD(model.parameters(),lr=1e-5)

In [81]:
#Utility function to train the model

def fit(num_epochs,model,loss_fn,opt,train_dl):
  #repeat for given number of epochs
  for epoch in range(num_epochs):

    #Train with batches of data
    for xb,yb in train_dl:
      # 1. Generate predictions

      pred=model(xb)

      # 2. Calculate loss
      loss=loss_fn(pred,yb)

      # 3. Compute gradients
      loss.backward()

      #4.Update parameters using gradients
      opt.step()

      #5.Reset the gradients to zero
      opt.zero_grad()

    #print the progress
    if(epoch +1)%10 ==0:
      print('Epoch [{}/{}], loss:{:.4f}'.format(epoch+1,num_epochs,loss.item()))


In [80]:
fit(100,model,loss_fn,opt,train_dl)

torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
Epoch [10/100], loss:1164.2222
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])
torch.Size([5, 3])


In [73]:
pred=model(inputs)
pred

tensor([[ 61.8059,  74.9104],
        [ 79.5674,  96.3778],
        [ 97.0493, 114.9790],
        [ 63.8861,  78.6132],
        [ 79.1933,  96.0156],
        [ 73.7316,  88.9805],
        [ 61.8148,  74.9791],
        [ 97.5327, 115.6133],
        [ 63.8771,  78.5445],
        [ 73.3486,  88.5495],
        [ 79.5764,  96.4466],
        [ 97.4233, 115.3412],
        [ 64.2691,  79.0442],
        [ 73.7226,  88.9117],
        [ 61.4318,  74.5481]], grad_fn=<AddmmBackward0>)

In [74]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

In [75]:
model(torch.tensor([[75,63,44.]]))

tensor([[61.0255, 74.2216]], grad_fn=<AddmmBackward0>)

In [76]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 91.,  87.,  65.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.],
        [ 73.,  66.,  44.]])