## Pytorch Starts here

Resource: https://www.learnpytorch.io/

Questions: https://github.com/mrdbourke/pytorch-deep-learning/projects?query=is%3Aopen

In [9]:
print('Learning PyTorch by Yomi')

Learning PyTorch by Yomi


In [10]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.6.0+cu124


In [11]:
!nvidia-smi

Fri Apr 18 13:30:06 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

Tensors

In [12]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [13]:
scalar.ndim

0

In [14]:
#Get tensor back as Python int

scalar.item()

7

In [15]:
#Vector

vector = torch.tensor([3,8])
vector

tensor([3, 8])

In [16]:
vector.ndim

1

In [17]:
vector.shape

torch.Size([2])

In [18]:
#matrix

MATRIX = torch.tensor([[3,5],
                      [4,8]])
MATRIX

tensor([[3, 5],
        [4, 8]])

In [19]:
MATRIX[0]

tensor([3, 5])

In [20]:
print('the dimension of the matrix is: ', MATRIX.ndim)
print('the shape of the matrix is: ', MATRIX.shape)

the dimension of the matrix is:  2
the shape of the matrix is:  torch.Size([2, 2])


In [21]:
# TENSOR

TENSOR = torch.tensor([[[1, 2, 3],
                       [4,5,6],
                       [7,8,9]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [22]:
print('dimension of tensor',TENSOR.ndim)
print('shape of tensor', TENSOR.shape)

dimension of tensor 3
shape of tensor torch.Size([1, 3, 3])


In [23]:
### Random tensors

# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.0339, 0.3898, 0.6219, 0.2526],
        [0.8128, 0.7502, 0.4486, 0.3441],
        [0.9346, 0.7582, 0.2110, 0.9958]])

In [24]:
# Create a random tensor with similar shape to an image tensor

random_image_size_tensor = torch.rand(size=(3,224,224)) # height, weight, color channels (R,G,B)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

## zeros and ones

In [25]:
# crate a tensor of all zeros

zeros = torch.zeros(4,5)
zeros

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [26]:
# create a tensor of all ones

ones = torch.ones(3,2)
ones

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [27]:
ones.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [28]:
# Use torch.range()

one_to_ten = torch.range(0, 10)
one_to_ten

  one_to_ten = torch.range(0, 10)


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [29]:
one_to_ten = torch.arange(start=0, end=10, step=0.5)
one_to_ten

tensor([0.0000, 0.5000, 1.0000, 1.5000, 2.0000, 2.5000, 3.0000, 3.5000, 4.0000,
        4.5000, 5.0000, 5.5000, 6.0000, 6.5000, 7.0000, 7.5000, 8.0000, 8.5000,
        9.0000, 9.5000])

In [30]:
# Creating tensors-like

ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

## Tensor datatypes

In [31]:
# Float 32 tensor

float_32_tensor = torch.tensor([3.0, 5.0, 8.0],
                               dtype=None, device=None, requires_grad=False)

float_32_tensor

tensor([3., 5., 8.])

In [32]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 5., 8.], dtype=torch.float16)

In [33]:
float_16_tensor * float_32_tensor

tensor([ 9., 25., 64.])

## Getting info from tensors: Tensor Attributes

In [34]:
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.3639, 0.9617, 0.6170, 0.1869],
        [0.1830, 0.8498, 0.4001, 0.6952],
        [0.0519, 0.7376, 0.7870, 0.9095]])

In [35]:
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


## Manipulating Tensors (+, -, x, /, matrix multiplication)

In [36]:
# Create a tensor and add 10 to it

tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [37]:
# multiply tensor by 10

tensor * 10

tensor([10, 20, 30])

In [38]:
# Subtract 10 from tensor

tensor - 10

tensor([-9, -8, -7])

In [39]:
# Try out PyTorch in-built functions

torch.mul(tensor, 10)

tensor([10, 20, 30])

In [40]:
torch.add(tensor, 10)

tensor([11, 12, 13])

# Matrix Multiplication

### 2 Ways: Element-wise and Dot product

### Also remember that the inner dimensions must match i.e a matmult of (2,3) and (3,2) will work but (2,3) and (2,3) won't

### To multiply two matrices of equal shapes, we can use transpose for one of them

In [41]:
# Element mult
print(tensor, "*", tensor)
print(f'Equals: {tensor * tensor}')

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [42]:
#matrix mult i.e. dot product

torch.matmul(tensor, tensor)

tensor(14)

In [43]:
tensor_A = torch.tensor([[1, 2, 3], [3,4,5], [6,7,8], [2,5,8]])
tensor_A.shape

torch.Size([4, 3])

In [44]:
tensor_B = torch.tensor([[3,2,5], [4,6,2], [5,2,1], [0,8,7]])
tensor_B.shape

torch.Size([4, 3])

In [45]:
# torch.matmul(tensor_A, tensor_B)

In [46]:
# to transpose a tensor, use the dot T function
tensor_B.T, tensor_B.T.shape

(tensor([[3, 4, 5, 0],
         [2, 6, 2, 8],
         [5, 2, 1, 7]]),
 torch.Size([3, 4]))

In [47]:
tensor_ABtranspose = torch.matmul(tensor_A, tensor_B.T)
print(tensor_ABtranspose, tensor_ABtranspose.shape)

tensor([[ 22,  22,  12,  37],
        [ 42,  46,  28,  67],
        [ 72,  82,  52, 112],
        [ 56,  54,  28,  96]]) torch.Size([4, 4])


### Finding the min, max, mean, sum, etc (Tensor aggregation)

In [48]:
# Create tensor
x = torch.arange(0,100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [49]:
# Find the min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [50]:
torch.max(x), x.max()

(tensor(90), tensor(90))

In [51]:
# Find the mean
#torch.mean(x)

#why did it fail? it failed because of the datatype. Hence we need to change the data type

In [52]:
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [53]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

### Finding the positional min and max

In [54]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [55]:
#Find the positon in tensor that has the min value with argmin() => returns index

x.argmin()

tensor(0)

In [56]:
x.argmax()

tensor(9)

In [57]:
x[9]

tensor(90)

### Reshaping, stacking, squeezing, and unsqueezing tensors


In [58]:
import torch
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [59]:
x_reshaped = x.reshape(1,9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [60]:
# Add an extra dimension

x_reshaped = x.reshape(1,9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [61]:
# Change the view

z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [62]:
# Changing z changes x (because the view of a tensor shares the same memory as the original input)

z[:, 0] = 5
z, x


(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [63]:
# Stck tensors on top of each other

x_stacked = torch.stack([x,x,x,x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [64]:
#torch.squeeze removes all single dimensions from a target tensor

print(f'Previous tensor: {x_reshaped}')
print(f'Previous shape: {x_reshaped.shape}')

# Remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f'\nNew tensor: {x_squeezed}')
print(f'New Shape: {x_squeezed.shape}')

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New Shape: torch.Size([9])


In [65]:
# torch.unsqueeze() - adds a single dimension to a target tensor a a specific dim

print(f'Previous target: {x_squeezed}')
print(f'Previous shape: {x_squeezed.shape}')

#Add an extra dimension with unsqueeze

x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f'\nNew tensor: {x_unsqueezed}')
print(f'New shape: {x_unsqueezed.shape}')

Previous target: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [66]:
#torch.permute - rearranges the dimensions of a target tensor in a specified order
x_original = torch.rand(size= (224, 224, 3))

#Permute the original tensor to rearrange the axis (or dim) order
x_permuted = x_original.permute(2, 0, 1) #i.e shifts axis 2-> 0, 0->1, and 1->2

print(f'Previous shape: {x_original.shape}')
print(f'New shape: {x_permuted.shape}')

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [67]:
x_original

tensor([[[0.3313, 0.9792, 0.5530],
         [0.4669, 0.2178, 0.0413],
         [0.3446, 0.5812, 0.0544],
         ...,
         [0.1154, 0.6114, 0.2007],
         [0.1290, 0.4360, 0.0111],
         [0.0587, 0.3744, 0.3865]],

        [[0.4274, 0.7428, 0.7263],
         [0.7246, 0.7789, 0.1540],
         [0.2460, 0.6514, 0.7986],
         ...,
         [0.7780, 0.9049, 0.4597],
         [0.8976, 0.3959, 0.7885],
         [0.6421, 0.0928, 0.7471]],

        [[0.1693, 0.6176, 0.8089],
         [0.8629, 0.7237, 0.8871],
         [0.9838, 0.3849, 0.3467],
         ...,
         [0.8621, 0.2924, 0.4281],
         [0.6164, 0.8689, 0.3577],
         [0.8229, 0.3191, 0.8504]],

        ...,

        [[0.3695, 0.4716, 0.0787],
         [0.5974, 0.9939, 0.6494],
         [0.6588, 0.2826, 0.4800],
         ...,
         [0.5105, 0.6220, 0.1344],
         [0.6171, 0.0325, 0.3841],
         [0.2019, 0.4861, 0.3788]],

        [[0.8997, 0.1708, 0.7233],
         [0.0044, 0.4910, 0.9093],
         [0.

## Indexing (selecting data from tensors)

similar in PyTorch as with NumPy

In [68]:
# Create a tensor

x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [69]:
# Let's index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [70]:
x[0][0]

tensor([1, 2, 3])

In [71]:
x[0][1][2]

tensor(6)

In [72]:
# You can also use ':' to select all of the target dimension

x[:, 0]

tensor([[1, 2, 3]])

In [73]:
# Get all values of the 0th and 1st dimension but only index 1 of 2nd dimension

x[:, :, 2]

tensor([[3, 6, 9]])

In [74]:
#Get all the values of the 0 dimension but only 1 index value of the 1st and 2nd dimension
x[:,1,1]

tensor([5])

In [75]:
# Get index 0 of the 0th and 1st dimension and all values of the 2nd dimesion

x[0,0,:]

tensor([1, 2, 3])

In [76]:
# Index on x to return 9
print (x[0][2][2])

# index on x to return 2,5,8
print (x[:,:,1])

tensor(9)
tensor([[2, 5, 8]])


## PyTorch tensors and NumPy


*   Data in NumPy wanted in PyTorch tensor -> torch.from_numpy(ndarray)
*   PyTorch tensor -> NumPy -> torch.Tensor.numpy()



In [77]:
# NumPy array to tensor
import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) #when converting from numpy to pytorch, pytorch converts to float64 which is the default for numpy. you can change the datatype for pytorch yourself
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [78]:
# Change the value of the array and see what this will do to the tensor

array = array+1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [79]:
#Tensor to Numpy array

tensor = torch.ones(5)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1.]), array([1., 1., 1., 1., 1.], dtype=float32))

In [80]:
# Change the tensor, what happens to the numpy_tensor?

tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2.]), array([1., 1., 1., 1., 1.], dtype=float32))

## Reprodcuibility (trying to take random out of random)

In short how a neural network learns:

start with random numbers -> tensor oerations -> update random numbers to try and make them better representations of the data again -> again -> again...

To reduce the randomness in NN and pytorch comes the concept of a **random seed**

In [81]:
# Create two random tensors

random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.6394, 0.1629, 0.7520, 0.0506],
        [0.6769, 0.8611, 0.9540, 0.7438],
        [0.9216, 0.3255, 0.5302, 0.8923]])
tensor([[0.7835, 0.4381, 0.3237, 0.8141],
        [0.8834, 0.1299, 0.1328, 0.6372],
        [0.3275, 0.0256, 0.2671, 0.9781]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [82]:
# Let's make some random but reproducible tensors

# set the random seed
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

random_tensor_C = torch.rand(3,4)
random_tensor_D = torch.rand(3,4)

print(random_tensor_C)
print(random_tensor_D)

print(random_tensor_C == random_tensor_D)


tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [83]:
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3,4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3,4)

print(random_tensor_C)
print(random_tensor_D)

print(random_tensor_C == random_tensor_D)


tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and pytorch objects on the GPUs (and making faster computations)

In [84]:
!nvidia-smi

Fri Apr 18 13:30:07 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Check for GPU access with PyTorch

In [85]:
# Check for GPU access with PyTorch

import torch
torch.cuda.is_available()

True

In [86]:
# Setu device agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

It's best practice to setup device agnostic if youre not sure your code is running on a CPU or a GPU -

In [87]:
# Count number of devices
torch.cuda.device_count()

1

## Putting tensrs (and models) on the GPU

The reason we wnat our tensors/models on the GPU is because it runs faster computations

In [88]:
# Create a tensor (default on the CPU)

tensor = torch.tensor([1,2,3], device='cpu')

#Tensor not on GPU
print (tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [89]:
# Move tensor to GPU if available
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### Moving Tensors back to the GPU

NumPy works only on CPU

In [91]:
# if tensor is on GPU, can't transform it to Numpy
#tensor_on_gpu.numpy()

In [92]:
#To fix the GPU tensor with numpy issue, we can first set it to the CPU
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])