# Fundamentals of PyTorch

In [50]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# print(torch.__version__)

## Introduction to Tensors

<img src="https://miro.medium.com/v2/resize:fit:1008/0*zcidDaCCmJeD8y-9.png"  height="100" alt="Difference between scalar vector matrix and tensor">

Creating tensors using **torch.tensor()**

### Convention

- scalar and vector variables are written in smaller letters

- MATRIX and VECTOR variables are written in all capital letters

In [51]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [52]:
# Get tensor back as a Python int
scalar.item()

7

### Vector
Vector has a magnitude and direction

In [53]:
vector = torch.tensor([7,7])
vector

tensor([7, 7])

### _Dimensions_

Easy way to remember is ->
**dimension = number of square brackets**  

In [54]:
# get the dimensions of tensor
vector.ndim

1

### _Shape_

eg: [7, 7] - we have **2** x 1 elements

In [55]:
vector.shape

torch.Size([2])

### MATRIX

In [56]:
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])

print(MATRIX.ndim)
print(MATRIX.shape)

# accessing the first row
print(MATRIX[0])

2
torch.Size([2, 2])
tensor([7, 8])


### TENSOR

In [57]:
TENSOR = torch.tensor([[[1,2,3],
                        [3,6,9],
                        [2,4,5]]])

print(TENSOR)

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])


### _Dimension of a Tensor_

<img src="https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/images/00-pytorch-different-tensor-dimensions.png" height=250>

In [58]:
print(TENSOR.ndim)
print(TENSOR.shape)

3
torch.Size([1, 3, 3])


## Random Tensors

**Why random tensors?**  
Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent data

`Start with random number -> Look at Data -> Update random numbers -> Look at Data -> Update random numbers`


In [59]:
# Creating random tensor of size (3, 4)

random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.8343, 0.5298, 0.1427, 0.7909],
        [0.8427, 0.5346, 0.7449, 0.4143],
        [0.9988, 0.6468, 0.0275, 0.3090]])

In [60]:
# Creating random tensor with similar shape to Image Tensor

random_image_size_tensor = torch.rand(size=(224,224,3)) # size(height, width, color channels)

print(random_image_size_tensor.shape)
print(random_image_size_tensor.ndim)

torch.Size([224, 224, 3])
3


In [61]:
# Create a tensor of all zeros

zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [62]:
# Create a tensor of all ones

ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [63]:
ones.dtype

torch.float32

## Creating a Range of Tensors and Tensors-like

In [64]:
# Range of Tensors

# one_to_ten = torch.arange(0, 10)

# one_to_ten = torch.arange(start=0, end=10, step =2)\

one_to_ten = torch.arange(0, 100, 5)

one_to_ten

tensor([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
        90, 95])

In [65]:
# Creating Tensors Like

ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Tensor DataTypes

In [66]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # defaults to None, which is torch.float32 or whatever datatype is passed
                               device=None, # defaults to None, which uses the default tensor type
                               requires_grad=False) # if True, operations performed on the tensor are recorded

print(f"Shape of the tensor is: {float_32_tensor.shape}")
print(f"Datatype of the tensor is {float_32_tensor.dtype}")
print(f"Tensor is running on the following device: {float_32_tensor.device}")


Shape of the tensor is: torch.Size([3])
Datatype of the tensor is torch.float32
Tensor is running on the following device: cpu


In [67]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

## Manipulating Tensors

Tensor Operations include:
- Addition
- Subtraction
- Multiplication
- Division
- Matrix Multiplication

In [68]:
# ADD 10 to tensor

tensor = torch.tensor([1,2,3])

tensor + 10

tensor([11, 12, 13])

In [69]:
# MULTIPLY by 10

tensor * 10

tensor([10, 20, 30])

In [70]:
# Subtract by 10

tensor - 10

tensor([-9, -8, -7])

In [71]:
# Divide by 10

tensor / 2

tensor([0.5000, 1.0000, 1.5000])

### Using In Built Functions

In [72]:
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [73]:
torch.add(tensor, 10)

tensor([11, 12, 13])

## Matrix Multiplication (most common neural network operation)

- Element wise multiplication
- Dot Product

### Two rules to keep in mind while performing matrix multiplication

1. **Inner Dimensions** must match
- `(3, 2) @ (3, 2)` wont work ❌
- `(2, 3) @ (3, 2)` will work ✅
- `(3, 2) @ (2, 3)` will work ✅

2. Resulting matrix has the shape of outer dimension

- `(2, 3) @ (3, 2)` => `(2, 2)`

In [74]:
 # Element wise multiplication

print(tensor, "x",  tensor)
print(f"equals: {tensor * tensor}")

tensor([1, 2, 3]) x tensor([1, 2, 3])
equals: tensor([1, 4, 9])


In [75]:
# Matrix Multiplication
%%time
print(tensor, "x",  tensor)
torch.matmul(tensor, tensor)

tensor([1, 2, 3]) x tensor([1, 2, 3])
CPU times: user 624 µs, sys: 0 ns, total: 624 µs
Wall time: 633 µs


tensor(14)

### `Shape Errors`: One of the most common errors in Matrix Multiplication

In [77]:
# Shapes for matrix multiplication

tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])

tensor_B = torch.tensor([[2,4],
                         [6,12],
                         [8,16]])

print(tensor_A.shape, tensor_B.shape)

# Their shapes are different hence they will fail
torch.mm(tensor_A, tensor_B)

torch.Size([3, 2]) torch.Size([3, 2])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

### `Transpose` of the matrix to fix this error

In [78]:
tensor_B.T

print(f"Transposed matrixB:")
print(f"{tensor_B.T}")

print(f"and its shape is: {tensor_B.T.shape}")

Transposed matrixB:
tensor([[ 2,  6,  8],
        [ 4, 12, 16]])
and its shape is: torch.Size([2, 3])


In [79]:
print(tensor_A)
print(tensor_B.T)

print(f"\nShapes of both matrix {tensor_A.shape} {tensor_B.T.shape}")

print("\nMatrix Multiplying these tensors: \n")
torch.matmul(tensor_A, tensor_B.T)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[ 2,  6,  8],
        [ 4, 12, 16]])

Shapes of both matrix torch.Size([3, 2]) torch.Size([2, 3])

Matrix Multiplying these tensors: 



tensor([[ 10,  30,  40],
        [ 22,  66,  88],
        [ 34, 102, 136]])

## Tensor Aggregration

- Min
- Max
- Mean
- Sum

In [80]:
# Creat a tensor

x = torch.arange(0,100,10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [81]:
# Find the Min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [82]:
# Find the Max
torch.max(x), x.max()

(tensor(90), tensor(90))

### `DType Error`

In [84]:
# Find the mean(average)
torch.mean(x) # x has dtype int64

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [85]:
# Finding mean by changing dtype of tensor
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [86]:
# Finding the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

## Positional Min, Max

- returns the index at which the min / max element is located

In [87]:
# Finding positional min

print(x)

x.argmin() # returns index of min position

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])


tensor(0)

In [88]:
x.argmax() # returns index of min position

tensor(9)

## Reshaping, Stacking, Squeezing and Unsqueezing tensors

| Method |	One-line description |
| ------ | ---------------------- |
|torch.reshape(input, shape) | 	Reshapes input to shape (if compatible), can also use torch.Tensor.reshape(). |
Tensor.view(shape) | 	Returns a view of the original tensor in a different shape but shares the same data as the original tensor. |
torch.stack(tensors, dim=0) |	Concatenates a sequence of tensors along a new dimension (dim), all tensors must be same size.|
torch.squeeze(input) |	Squeezes input to remove all the dimenions with value 1. |
torch.unsqueeze(input, dim) |	Returns input with a dimension value of 1 added at dim. |
torch.permute(input, dims) |	Returns a view of the original input with its dimensions permuted (rearranged) to dims. |

### Shape

In [89]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [90]:
# Reshape - Add extra dimension

x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [91]:
x_reshaped = x.reshape(9,1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [92]:
x_reshaped = x.reshape(3,3)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

### `Shape Errors`

In [93]:
# Here we will get an error becuase ->
# we are trying to squeeze 9 elements into 7 elements

x_reshaped = x.reshape(1, 7)
x_reshaped, x_reshaped.shape

RuntimeError: shape '[1, 7]' is invalid for input of size 9

In [94]:
# Here we will get an error becuase ->
# we are trying to squeeze 9 elements into 18 (9 x 2)

x_reshaped = x.reshape(2, 9)
x_reshaped, x_reshaped.shape

RuntimeError: shape '[2, 9]' is invalid for input of size 9

### View

View is similar to shape, but ->  
the only difference is view **shares the same memory** as original tensor

In [95]:
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [96]:
# changing z will change x(because their memory is shared)

print("original z and x:")
print(z, x)

z[:, 0] = 5

print("\nModifying z leads to modification of x too\n")
print("modified z and x:")
print(z, x)

original z and x:
tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]) tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

Modifying z leads to modification of x too

modified z and x:
tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]) tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])


### Stack

Stacking tensors on top of each other

In [97]:
x = torch.arange(0, 10, 2)
x

tensor([0, 2, 4, 6, 8])

In [98]:
# Stacking vertically

x_stacked = torch.stack([x,x,x,x], dim = 0)
x_stacked

tensor([[0, 2, 4, 6, 8],
        [0, 2, 4, 6, 8],
        [0, 2, 4, 6, 8],
        [0, 2, 4, 6, 8]])

In [99]:
# Stacking horizontally

x_stacked = torch.stack([x,x,x,x], dim = 1)
x_stacked

tensor([[0, 0, 0, 0],
        [2, 2, 2, 2],
        [4, 4, 4, 4],
        [6, 6, 6, 6],
        [8, 8, 8, 8]])

### Squeeze

Remove all single dimensions from a target tensor

In [100]:
# original tensor
x = x.reshape(1,5)
x, x.shape

(tensor([[0, 2, 4, 6, 8]]), torch.Size([1, 5]))

In [101]:
# changes dimension from (1, 5) to (5)
x.squeeze(), x.squeeze().shape

(tensor([0, 2, 4, 6, 8]), torch.Size([5]))

### Unsqueeze
add a single dimension to target tensor at specific dim

In [102]:
# original tensor
x, x.shape

(tensor([[0, 2, 4, 6, 8]]), torch.Size([1, 5]))

In [103]:
# changes dimension from (1, 5) to (1, 5, 1)
x.unsqueeze(dim=2), x.unsqueeze(dim=2).shape

(tensor([[[0],
          [2],
          [4],
          [6],
          [8]]]),
 torch.Size([1, 5, 1]))

In [104]:
# changes dimension from (1, 5) to (1, 1, 5)
x.unsqueeze(dim=0), x.unsqueeze(dim=0).shape

(tensor([[[0, 2, 4, 6, 8]]]), torch.Size([1, 1, 5]))

### Permute
Rearranges the dimensions of a target tensor in a specific order

`permute(2, 0, 1)`  
what this says is **swapping:**  
- 0th dimension to 2nd  
- 1st dimension to 0th  
- 2nd dimension to 1


In [105]:
# original image tensor
x_original = torch.rand(size=(224,224,3)) # [height, width, color_channels]
x_original.shape

torch.Size([224, 224, 3])

In [106]:
# permite the original tensor
x_permutted = x_original.permute(2,0,1)
x_permutted.shape # [color_channels, height,  width]

torch.Size([3, 224, 224])

### Selecting data from tensors

In [107]:
print(x_original.shape)

torch.Size([224, 224, 3])


In [108]:
# Selecting an element
x_original[0,0,0]

tensor(0.7924)

In [109]:
## Resetting an element
x_original[0,0,0] = 0.6969
x_original[0,0,0]

tensor(0.6969)

### Indexing (selecting data from tensors)

Indexing with pyTorch is similar in indexing with numpy

In [110]:
# create a tensor
import torch

x = torch.arange(1,10).reshape(1,3,3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [111]:
 # Indexing 0th dimension

x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [112]:
 # Indexing 1st dimension

x[0][1]

tensor([4, 5, 6])

In [113]:
 # Indexing 2nd dimension

x[0][1][1]

tensor(5)

In [114]:
# select all of nth dimension with ":"

x[:, 0] # select all of 0th dimension

tensor([[1, 2, 3]])

In [115]:
 # get all values of 0th and 1st dimension, but only index 1 of 2nd dimension

x[:, : , 1]

tensor([[2, 5, 8]])

In [116]:
# get all values of the 0 dimension but only 1 index value of 1st and 2nd dimension

x[:, 1, 1]

tensor([5])

In [117]:
# get index 0 of 0th and 1st dimension and all values of 2nd dimension

x[0, 0, :]

tensor([1, 2, 3])

In [118]:
# return 9
x[:,2,2]

tensor([9])

In [119]:
# return 3, 6, 9
x[:,:,2]

tensor([[3, 6, 9]])

### Pytorch tensors and Numpy

- Convert data from `numpy -> tensor`
```
torch.from_numpy(mdarray)
```
- Convert data from `tensor -> numpy`
```
torch.Tensor.numpy()
```

#### numpy array to tensor

In [120]:
array = np.arange(1.0, 8.0)

tensor = torch.from_numpy(array)

array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [121]:
# default data types

print(f"default dtype of numpy: {array.dtype}")

print(f"default dtype of tensor: {torch.arange(1,8).dtype}")

default dtype of numpy: float64
default dtype of tensor: torch.int64


In [122]:
# change default dtype of a tensor from numpy data

tensor = torch.from_numpy(array).type(torch.float32)

tensor.dtype

torch.float32

#### Tensor to Numpy array

In [123]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()

In [124]:
numpy_tensor.dtype

dtype('float32')

## Reproducibility

trying to take random out of random  

In short how neural networks work is:  
`Start with random number -> Look at Data -> Update random numbers and make them better representations of the data -> repeat`


To reduce randomness in neural networks and pytorch we use **Random Seed**  


What random seed does is **"flavour the randomness"**

In [125]:
# create two random tensors

random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

print(random_tensor_A, random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.5775, 0.3780, 0.2601, 0.8581],
        [0.4704, 0.4672, 0.3067, 0.6511],
        [0.8988, 0.9254, 0.1519, 0.0597]]) tensor([[0.0730, 0.0298, 0.8281, 0.1632],
        [0.7357, 0.4794, 0.1518, 0.8530],
        [0.3876, 0.2620, 0.4837, 0.1871]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


- **Important thing to remember is random seed only works for one block of code**

In [126]:
# Creating reproducible tensors

# Setting the random seed
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)

random_tensor_C = torch.rand(3,4) # C is flavoured with seed
random_tensor_D = torch.rand(3,4) # D isn't flavoured with seed

print(random_tensor_C, random_tensor_D)
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]]) tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [127]:
# Creating reproducible tensors

# Setting the random seed before creation of each tensor
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3,4) # C is flavoured with seed

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3,4) # D is flavoured with seed

print(random_tensor_C, random_tensor_D)
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]]) tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## GPU Selection

In [128]:
!nvidia-smi

Sat Apr  6 22:05:52 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### Check for GPU access with pytorch

In [129]:
torch.cuda.is_available()

True

## Setup device agnostic code

In [130]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [131]:
# count number of devices

torch.cuda.device_count()

1

### Putting tensors (and models) on the GPU

The reason we want tensors/models on the GPU is because using a GPU results in faster computations

In [132]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1,2,3])

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [134]:
# Create a tensor on GPU
tensor_on_gpu = tensor.to(device) # device = "cuda" (we set this above)

print(tensor, tensor.device)

tensor([1, 2, 3], device='cuda:0') cuda:0


### Moving tensors back to the CPU

In [135]:
# If tensor is on GPU, we can't convert it to NumPy

tensor_back_on_CPU = tensor_on_gpu.cpu().numpy()
tensor_back_on_CPU

array([1, 2, 3])