Import PyTorch libraries

## 00. PyTorch Fundamentals

https://www.learnpytorch.io/00_pytorch_fundamentals/

In [1]:
# Core PyTorch package plus its neural-network building blocks
import torch
from torch import nn
# Show the installed version so the outputs in this notebook can be tied to a release
torch.__version__

'2.7.0'

Scalar

In [4]:
# A scalar is a single number, stored as a zero-dimensional tensor
scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
# Number of dimensions of the tensor (0 for a scalar)
scalar.ndim

0

In [6]:
# .item() extracts the single value as a plain Python number (only valid for one-element tensors)
scalar.item()

7

In [9]:
# Vector: a one-dimensional tensor built from a flat list
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [12]:
# ndim counts the axes; shape gives the length of each axis
print(f"dimension= {vector.ndim} \nShape= {vector.shape}")


dimension= 1 
Shape= torch.Size([2])


In [19]:
# Matrix: a two-dimensional tensor built from a nested list of rows
MATRIX = torch.tensor([
    [7, 8],
    [9, 10],
])
# Show the values, the number of dimensions and the shape, one per line
print(f"{MATRIX} \n{MATRIX.ndim} \n{MATRIX.shape}")

tensor([[ 7,  8],
        [ 9, 10]]) 
2 
torch.Size([2, 2])


In [20]:
# Tensor: one outer bracket wrapping a 3x3 grid gives a three-dimensional tensor
TENSOR = torch.tensor([
    [[1, 2, 3],
     [3, 6, 9],
     [2, 4, 5]],
])
# Show the values, the number of dimensions and the shape, one per line
print(f"{TENSOR} \n{TENSOR.ndim} \n{TENSOR.shape}")

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]]) 
3 
torch.Size([1, 3, 3])


It outputs `torch.Size([1, 3, 3])`.

The dimensions go outer to inner.

That means the outermost dimension has size 1 and holds a single 3 by 3 matrix.

![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [21]:
# Random tensor with 3 rows and 4 columns, values drawn uniformly from [0, 1)
random_tensor = torch.rand(size=(3, 4))
random_tensor, random_tensor.dtype

(tensor([[0.6146, 0.6396, 0.8045, 0.7425],
         [0.6365, 0.3016, 0.4968, 0.2902],
         [0.2461, 0.6996, 0.1061, 0.1537]]),
 torch.float32)

In [22]:
# Create a random tensor of size (224, 224, 3)
# NOTE(review): the name suggests an image layout, presumably (height, width, colour_channels) - confirm
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [23]:
# Create a tensor of all zeros using torch.zeros() (float32 unless a dtype is given)
zeros_tensor = torch.zeros(size=(3, 4))
zeros_tensor, zeros_tensor.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [24]:
# Likewise, create a tensor of all ones using torch.ones()
ones_tensor = torch.ones(size=(3, 4))
ones_tensor, ones_tensor.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

In [26]:
# Create a range of numbers using torch.arange()

# Use torch.arange(); torch.range() is deprecated.
# `end` is exclusive, so this yields 0, 2, 4, 6, 8 (integer arguments give an int64 tensor).
arange_tensor = torch.arange(start=0, end=10, step=2)
arange_tensor, arange_tensor.dtype

(tensor([0, 2, 4, 6, 8]), torch.int64)

In [27]:
# Create a tensor of zeros with the same shape and dtype as another tensor using torch.zeros_like()
zeros_like_tensor = torch.zeros_like(random_tensor)
zeros_like_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [28]:
# Default datatype for floating-point tensors is float32 (integer data defaults to int64)
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # None lets PyTorch infer the dtype from the data (float32 for Python floats)
                               device=None, # None places the tensor on the current default device (CPU in this run)
                               requires_grad=False) # if True, operations performed on the tensor are recorded for gradient computation

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

Getting information from tensors

Once you've created tensors (or someone else or a PyTorch module has created them for you), you might want to get some information from them.

We've seen these before but three of the most common attributes you'll want to find out about tensors are:

- `shape` - what shape is the tensor? (some operations require specific shape rules)
- `dtype` - what datatype are the elements within the tensor stored in?
- `device` - what device is the tensor stored on? (usually GPU or CPU)

Let's create a random tensor and find out details about it.

In [29]:
# Build a 3x4 tensor of random values to inspect
some_tensor = torch.rand(size=(3, 4))

# Report the three attributes worth checking first: shape, dtype and device
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}")  # will default to CPU

tensor([[0.0112, 0.5090, 0.9358, 0.9001],
        [0.1547, 0.6896, 0.1806, 0.5288],
        [0.4466, 0.3712, 0.1084, 0.0738]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


Note: When you run into issues in PyTorch, the cause is very often one of the three attributes above. So when the error messages show up, sing yourself a little song called "what, what, where":

"what shape are my tensors? what datatype are they and where are they stored? what shape, what datatype, where where where"


### Basic Operations

In [30]:
# Create a tensor of values and apply each arithmetic operator with a scalar (applied element-wise).
# Note: division returns a float tensor even though the input holds integers.
tensor = torch.tensor([1, 2, 3])
tensor + 10, tensor - 10, tensor * 10, tensor / 10

(tensor([11, 12, 13]),
 tensor([-9, -8, -7]),
 tensor([10, 20, 30]),
 tensor([0.1000, 0.2000, 0.3000]))

In [31]:
# torch.multiply() is the function form of `tensor * 10`
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [32]:
# The operations above returned new tensors; `tensor` itself is unchanged
tensor

tensor([1, 2, 3])

In [33]:
# Multiplying a tensor by itself with * works element by element
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


#### MATRIX multiplication (is all you need)

THE MAIN TWO RULES FOR MATRIX MULTIPLICATION TO REMEMBER ARE:
- THE INNER DIMENSIONS MUST MATCH:
    + (3,2) @ (3,2) WON'T WORK
    + (2,3) @ (3,2) WILL WORK
    + (3,2) @ (2,3) WILL WORK

- THE RESULTING MATRIX HAS THE SHAPE OF THE *OUTER DIMENSIONS*:
    + (2,3) @ (3,2) -> (2,2)
    + (3,2) @ (2,3) -> (3,3)

Note: "@" in Python is the symbol for matrix multiplication

In [34]:
# A 1-D tensor with three elements, reused by the multiplication examples that follow
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

![image.png](attachment:image.png)

In [35]:
# Element-wise multiplication (each element times its counterpart; not a matrix multiplication)
tensor * tensor

tensor([1, 4, 9])

In [37]:
# Matrix multiplication; for two 1-D tensors this is the dot product: 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor)

tensor(14)

In [38]:
# The @ operator performs the same matrix multiplication as torch.matmul();
# torch.matmul() is simply more explicit to read
tensor @ tensor

tensor(14)

In [39]:
%%time
# Matrix multiplication by hand: accumulate the sum of the element-wise products
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]  # value becomes a 0-dim tensor after the first addition
value

CPU times: user 589 μs, sys: 936 μs, total: 1.52 ms
Wall time: 2.2 ms


tensor(14)

In [40]:
%%time
# Same computation with the built-in vectorised call, timed for comparison with the loop version
torch.matmul(tensor, tensor)

CPU times: user 170 μs, sys: 46 μs, total: 216 μs
Wall time: 199 μs


tensor(14)

**One of the most common errors in deep learning (shape errors)**

Because much of deep learning is multiplying and performing operations on matrices and matrices have a strict rule about what shapes and sizes can be combined, one of the most common errors you'll run into in deep learning is shape mismatches.


In [41]:
# Shapes need to line up the right way for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# (3, 2) @ (3, 2): the inner dimensions (2 and 3) do not match, so matmul raises.
# Catch and print the error so the demonstration is kept but the notebook
# still runs from top to bottom (Restart Kernel -> Run All).
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

We can make matrix multiplication work between tensor_A and tensor_B by making their inner dimensions match.

One of the ways to do this is with a transpose (switch the dimensions of a given tensor).

You can perform transposes in PyTorch using either:

- `torch.transpose(input, dim0, dim1)` - where `input` is the desired tensor to transpose and `dim0` and `dim1` are the dimensions to be swapped.
- `tensor.T` - where `tensor` is the desired tensor to transpose.


In [42]:
# View tensor_A and tensor_B (both have shape (3, 2))
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [43]:
# View tensor_A and tensor_B.T (.T swaps rows and columns, turning (3, 2) into (2, 3))
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [44]:
# The operation works when tensor_B is transposed:
# (3, 2) @ (2, 3) -> (3, 3), i.e. the result takes the outer dimensions
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


In [45]:
# torch.mm() also performs matrix multiplication, but only for 2-D tensors and without broadcasting;
# torch.matmul() is the general form. For two 2-D matrices like these, both give the same result.
torch.mm(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

![00-matrix-multiply-crop.gif](attachment:00-matrix-multiply-crop.gif)

**Note:** A matrix multiplication like this is also referred to as the **dot product** of two matrices.

![image.png](attachment:image.png)

In [49]:
# Since the linear layer starts with a random weights matrix, let's make it reproducible (more on this later)
torch.manual_seed(42)
# torch.nn.Linear computes x @ weight.T + bias, so it uses matrix multiplication:
# a (3, 2) input with in_features=2 and out_features=6 becomes a (3, 6) output
linear = torch.nn.Linear(in_features=2, # in_features = must match the last (inner) dimension of the input
                         out_features=6) # out_features = size of the last dimension of the output
x = tensor_A
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[2.2368, 1.2292, 0.4714, 0.3864, 0.1309, 0.9838],
        [4.4919, 2.1970, 0.4469, 0.5285, 0.3401, 2.4777],
        [6.7469, 3.1648, 0.4224, 0.6705, 0.5493, 3.9716]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


#### Finding the min, max, mean, sum, etc (aggregation)

In [50]:
# Integer values from 0 up to (but not including) 100 in steps of 10
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [51]:
# Aggregations reduce the whole tensor to a single value
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error because x holds integers
print(f"Mean: {x.type(torch.float32).mean()}") # mean() needs a floating-point dtype, so cast first
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [52]:
# The same aggregations are also available as torch.* functions
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

Positional min/max

You can also find the index of a tensor where the max or minimum occurs with torch.argmax() and torch.argmin() respectively.

This is helpful in case you just want the position where the highest (or lowest) value is and not the actual value itself (we'll see this in a later section when using the softmax activation function).


In [53]:
# Nine values: 10, 20, ..., 90
tensor = torch.arange(start=10, end=100, step=10)
print(f"Tensor: {tensor}")

# argmax()/argmin() give the position of the extreme value, not the value itself
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


In [54]:
# Create a tensor and check its datatype (float arguments to arange give a float32 tensor)
tensor = torch.arange(10., 100., 10.)
tensor.dtype

torch.float32

In [55]:
# Create a float16 (half-precision) tensor by casting the float32 one
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

In [56]:
# Create an int8 tensor by casting; the values 10..90 fit in the int8 range (-128..127)
tensor_int8 = tensor.type(torch.int8)
tensor_int8

tensor([10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=torch.int8)

#### Reshaping, stacking, squeezing, and unsqueezing

![image.png](attachment:image.png)

In [57]:
# Seven float values, 1.0 through 7.0 (the end value 8.0 is excluded)
x = torch.arange(1., 8.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.Size([7]))

In [58]:
# Add an extra leading dimension of size 1 to x: shape (7,) -> (1, 7)
x_reshaped = x.reshape(1,7)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [59]:
# view() returns a tensor with a new shape that shares the same underlying data as x
# See more: https://stackoverflow.com/a/54507446/7900723
z = x.view(1, 7)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7.]]), torch.Size([1, 7]))

In [60]:
# z is a view of x, so writing through z also changes x (they share the same memory).
# x_reshaped shows the new value later in the notebook as well, so reshape() returned a view here too.
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7.]]), tensor([5., 2., 3., 4., 5., 6., 7.]))

In [64]:
# Stack four copies of x along a new dimension; dim=1 places them side by side as columns, giving shape (7, 4)
x_stacked = torch.stack([x, x, x, x], dim=1) # try changing dim to dim=0 and see what happens
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.]])

In [65]:
# Show the tensor before squeezing: shape (1, 7)
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped (squeeze() drops every dimension of size 1)
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7.]])
Previous shape: torch.Size([1, 7])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7.])
New shape: torch.Size([7])


In [66]:
# Create tensor with specific shape
x_original = torch.rand(size=(224, 224, 3))

# Permute the original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}")

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


#### Indexing (selecting data from tensors)