# 00. PyTorch Fundamentals

Resource github: https://github.com/mrdbourke/pytorch-deep-learning

Resource notebook: https://www.learnpytorch.io/00_pytorch_fundamentals

Loading PyTorch (the `torch` package)

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.7.0


## Introduction to tensors

### Creating tensors

PyTorch tensors are created using `torch.tensor()`.

In [2]:
# scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [3]:
scalar.ndim

0

In [4]:
# Get tensor back as Python int
scalar.item()

7

In [5]:
# Vector
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [6]:
vector.ndim

1

In [7]:
vector.shape

torch.Size([2])

In [8]:
# Matrix
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [9]:
MATRIX.ndim

2

In [10]:
MATRIX[0]

tensor([7, 8])

In [11]:
MATRIX.shape

torch.Size([2, 2])

In [12]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [13]:
TENSOR.ndim

3

In [14]:
TENSOR.shape

torch.Size([1, 3, 3])

In [15]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

### Random tensors

In [16]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.8348, 0.6119, 0.3197, 0.2537],
        [0.2607, 0.2738, 0.0871, 0.2625],
        [0.6006, 0.8411, 0.8080, 0.6182]])

In [17]:
# Create a random tensor with a shape similar to that of an image tensor
random_image_size_tensor = torch.rand(size=(224, 224, 3)) # height, width, color channels
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [18]:
# Create a tensor of all zeros
zero = torch.zeros(size=(3, 4))
zero

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [19]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [20]:
ones.dtype

torch.float32

### Creating a range of values and tensors-like

In [21]:
# Use torch.arange() (torch.range() is deprecated)
torch.arange(0, 10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
torch.arange(start=0, end=1000, step=77)

tensor([  0,  77, 154, 231, 308, 385, 462, 539, 616, 693, 770, 847, 924])

In [23]:
# Creating tensors like
one_to_ten = torch.arange(start=1, end=11, step=1)
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

**Note**: Tensor datatype issues are one of the 3 big errors you'll run into with PyTorch & deep learning:

1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [24]:
# Default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # what datatype is the tensor (e.g. float32 or float16)
                               device=None, # what device is your tensor on (e.g. cpu or cuda)
                               requires_grad=False) # whether to track gradients with this tensors operations

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

In [25]:
float_32_tensor.dtype

torch.float32

In [26]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

### Manipulating Tensors (tensor operations)

Tensor operations include
* Addition/Subtraction
* Multiplication/Division (element-wise)
* Matrix multiplication

In [27]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [28]:
torch.add(tensor, 10)

tensor([11, 12, 13])

In [29]:
# Multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [30]:
torch.mul(tensor, 10)

tensor([10, 20, 30])

### Matrix multiplication

Two main ways of performing multiplication in neural networks and deep learning:

1. element-wise multiplication
2. matrix multiplication

In [31]:
import torch
tensor = torch.tensor([1, 2, 3])
tensor.shape

torch.Size([3])

In [32]:
# Element-wise multiplication
tensor * tensor

tensor([1, 4, 9])

In [33]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [34]:
# Can also use the "@" symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

In [35]:
# Matrix multiplication needs the inner dimensions to match;
# both tensors below are (3, 2), so they can't be multiplied as-is
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

In [36]:
output_atb = torch.matmul(tensor_A.T, tensor_B)
output_abt = torch.matmul(tensor_A, tensor_B.T)
print(output_atb)
print(output_abt)

tensor([[ 76., 103.],
        [100., 136.]])
tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])



Neural networks are full of matrix multiplications and dot products.

The `torch.nn.Linear()` module (we'll see this in action later on), also known as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input `x` and a weights matrix `W`, followed by the addition of a bias vector `b`.

$$Y_{(n \times q)} = X_{(n \times p)} \cdot W_{(p \times q)} +  1_{n} \cdot b_{q}^{\top}$$

In [37]:
# Since the linear layer starts with a random weights matrix, let's make it reproducible (more on this later)
torch.manual_seed(42)
# This uses matrix multiplication
x = torch.rand(3, 2)
linear = torch.nn.Linear(in_features=2, # in_features = must match the last dimension of the input (2 here)
                         out_features=6) # out_features = size of the last dimension of the output
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[-0.2539,  0.2555,  0.3376,  0.4656, -0.0777,  1.0456],
        [-0.0635, -0.0787,  0.0365,  0.2090, -0.2524,  0.7780],
        [-0.2150,  0.1119, -0.0063,  0.1786, -0.2141,  0.7447]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 6])


### Tensor aggregation
* min / max
* mean / sum
* etc

In [38]:
# Create a tensor
x = torch.rand(10) # from uniform distribution
print(x)
y = torch.randn(10) # from standard normal distribution
print(y)

tensor([0.1053, 0.2695, 0.3588, 0.1994, 0.5472, 0.0062, 0.9516, 0.0753, 0.8860,
        0.5832])
tensor([-0.4220, -1.3323, -0.3639,  0.1513, -0.3514, -0.7906, -0.0915,  0.2352,
         2.2440,  0.5817])


In [39]:
print("min:", torch.min(x), x.min())
print("max:", torch.max(x), x.max())
print("mean:", torch.mean(x), x.mean())
print("sum:", torch.sum(x), x.sum())
print("std:", torch.std(x), x.std())
print("var:", torch.var(x), x.var())
print("argmin:", torch.argmin(x), x.argmin())
print("argmax:", torch.argmax(x), x.argmax())

min: tensor(0.0062) tensor(0.0062)
max: tensor(0.9516) tensor(0.9516)
mean: tensor(0.3982) tensor(0.3982)
sum: tensor(3.9824) tensor(3.9824)
std: tensor(0.3337) tensor(0.3337)
var: tensor(0.1113) tensor(0.1113)
argmin: tensor(5) tensor(5)
argmax: tensor(6) tensor(6)


### Reshaping, stacking, squeezing, and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - returns a view of an input tensor with a certain shape, sharing the same memory as the original tensor
* Stacking - combines multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all dimensions of size `1` from a tensor
* Unsqueeze - adds a dimension of size `1` to a target tensor
* Permute - returns a view of the input with its dimensions permuted (swapped) in a certain way

In [40]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [41]:
# Reshape the 9-element vector into a 3x3 matrix (adds an extra dimension)
x_reshaped = x.reshape(3,3)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [42]:
# Let pytorch figure out the number of columns
x.reshape(3, -1)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [43]:
# Change the view
z = x.view(3,3)
z, z.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [44]:
# Stack tensors on top of each other
x_stacked0 = torch.stack([x, x, x, x], dim=0)
x_stacked0, x_stacked0.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 torch.Size([4, 9]))

In [45]:
x_stacked1 = torch.stack([x, x, x, x], dim=1)
x_stacked1, x_stacked1.shape

(tensor([[1., 1., 1., 1.],
         [2., 2., 2., 2.],
         [3., 3., 3., 3.],
         [4., 4., 4., 4.],
         [5., 5., 5., 5.],
         [6., 6., 6., 6.],
         [7., 7., 7., 7.],
         [8., 8., 8., 8.],
         [9., 9., 9., 9.]]),
 torch.Size([9, 4]))

In [46]:
x_vstacked = torch.vstack([x, x, x, x])
x_vstacked, x_vstacked.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.],
         [1., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 torch.Size([4, 9]))

In [47]:
x_hstacked = torch.hstack([x, x, x, x])
x_hstacked, x_hstacked.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9., 1., 2., 3., 4., 5., 6., 7., 8., 9.,
         1., 2., 3., 4., 5., 6., 7., 8., 9., 1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 torch.Size([36]))

In [48]:
x = torch.arange(1, 7)
y = x.reshape(2,3)
y_stacked0 = torch.stack([y, y], dim=0)
y_stacked1 = torch.stack([y, y], dim=1)
y_stacked2 = torch.stack([y, y], dim=2)
y_vstacked = torch.vstack([y, y])
y_hstacked = torch.hstack([y, y])
print(y_stacked0, y_stacked0.shape)
print(y_stacked1, y_stacked1.shape)
print(y_stacked2, y_stacked2.shape)
print(y_vstacked, y_vstacked.shape)
print(y_hstacked, y_hstacked.shape)

tensor([[[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]]]) torch.Size([2, 2, 3])
tensor([[[1, 2, 3],
         [1, 2, 3]],

        [[4, 5, 6],
         [4, 5, 6]]]) torch.Size([2, 2, 3])
tensor([[[1, 1],
         [2, 2],
         [3, 3]],

        [[4, 4],
         [5, 5],
         [6, 6]]]) torch.Size([2, 3, 2])
tensor([[1, 2, 3],
        [4, 5, 6],
        [1, 2, 3],
        [4, 5, 6]]) torch.Size([4, 3])
tensor([[1, 2, 3, 1, 2, 3],
        [4, 5, 6, 4, 5, 6]]) torch.Size([2, 6])


In [49]:
# squeeze
x = torch.zeros(2, 1, 2, 1, 2)
y = torch.squeeze(x)
x.size(), y.size()

(torch.Size([2, 1, 2, 1, 2]), torch.Size([2, 2, 2]))

In [50]:
# unsqueeze
x = torch.zeros(2, 2)
y = torch.unsqueeze(x, 0)
z = torch.unsqueeze(x, 1)
w = torch.unsqueeze(x, 2)
x.size(), y.size(), z.size(), w.size()

(torch.Size([2, 2]),
 torch.Size([1, 2, 2]),
 torch.Size([2, 1, 2]),
 torch.Size([2, 2, 1]))

In [51]:
# permute (returns a view of the original tensor input with its dimensions permuted)
x = torch.arange(24).reshape(2,3,4)
print(x.shape, x[0, 0, :], x[0, :, 0])
y = x.permute(2, 0, 1)
print(y.shape, y[:, 0, 0], y[0, 0, :])

torch.Size([2, 3, 4]) tensor([0, 1, 2, 3]) tensor([0, 4, 8])
torch.Size([4, 2, 3]) tensor([0, 1, 2, 3]) tensor([0, 4, 8])


## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [52]:
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
print(x[0])
print(x[0][0])
print(x[0][0][0])

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)


In [53]:
print(x[0, :, :])
print(x[0, 0, :])
print(x[0, 0, 0])

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)


In [54]:
print(x[0, 1:2, :1])
print(x[0, 1:3, :1])
print(x[0, 1:, :1])

tensor([[4]])
tensor([[4],
        [7]])
tensor([[4],
        [7]])


### PyTorch and NumPy

* Data in NumPy array ---> PyTorch tensor: `torch.from_numpy(ndarray)`
* Data in PyTorch tensor ---> NumPy array: `torch.Tensor.numpy()`

In [55]:
# NumPy array to tensor

import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
# warning: torch.from_numpy() keeps the NumPy array's dtype (float64 here),
# not PyTorch's default float32, so convert explicitly if float32 is needed
tensor1 = torch.from_numpy(array).type(torch.float32)
array, tensor, tensor1

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 tensor([1., 2., 3., 4., 5., 6., 7.]))

In [56]:
# Tensor to NumPy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
numpy_tensor1 = tensor.type(torch.float64).numpy()
numpy_tensor2 = tensor.numpy().astype(np.float64)
tensor, numpy_tensor, numpy_tensor1, numpy_tensor2

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32),
 array([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.]))

### Reproducibility

Setting a *random seed* makes experiments that involve randomness repeatable.



In [57]:
import torch

# Draw two (3, 4) random tensors back to back, without setting a seed in this cell
random_tensor_A, random_tensor_B = torch.rand(3, 4), torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print("Does Tensor A equal Tensor B? (anywhere)")
# Element-wise comparison: produces a boolean tensor with the same shape
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.6440, 0.7071, 0.6581, 0.4913],
        [0.8913, 0.1447, 0.5315, 0.1587],
        [0.6542, 0.3278, 0.6532, 0.3958]])

Tensor B:
tensor([[0.9147, 0.2036, 0.2018, 0.2018],
        [0.9497, 0.6666, 0.9811, 0.0874],
        [0.0041, 0.1088, 0.1637, 0.7025]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [58]:
import torch

# Seed the global generator so the draw below is repeatable
RANDOM_SEED = 42  # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

# Re-seed before the second draw: every rand() call advances the generator,
# so without this tensor_D would be different to tensor_C
torch.random.manual_seed(seed=RANDOM_SEED)  # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print("Does Tensor C equal Tensor D? (anywhere)")
# Element-wise comparison: produces a boolean tensor with the same shape
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

### Running tensors and PyTorch on the GPUs

In [59]:
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [60]:
# check for GPU access with PyTorch
import torch
torch.cuda.is_available()

False

In [61]:
# Set device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [62]:
# Count number of devices
torch.cuda.device_count()

0

### Getting PyTorch to run on Apple Silicon

In [63]:
# Check for Apple Silicon GPU
import torch
torch.backends.mps.is_available() # Note this will return False if you're not running on a Mac

True

In [64]:
# Pick the device in priority order:
# NVIDIA GPU (CUDA), then Apple Silicon GPU (MPS), otherwise fall back to the CPU
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
device

'mps'

### Putting tensors (and models) on the GPU

In [65]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1, 2, 3])

# Tensor not on GPU
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [66]:
# Move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='mps:0')

### Moving tensors back to the CPU

In [67]:
# A tensor on the GPU can't be converted to NumPy directly, so copy it to the CPU first
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])