# 00. PyTorch Fundamentals

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.0.0


## Introduction to Tensors

### Creating tensors

In [10]:
# scalar 
scalar = torch.tensor(7)
scalar

tensor(7)

In [13]:
scalar.ndim

0

In [14]:
# Get tensor back as Python int
scalar.item()

7

In [15]:
# Vector
vector = torch.tensor([3,4])
vector

tensor([3, 4])

In [16]:
vector.ndim

1

In [17]:
vector.shape

torch.Size([2])

In [18]:
# MATRIX
MATRIX = torch.tensor([[7,8],
                      [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [19]:
MATRIX.ndim

2

In [20]:
MATRIX[1]

tensor([ 9, 10])

In [22]:
MATRIX.shape

torch.Size([2, 2])

In [23]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3],
                       [4,5,6],
                       [3,4,5]]])
TENSOR

tensor([[[1, 2, 3],
         [4, 5, 6],
         [3, 4, 5]]])

In [24]:
TENSOR.ndim

3

In [25]:
TENSOR.shape

torch.Size([1, 3, 3])

In [26]:
TENSOR[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [3, 4, 5]])

In [29]:
TENSOR[0,0]

tensor([1, 2, 3])

In [36]:
TENSOR2 = torch.tensor([[[1,2,3,4,5],
                         [2,3,4,5,6],
                        [3,4,5,6,7],
                        [4,5,6,7,8]]])
TENSOR2

tensor([[[1, 2, 3, 4, 5],
         [2, 3, 4, 5, 6],
         [3, 4, 5, 6, 7],
         [4, 5, 6, 7, 8]]])

In [37]:
TENSOR2.shape

torch.Size([1, 4, 5])

### Random tensors

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

In [44]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.1599, 0.7427, 0.7948, 0.6214],
        [0.4547, 0.5424, 0.6085, 0.5258],
        [0.2695, 0.6385, 0.3396, 0.3531]])

In [45]:
random_tensor.ndim

2

In [48]:
# Create a random tensor with similar shape to an image
random_image_size_tensor = torch.rand(size=(3, 224, 224)) # colour channels (R, G, B), height, width
random_image_size_tensor.ndim, random_image_size_tensor.shape

(3, torch.Size([3, 224, 224]))

### Zeros and ones

In [49]:
# Create a tensor of all zeros
zeros = torch.zeros(3,4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [50]:
# Create a tensor of all ones
ones = torch.ones(3,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [51]:
ones.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [54]:
# torch.range() is deprecated; use torch.arange() instead (`end` is exclusive, so end=11 stops at 10)
one_to_ten = torch.arange(start=1, end=11,step=1)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [55]:
# Creating tensors like
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor Datatypes

**Note:** The three most common causes of errors when working with tensors in PyTorch are:

1. Tensors not in right datatype
2. Tensors not in right shape
3. Tensors not on the right device

In [59]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                              dtype=None, # what datatype is the tensor (e.g. float32 or float16); None lets PyTorch infer it (float32 here)
                              device=None, # what device is your tensor on
                              requires_grad=False) # whether or not to track gradients with this tensor's operations
float_32_tensor

tensor([3., 6., 9.])

In [60]:
float_32_tensor.dtype

torch.float32

In [61]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [65]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors (tensor attributes)

1. datatype - `tensor.dtype`
2. shape - `tensor.shape`
3. device - `tensor.device`

In [88]:
# Create a tensor
some_tensor = torch.rand(size=(3,4))
some_tensor

tensor([[0.2960, 0.7429, 0.4960, 0.1909],
        [0.4105, 0.1379, 0.4345, 0.8660],
        [0.9006, 0.4456, 0.3602, 0.7803]])

In [89]:
# Find out details about some tensor
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape os tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.2960, 0.7429, 0.4960, 0.1909],
        [0.4105, 0.1379, 0.4345, 0.8660],
        [0.9006, 0.4456, 0.3602, 0.7803]])
Datatype of tensor: torch.float32
Shape os tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating Tensors (tensor operations)

Tensor operations include:

* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

In [7]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [173]:
# Multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [174]:
tensor

tensor([1, 2, 3])

In [175]:
# Subtract 10
tensor - 10

tensor([-9, -8, -7])

In [8]:
# Division
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [176]:
# Try out PyTorch in-built functions
torch.mul(tensor,10)

tensor([10, 20, 30])

In [177]:
torch.add(tensor,10)

tensor([11, 12, 13])

### Matrix multiplication

There are two main ways of performing multiplication in neural networks and deep learning:

1. Element-wise multiplication
2. Matrix multiplication (dot product)


In [169]:
# Element-wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [170]:
# Matrix multiplication
torch.matmul(tensor,tensor)

tensor(14)

In [171]:
tensor

tensor([1, 2, 3])

In [172]:
# Matrix multiplication by hand
1*1 + 2*2 + 3*3

14

In [178]:
%%time
# Dot product by hand: accumulate the element-wise products in a Python loop.
# Timed so it can be compared with the torch.matmul call in the next cell.
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 1.42 ms, sys: 781 µs, total: 2.2 ms
Wall time: 1.49 ms


In [180]:
%%time
torch.matmul(tensor,tensor)

CPU times: user 202 µs, sys: 40 µs, total: 242 µs
Wall time: 189 µs


tensor(14)

#### Two main rules when performing matrix multiplication

1. The **inner dimensions** must match:

* `(3,2) @ (3,2)` won't work
* `(2,3) @ (3,2)` will work
* `(3,2) @ (2,3)` will work

2. The resulting matrix has the shape of the **outer dimensions**:

* `(2,3) @ (3,2)` -> `(2,2)`
* `(3,2) @ (2,3)` -> `(3,3)`

In [181]:
torch.matmul(torch.rand(3,2), torch.rand(2,3)).shape

torch.Size([3, 3])

### One of the most common errors in matrix multiplication is a shape error

In [185]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

# Both operands are (3, 2), so the inner dimensions (2 and 3) do not match and
# the product fails. The error is the point of this cell, so catch it and
# display it instead of letting it abort a "Restart & Run All".
# Note: torch.mm only multiplies 2-D matrices (no broadcasting); torch.matmul
# and the @ operator are the general-purpose forms.
shape_error = None
try:
    torch.mm(tensor_A, tensor_B)
except RuntimeError as err:
    shape_error = err  # keep a reference; `err` is unbound once the except block ends
    print(f"{type(err).__name__}: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**.

A **transpose** switches the axes or dimensions of a given tensor

In [186]:
tensor_B.T

tensor([[ 7,  8,  9],
        [10, 11, 12]])

In [187]:
tensor_A.shape, tensor_B.T.shape

(torch.Size([3, 2]), torch.Size([2, 3]))

In [189]:
# The Matrix multiplication operation works when tensor_B is transposed
output = torch.matmul(tensor_A,tensor_B.T)
output

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [190]:
# The shape of the output equals the outer dimensions of the two operands
output.shape

torch.Size([3, 3])

### Finding the min, max, mean, sum, etc (tensor aggregation)

In [9]:
# Create a tensor
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [10]:
# Find the min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [11]:
#Find the max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [16]:
# Find the mean - note: torch.mean() needs a floating point (or complex) dtype, so the integer tensor is cast to float32 first
torch.mean(x.type(torch.float32)) , x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [17]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

## Finding the position of min and max

In [19]:
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [20]:
# Find the position of minimum value
x.argmin() # Index value of minimum value element

tensor(0)

In [21]:
# Find the position of maximum value
x.argmax() # Index of maximum value element

tensor(9)

## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - Reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but share the same memory as the original tensor
* Stacking - Combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - Removes all dimensions of size 1 from a tensor
* Unsqueeze - Adds a dimension of size 1 to a target tensor at a given position
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [11]:
# Let's create a tensor
import torch
x = torch.arange(1.0, 10.0 )
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [12]:
# Add an extra dimension
x_reshaped = x.reshape(9, 1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [13]:
# Change the view
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [15]:
# Changing z changes x (because a view of a tensor shares the same memory as the original)
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [16]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [17]:
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [18]:
# torch.squeeze() - removes all single dimensions from a target tensor
x_reshaped = x.reshape(1, 9)
x_reshaped , x_reshaped.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [19]:
x_squezeed = x_reshaped.squeeze()
x_squezeed , x_squezeed.shape

(tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [20]:
x_unsqueezed = x_squezeed.unsqueeze(dim=0)
x_unsqueezed, x_unsqueezed.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [70]:
x_unsqueezed = x_squezeed.unsqueeze(dim=1)
x_unsqueezed, x_unsqueezed.shape

(tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [28]:
# torch.permute - rearrange the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224, 224, 3)) # [height, width, color_channels]

# permute the original tensor to rearrange the axis (or dim) order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

x_permuted.shape # [color_channels, height, width]

torch.Size([3, 224, 224])

In [37]:
x_original[0, 0, 0] = 728218
x_original[0, 0, 0] , x_permuted[0, 0, 0]

(tensor(728218.), tensor(728218.))

## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [40]:
# Create a tensor
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [41]:
# Let's index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [42]:
# Let's index on the middle bracket (dim = 1)
x[0][0]

tensor([1, 2, 3])

In [43]:
# Let's index on the most inner bracket (last dimension)
x[0][0][0]

tensor(1)

In [44]:
x[0][2][2]

tensor(9)

In [46]:
# You can also use ":" to select "all" of a target dimension
x[:, 0]

tensor([[1, 2, 3]])

In [47]:
# Get all values of 0th and 1st dimensions but only index 1 of 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [48]:
# Get all values of the 0 dimension but only the 1 index value of 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

In [49]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :]

tensor([1, 2, 3])

In [64]:
# Index on x to return 9
print(x[0][2][2])
print(x[0, 2, 2])
print(x[:, 2, 2])

# Index on x to return 3, 6, 9
print(x[:,:,2])
print(x[0,:,2])

tensor(9)
tensor(9)
tensor([9])
tensor([[3, 6, 9]])
tensor([3, 6, 9])


## PyTorch tensors and NumPy

NumPy is a popular scientific Python numerical computing library.
And because of this, PyTorch has functionality to interact with it.

The two main methods you'll want to use for NumPy to PyTorch (and back again) are:

* `torch.from_numpy(ndarray)` : NumPy array -> PyTorch tensor
* `torch.Tensor.numpy()` : PyTorch tensor -> NumPy array

In [69]:
# NumPy array to tensor
import torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # by default numpy arrays are float64 type, if you convert an numpy array to a pytorch tensor it'll keep the same data type (float64) 
array, tensor 

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [70]:
# Change the array, what'll happen to tensor?
# `array + 1` builds a NEW array and rebinds the name, so `tensor` still refers to the
# original buffer and is unchanged. torch.from_numpy shares memory with its input, so an
# in-place update (e.g. `array += 1`) would show up in `tensor` as well.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [72]:
# tensor to numpy array
tensor = torch.ones(7) # create a tensor of ones with dtype float32
numpy_tensor = tensor.numpy() # will be dtype=float32 unless changed
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [74]:
# Change the tensor, what'll happen to the NumPy array?
# `tensor + 1` allocates a new tensor and rebinds the name, so `numpy_tensor` keeps the
# original values. Tensor.numpy() shares memory with a CPU tensor, so an in-place update
# (e.g. `tensor += 1` or `tensor.add_(1)`) would change `numpy_tensor` too.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

To reduce the randomness in neural networks and PyTorch, we use the concept of a **random seed**.
Essentially, what a random seed does is "flavour" the randomness.

In [1]:
import torch

# Create two random tensors (no seed is set, so the two draws are expected to differ)
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(random_tensor_A)
print(random_tensor_B)

print(random_tensor_A == random_tensor_B)

tensor([[0.1935, 0.2437, 0.7934, 0.8766],
        [0.4556, 0.7843, 0.5475, 0.9378],
        [0.2838, 0.9972, 0.8470, 0.3669]])
tensor([[0.6933, 0.9508, 0.1357, 0.7887],
        [0.9299, 0.2778, 0.8321, 0.9801],
        [0.9082, 0.6229, 0.0531, 0.4297]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [13]:
# Reproducible randomness: two draws that come out identical
import torch

# Seed shared by both draws
RANDOM_SEED = 42

# Every torch.rand call advances the generator, so the seed is reset
# immediately before each draw to make the second draw repeat the first.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)

# Show both tensors, then their element-wise comparison
print(random_tensor_C, random_tensor_D, sep="\n")
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Check for GPU access with PyTorch

In [1]:
# Check for GPU access
import torch
torch.backends.mps.is_available()

True

In [7]:
# Setup device agnostic code: prefer CUDA, then Apple Silicon (MPS), then fall back to CPU.
# Checking only MPS would leave NVIDIA machines on the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

'mps'

In [3]:
# Count number of devices

# NOTE(review): the device_count() call below is disabled - presumably torch.backends.mps
# has no such function in this release; confirm before re-enabling.
#torch.backends.mps.device_count()
# Only the installed PyTorch version is printed here.
print(torch.__version__)

2.0.0


In [9]:
# MacMini M1 Chip
# Micro-benchmark: vector @ matrix on CPU vs. MPS. Requires an MPS-capable machine.
# NOTE(review): MPS work may be queued asynchronously, so the 'mps' timings might not
# include full kernel execution - consider torch.mps.synchronize(); confirm.

import torch
import timeit

N_RUNS = 100_000  # same value as the original literal 1000_00, with conventional digit grouping

a_cpu = torch.rand(1000, device='cpu')
b_cpu = torch.rand((1000, 1000), device='cpu')
a_mps = torch.rand(1000, device='mps')
b_mps = torch.rand((1000, 1000), device='mps')

# Pre-allocated operands: times only the product itself.
print('cpu', timeit.timeit(lambda: a_cpu @ b_cpu, number=N_RUNS))
print('mps', timeit.timeit(lambda: a_mps @ b_mps, number=N_RUNS))

# Operands created inside the timed call: times allocation + random fill + product.
# The first of these runs on the CPU, so it is labelled 'cpu' (it used to print 'mps').
print('cpu', timeit.timeit(lambda: torch.rand(1000, device='cpu') @ torch.rand((1000, 1000), device='cpu'), number=N_RUNS))
print('mps', timeit.timeit(lambda: torch.rand(1000, device='mps') @ torch.rand((1000, 1000), device='mps'), number=N_RUNS))


cpu 2.013002541999981
mps 2.899153292000051
mps 246.15194995900004
mps 9.835887625000055


In [2]:
# MacMini M1 Chip

import timeit

for s in [250,1000,10000]:
    print('size',s)
    print('cpu', timeit.timeit(lambda: torch.rand(s, device='cpu') @ torch.rand((s, s), device='cpu'), number=1000))
    print('mps', timeit.timeit(lambda: torch.rand(s, device='mps') @ torch.rand((s, s), device='mps'), number=1000))
    print()

size 250
cpu 0.20693562499999985
mps 0.2643940000000029

size 1000
cpu 2.7703225409999988
mps 0.1607398749999973

size 10000
cpu 305.074170666
mps 14.721663707999994



In [1]:
# Macbook M2-Pro Chip
# Micro-benchmark: vector @ matrix on CPU vs. MPS. Requires an MPS-capable machine.
# NOTE(review): MPS work may be queued asynchronously, so the 'mps' timings might not
# include full kernel execution - consider torch.mps.synchronize(); confirm.

import torch
import timeit

N_RUNS = 100_000  # same value as the original literal 1000_00, with conventional digit grouping

a_cpu = torch.rand(1000, device='cpu')
b_cpu = torch.rand((1000, 1000), device='cpu')
a_mps = torch.rand(1000, device='mps')
b_mps = torch.rand((1000, 1000), device='mps')

# Pre-allocated operands: times only the product itself.
print('cpu', timeit.timeit(lambda: a_cpu @ b_cpu, number=N_RUNS))
print('mps', timeit.timeit(lambda: a_mps @ b_mps, number=N_RUNS))

# Operands created inside the timed call: times allocation + random fill + product.
# The first of these runs on the CPU, so it is labelled 'cpu' (it used to print 'mps').
print('cpu', timeit.timeit(lambda: torch.rand(1000, device='cpu') @ torch.rand((1000, 1000), device='cpu'), number=N_RUNS))
print('mps', timeit.timeit(lambda: torch.rand(1000, device='mps') @ torch.rand((1000, 1000), device='mps'), number=N_RUNS))


cpu 1.9699033330000049
mps 2.851078958000002
mps 244.59739804100002
mps 9.544808584000009


In [2]:
# Macbook M2-Pro Chip


for s in [250,1000,10000]:
    print('size',s)
    print('cpu', timeit.timeit(lambda: torch.rand(s, device='cpu') @ torch.rand((s, s), device='cpu'), number=1000))
    print('mps', timeit.timeit(lambda: torch.rand(s, device='mps') @ torch.rand((s, s), device='mps'), number=1000))
    print()

size 250
cpu 0.15806716699995604
mps 0.0993107500000292

size 1000
cpu 2.5029747500000212
mps 0.09678083400001469

size 10000
cpu 289.8773350409999
mps 5.182745000000068



## Putting tensors (and modules) on GPU

In [8]:
# Create a tensor and move it to the best available device
# (CUDA first, then Apple Silicon MPS, otherwise it stays on the CPU)

if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

tensor = torch.tensor([1, 2, 3])

# The result of .to(device) is bound to a new name; `tensor` itself is not reassigned
tensor_on_gpu = tensor.to(device)

tensor_on_gpu


tensor([1, 2, 3], device='mps:0')

In [11]:
# If tensor is on GPU, can't transform it to NumPy directly.
# The error is the point of this cell, so catch it and display it instead of letting
# it abort a "Restart & Run All". If the tensor happens to live on the CPU, the
# conversion simply succeeds and nothing is printed.
try:
    tensor_on_gpu.numpy()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: can't convert mps:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [13]:
# Put tensor back on CPU

tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu

tensor([1, 2, 3])

In [14]:
# Now we can transform it to a NumPy array

numpy_array = tensor_back_on_cpu.numpy()
numpy_array

array([1, 2, 3])