In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import torch
print(torch.__version__)
print(torch.cuda.is_available())  # True if PyTorch can use GPU


2.9.0+cpu
False


**What is PyTorch?**
- most popular research deep learning framework
- write fast deep learning code in Python (able to run on a GPU/many GPUs) 
- Able to access many pre-built deep learning models (Torch Hub/ torchdivisions.models)
- whole stack: preprocess data, model data, deploy model in your application/cloud
- originally designed and used in-house by Facebook/Meta (now open-source and used by companies such as Tesla, Microsoft, OpenAI)

**Tensors (like NumPy arrays, but with GPU acceleration).**

Example: Running physics simulations or mathematical optimization.

Creating an empty Tensor

In [4]:
x = torch.empty(2, 3) # Creates a 2x3 matrix with uninitialized values
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [5]:
x = torch.rand(2, 3, dtype=torch.float32) #float32 means 32-bit floating point 
print(x)
print(x.dtype)
print(x.size())  # 

tensor([[0.2162, 0.6979, 0.5876],
        [0.7250, 0.8474, 0.8090]])
torch.float32
torch.Size([2, 3])


In [6]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
print(x)
print(y)
z = x + y
z = torch.add(x, y)  # another way to add
print(z)

y.add_(x)  # in-place addition
print(y)



tensor([[0.0427, 0.9045, 0.0266],
        [0.2643, 0.3797, 0.5388]])
tensor([[0.0405, 0.9960, 0.8178],
        [0.7977, 0.2083, 0.4024]])
tensor([[0.0832, 1.9005, 0.8444],
        [1.0620, 0.5880, 0.9412]])
tensor([[0.0832, 1.9005, 0.8444],
        [1.0620, 0.5880, 0.9412]])


In [7]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
print(x)
print(y)

z = x - y
z = torch.sub(x, y)  # another way to subtract
print(z)

tensor([[0.4438, 0.6561, 0.0963],
        [0.7901, 0.4594, 0.3508]])
tensor([[0.1286, 0.2632, 0.9576],
        [0.0715, 0.2947, 0.0384]])
tensor([[ 0.3152,  0.3929, -0.8614],
        [ 0.7186,  0.1647,  0.3125]])


In [8]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
print(x)
print(y)

z = x * y
z = torch.mul(x, y)  # another way to multiply
print(z)

tensor([[0.3300, 0.4379, 0.1028],
        [0.2261, 0.7465, 0.4833]])
tensor([[0.2588, 0.5717, 0.1556],
        [0.1012, 0.8391, 0.2250]])
tensor([[0.0854, 0.2503, 0.0160],
        [0.0229, 0.6264, 0.1087]])


In [9]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
print(x)
print(y)

z = x * y
z = torch.div(x, y)
print(z)

tensor([[0.9417, 0.5666, 0.3091],
        [0.2304, 0.8315, 0.1816]])
tensor([[0.7497, 0.3809, 0.0585],
        [0.3629, 0.6711, 0.4536]])
tensor([[1.2562, 1.4876, 5.2819],
        [0.6349, 1.2390, 0.4004]])


In [10]:
x = torch.rand(2, 3)

print(x)
print(x[:, 1])  #all rows, column 1
print(x[1, :])  #row 1, all columns
print(x[1, 1].item())  #get the value as a standard Python number


tensor([[0.2029, 0.2472, 0.3814],
        [0.1498, 0.7939, 0.2005]])
tensor([0.2472, 0.7939])
tensor([0.1498, 0.7939, 0.2005])
0.7939461469650269


In [11]:
#scalar tensors
scalar = torch.tensor(7)
scalar

tensor(7)

In [12]:
scalar.ndim

0

In [13]:
#get tensor back as a Python int
scalar.item()

7

In [14]:
#vector
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [15]:
vector.ndim

1

In [16]:
vector.shape #2 elements in vector

torch.Size([2])

In [17]:
#Matrix
matrix = torch.tensor([[1,2,3],
                        [4,5,6],
                        [7,8,9]])
matrix

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [18]:
matrix.ndim

2

In [19]:
matrix.shape #3 number of rows, 3 number of columns

torch.Size([3, 3])

In [20]:
#Tensor
tensor = torch.tensor([[[1,2,3],
                        [4,5,6],
                        [7,8,9],
                        [10,11,12]],
                       [[19,20,21],
                        [22,23,24],
                        [25,26,27],
                        [28,29,30]]])
tensor   #most of the tensors we don't write by hand like this

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9],
         [10, 11, 12]],

        [[19, 20, 21],
         [22, 23, 24],
         [25, 26, 27],
         [28, 29, 30]]])

In [21]:
tensor.ndim #rows, columns and a layer, so total 3 dimensions

3

In [22]:
tensor.shape 
#2 layers and each matrix has 3 rows and 3 columns 
#therefore shape shows first the number of layers, then number of rows in the matrix and then the number of elements in each row

torch.Size([2, 4, 3])

**Random Tensors**

Random tensors are important because the way many neural networks learn is that they start with tensors full of random
numbers and then adjust those random numbers to better represent the data.

'Start with random numbers ==> look at data ==> update random numbers ==> look at data ==> update random numbers

In [23]:
#create a random tensor of size (3,4)
random_tensor = torch.rand(3, 4) #creates a tensor of 3 rows and 4 columns with random values between 0 and 1
random_tensor

tensor([[0.7480, 0.6417, 0.6538, 0.6774],
        [0.8526, 0.7786, 0.9433, 0.5988],
        [0.0562, 0.7834, 0.9179, 0.9912]])

In [24]:
random_tensor.ndim

2

In [25]:
random_tensor = torch.rand(1, 3, 4)
random_tensor

tensor([[[0.6927, 0.2978, 0.0880, 0.7969],
         [0.1174, 0.3294, 0.5533, 0.9187],
         [0.0212, 0.1811, 0.5262, 0.8606]]])

In [26]:
random_tensor.ndim

3

In [27]:
#create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size = (3, 224, 224)) #color channels (RGB), height, width
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

In [28]:
#Create a tensor of all zeros
zeros = torch.zeros (size = (3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [29]:
#Create a tensor of all ones
ones = torch.ones (size = (3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [30]:
ones.dtype

torch.float32

In [31]:
#create a range of tensors and tensors-like
one_to_ten = torch.arange(0, 10)  #similar to python's range function
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [32]:
one_to_ten = torch.arange(start = 1, end = 11, step = 1 )
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [33]:
#creating tensors like
ten_zeros = torch.zeros_like(input = one_to_ten)  #creates a tensor of zeros with the same shape as one_to_ten
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

**Tensor Datatypes**

In [34]:
#Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                                     dtype = None, #what dtype is this tensor
                                     device = None, #what device is this tensor on
                                     requires_grad = False) #whether or not to track gradients with this tensor operations
float_32_tensor

                                     

tensor([3., 6., 9.])

**What is meant by 32-bit floatung point, 16-bit floating point and 64-bit floating point?**
- 32-bit is sigle precision (default)
- 16-bit is half precision
- These numbers represent how much detail a single numbers is stored in memory.

**NOTE:** Tensor datatypes is one of the 3 big errors you'll run with Pytorch and deep learning:
1. Tensors not right datatypes
2. Tensors not right shape
3. Tensors not on the right device

In [35]:
float_32_tensor.dtype  

torch.float32

In [36]:
#convert float32 tensor to float16 tensor (reduced precision, half precision)
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [37]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [38]:
int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.long) #long means int64
int_32_tensor

tensor([3, 6, 9])

In [39]:
float_32_tensor * int_32_tensor

tensor([ 9., 36., 81.])

**Getting Information from Tensors (Tensors attributes)**
1. Tensors not right datatypes - to do get datatype from a tensor, can use tensor.dtype
2. Tensors not right shape - to get shape from a tensor, can use tensor.shape
3. Tensors not on the right device- to get device from a tensor, can use tensor.device

In [40]:
##create a tensor
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.8068, 0.2817, 0.6833, 0.9806],
        [0.6558, 0.9452, 0.6593, 0.0701],
        [0.4980, 0.7698, 0.1036, 0.7358]])

In [41]:
some_tensor.size (), some_tensor.shape

(torch.Size([3, 4]), torch.Size([3, 4]))

In [42]:
##find out the details about some tensor
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is stored on: {some_tensor.device}")

tensor([[0.8068, 0.2817, 0.6833, 0.9806],
        [0.6558, 0.9452, 0.6593, 0.0701],
        [0.4980, 0.7698, 0.1036, 0.7358]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is stored on: cpu


##Manipulating tensors (tensor operations)

Tensor operations include
-  Addition
- Substraction
- Multiplication (element wise)
- Division
- Matrix multiplication


In [43]:
#create a tensor
tensor = torch.tensor([1, 2, 3])
tensor+10

tensor([11, 12, 13])

In [44]:
#multiply tensor by 10
tensor *10

tensor([10, 20, 30])

In [45]:
tensor

tensor([1, 2, 3])

In [46]:
#subtract tensor by 10
tensor-10


tensor([-9, -8, -7])

In [47]:
# Pytorch built-in functions
torch.mul(tensor, 10)

tensor([10, 20, 30])

In [48]:
torch.add(tensor, 10)

tensor([11, 12, 13])

**Matrix Multiplication**
Two main ways of performing multiplication in neural networks and deep learning:
1. Element-wise multiplication
2. Matrix multiplication

There are two main rules that performing matrix multiplication needs to satisfy: 
1. The **inner dimensions** must match:
e.g. (3, 2) @ (3, 2) won't work
     (2, 3) @ (3, 2) will work
     



In [49]:
##Element wise multiplication
print(tensor, "*", tensor)
print(f"equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
equals: tensor([1, 4, 9])


In [50]:
#Matrix multiplication (basically the dot product using torch function)
torch.matmul(tensor, tensor)

tensor(14)

In [54]:
# can also use the '@' symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

In [51]:
#Calculate the dot product using a for loop
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
for i in  range(len(tensor)):
                value += tensor[i] * tensor[i]
print(value)

tensor(14)


In [55]:
#Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                                     dtype = None, #what dtype is this tensor
                                     device = None, #what device is this tensor on
                                     requires_grad = False) #whether or not to track gradients with this tensor operations
                                     
float_32_tensor

tensor([3., 6., 9.])

**One of the most common errors in deep learning (shape errors)**

In [67]:
#Shapes need to be in the right place
tensor_A = torch.tensor ([[1, 2], 
                          [3, 4],
                          [5, 6]], dtype = torch.float32)
tensor_B =  torch.tensor([[7, 10],
                          [8, 11],
                          [9, 12]], dtype = torch.float32)
#here matrix multiplication will give an error
#Inner dimensions of the matrices need to match


**Transpose** switches the dimensions of a given tensor
In Pytorch:
- **torch.transpose(input, dim0, dim1)** : where *input* is the desired tensor to transpose and *dim0* and *dim1* are the dimensions to be swaped
- **tensor.T** : where *tensor* is the desired tensor to transpose

In [69]:
#Transpose tensor_B
print(tensor_B.T)
print(tensor_B.T.shape)

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])
torch.Size([2, 3])


In [66]:
#Transpose tensors and multiply
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape}") #inner dimensions match
multiplication_output = torch.matmul (tensor_A, tensor_B.T)
print(multiplication_output)

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3])
tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])


**Neural networks are full of matrix multiplications and dot products**

 The *torch.nn.Linear()* module, also knows as a feed-forward layer or fully connected layer, implements a matrix multiplication between an input x and a weights matrix A.


Where: 
 $$ y = x.A^T + b $$

x is the input to the layer (deep learning is a stack of layers like torch.nn.Linear() and others on top of each other).

A is the weights matrix created by the layer, this starts out as random numbers that get adjusted as a neural network learns to better represent patterns in the data (notice the "T", that's because the weights matrix gets transposed).

**Note**: You might also often see W or another letter like X used to showcase the weights matrix.

b is the bias term used to slightly offset the weights and inputs.
y is the output (a manipulation of the input in the hopes to discover patterns in it).


**Finding the min, max, mean, sum, etc (aggregation)**

In [70]:
#create a tensor
x = torch.arange(0, 100, 10)
x


tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [80]:
#Using torch built-in functions, these are more commonly used
print(f"Minimum: {torch.min(x)}")
print(f"Maximum: {torch.max(x)}")
print(f"Mean: {torch.mean(x.float())}")  #convert to float first #won't work without float datatype
print(f"Sum: {torch.sum(x)}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [78]:
#Other ways
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
print(f"Mean: {x.type(torch.float32).mean()}")
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


**Positional min/max**

To find the index of a tensor where the max or minimum occurs with *torch.arg(max)* and *torch.argmin()* respectively.


In [81]:
tensor = torch.arange(0, 100, 10)
print(f"Tensor: {tensor}")

#Returns index of min and max
print(f"Index of minimum value: {torch.argmin(tensor)}")
print(f"Index of maximum value: {torch.argmax(tensor)}")

Tensor: tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
Index of minimum value: 0
Index of maximum value: 9


**Change the tensor datatype**

A common issue with deep learning operations is having your tensors in different datatypes.

If one tensor is in torch.float64 and another is in torch.float32, you might run into some errors.

Datatypes of tensors can be changed using **torch.Tensor.type(dtype=None)** where the dtype parameter is the datatype you'd like to use.

In [92]:
tensor_default = torch.arange(0., 100., 10.)
print(tensor_default)
tensor_default.dtype

tensor([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.])


torch.float32

In [93]:
#change the datatype to float16
tensor_float16 = tensor.type(torch.float16)
tensor_float16

tensor([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)

**Note**: Different datatypes can be confusing to begin with. But think of it like this, the lower the number (e.g. 32, 16, 8), the less precise a computer stores the value. And with a lower amount of storage, this generally results in faster computation and a smaller overall model. Mobile-based neural networks often operate with 8-bit integers, smaller and faster to run but less accurate than their float32 counterparts. 

**Reshaping, stacking, squeezing and unsqueezing**

To reshape or change the dimensions of your tensors without actually changing the values inside them.

torch.reshape(input, shape):	Reshapes input to shape (if compatible), can also use torch.Tensor.reshape().

Tensor.view(shape):	Returns a view of the original tensor in a different shape but shares the same data as the original tensor.

torch.stack(tensors, dim=0):	Concatenates a sequence of tensors along a new dimension (dim), all tensors must be same size.

torch.squeeze(input):	Squeezes input to remove all the dimenions with value 1.

torch.unsqueeze(input, dim):	Returns input with a dimension value of 1 added at dim.

torch.permute(input, dims):	Returns a view of the original input with its dimensions permuted (rearranged) to dims.

In [3]:
import torch
x = torch.arange(1., 10.)
x, x.shape


(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [9]:
#add an extra dimension with torch.reshape()
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [12]:
# Change view (keeps same data as original but changes view)
z = x.view(1, 9)
z, z.shape
#Remember though, changing the view of a tensor with torch.view() really only creates a new view of the same tensor.
#So changing the view changes the original tensor too.

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [14]:
#Changing z changes x
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [17]:
#stacking a new tensor on top of itself five times
x_stacked = torch.stack([x]*5, dim=0)
x_stacked, x_stacked.shape

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
         [5., 2., 3., 4., 5., 6., 7., 8., 9.],
         [5., 2., 3., 4., 5., 6., 7., 8., 9.],
         [5., 2., 3., 4., 5., 6., 7., 8., 9.],
         [5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 torch.Size([5, 9]))

In [None]:
#stacking a new tensor on top of itself five times
x_stacked = torch.stack([x]*5, dim=1) 
x_stacked, x_stacked.shape

(tensor([[5., 5., 5., 5., 5.],
         [2., 2., 2., 2., 2.],
         [3., 3., 3., 3., 3.],
         [4., 4., 4., 4., 4.],
         [5., 5., 5., 5., 5.],
         [6., 6., 6., 6., 6.],
         [7., 7., 7., 7., 7.],
         [8., 8., 8., 8., 8.],
         [9., 9., 9., 9., 9.]]),
 torch.Size([9, 5]))

In [21]:
#torch.squeeze() (I remember this as squeezing the tensor to only have dimensions over 1).
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [22]:
#torch.unsqueeze() to add a dimension value of 1 at a specific index.
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

## Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")


Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [None]:
#torch.permute(input, dims), where the input gets turned into a view with the new dims.
x_permuted = x_unsqueezed.permute(1, 0) #swap dimensions at index 1 with index 0 i.e. from (1,9) to (9,1) 
print(f"\nNew tensor: {x_permuted}")
print(f"New shape: {x_permuted.shape}")


New tensor: tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
New shape: torch.Size([9, 1])


**Indexing**