## PyTorch Fundamentals

In [1]:
## Import
import torch
print(torch.__version__)
 # Version 2.8.0 running on local cpu core
 # Pytorch can run run on GPUs/TPUs
 # If it was running on NVIDIA Cuda GPUs-> +cu111


2.8.0+cpu


### Tensors
A general container for numerical data that neural networks operate on.

![1__D5ZvufDS38WkhK9rK32hQ.jpg](attachment:1__D5ZvufDS38WkhK9rK32hQ.jpg)

Differ from NumPy arrays as arrays are data structures that store numerical values only. They are *generalized* versions of NumPy arrays that also store store weights, activations, functions, gradients and so on...for performing mathematical operations.

It's the main foundation of PyTorch models

In [2]:
## Scalar (A number- 0D)
scalar= torch.tensor(7)
print(scalar)
print('Shape:', scalar.shape)
print('n-dim:', scalar.ndim)
print('\nValue:', scalar.item())

tensor(7)
Shape: torch.Size([])
n-dim: 0

Value: 7


In [3]:
## Vector (array- 1D)
vector= torch.tensor([1,2,3,4,5])
print(vector)
print('Shape:', vector.shape)
print('n-dim:', vector.ndim)

tensor([1, 2, 3, 4, 5])
Shape: torch.Size([5])
n-dim: 1


In [4]:
## Matrix (2D array)
MATRIX= torch.tensor([[1,2,3],
                         [4,5,6]])
print(MATRIX)
print(f'Shape: {MATRIX.shape}')
print(f'n-dim: {MATRIX.ndim}')

tensor([[1, 2, 3],
        [4, 5, 6]])
Shape: torch.Size([2, 3])
n-dim: 2


In [5]:
## 3D Array/Tensor
TENSOR_3D= torch.tensor([[[1,2,3], [4,5,6]],
                         [[7,8,9],[10,11,12]]])
print(TENSOR_3D)
print(f'Shape: {TENSOR_3D.shape}')
 # Shape: (2 2D tensors, 2 1D tensors within the 2D tensors, 3 vectors per 2D array
print(f'n-dim: {TENSOR_3D.ndim}')
print(f'\n{TENSOR_3D[0]}')
print(f'\n{TENSOR_3D[1]}')


tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])
Shape: torch.Size([2, 2, 3])
n-dim: 3

tensor([[1, 2, 3],
        [4, 5, 6]])

tensor([[ 7,  8,  9],
        [10, 11, 12]])


A widely folllowed convention is writing higher dimensional tensor variables (2D and above) uppercase for some reason perhaps to distinguish from the lower dimensional tensor variables (1D and below) which are written in lowercase.

#### Random Tensors
Neural Networks usually start with randommized tensors (containing random values) to initialize the weights and biases of the neural network. These random values are then updated during training to better represent the data.


In [6]:
## Create a random tensor of shape (3,4)- 3 rows, 4 columns
rand_tensor= torch.rand(3,4)
rand_tensor

tensor([[0.6286, 0.2407, 0.9429, 0.9763],
        [0.8754, 0.8596, 0.5657, 0.9389],
        [0.2860, 0.6648, 0.5336, 0.8213]])

In [7]:
## Multi-dimensional tensor
ndim_tensor= torch.rand(10,10,10,10,10,15)
print(f'Shape: {ndim_tensor.shape}')
print(f'ndim: {ndim_tensor.ndim}')
print(ndim_tensor)

Shape: torch.Size([10, 10, 10, 10, 10, 15])
ndim: 6


tensor([[[[[[4.3724e-01, 8.1030e-02, 9.2684e-02,  ..., 9.5203e-01,
             9.0016e-01, 6.1416e-02],
            [1.1649e-01, 5.2410e-02, 9.6805e-01,  ..., 6.1403e-01,
             4.6129e-01, 3.3043e-01],
            [6.2249e-01, 1.1935e-01, 3.1036e-01,  ..., 4.9606e-01,
             1.2197e-01, 5.7209e-01],
            ...,
            [2.7158e-01, 1.4038e-01, 5.1904e-01,  ..., 9.8640e-01,
             4.9686e-01, 6.9957e-01],
            [9.9573e-01, 3.8474e-02, 1.8959e-01,  ..., 5.5363e-01,
             1.4334e-01, 9.2406e-01],
            [7.4811e-01, 9.1246e-01, 8.5965e-01,  ..., 1.1254e-01,
             6.0773e-02, 4.6404e-01]],

           [[1.1693e-01, 2.1045e-01, 9.5445e-01,  ..., 9.5471e-01,
             4.8438e-01, 7.6292e-01],
            [6.8174e-01, 3.0524e-01, 1.8065e-01,  ..., 6.8879e-01,
             6.7243e-01, 7.2495e-01],
            [5.1625e-01, 6.3433e-01, 7.4989e-01,  ..., 5.8965e-01,
             9.3289e-01, 3.1538e-01],
            ...,
            [9.7639

In [8]:
## Random Tensor similar to an image tensor
random_image_tensor= torch.rand(size= (224, 224, 3))
 # Height, Width, color channels(red, green, blue)
 # You can choose to specify 'size=' or not
random_image_tensor.shape, random_image_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros & Ones Tensors

In [9]:
## Tensor of all zeros
zeros_tensor= torch.zeros(3,3,3)
print(f'Shape: {zeros_tensor.shape}')
print(f'ndim: {zeros_tensor.ndim}')
print(f'dtype: {zeros_tensor.dtype}')
zeros_tensor

Shape: torch.Size([3, 3, 3])
ndim: 3
dtype: torch.float32


tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])

In [10]:
## Tensor of all ones
ones_tensor= torch.ones(3,3,3)
ones_tensor

tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]])

### Empty Tensors
Unlike random, zeros and ones tensors that are filled (initialized) with specific values, empty tensors are unitialized. This means they are not filled with any specified value (randomizes or zeros or ones) rather, they contain whatever values were already present at that memory location.

As they don't initialize any values, `torch.empty()` is faster to create than `torch.zeros()` or `torch.ones()`. However, since they are uninitialized, they may contain arbitrary values, which can lead to unpredictable behavior if not handled carefully. Therefore, they are best used when:
- You plan to fill the tensor immediately after creation
- Performance is critical and you want to avoid the overhead of initialization (This overhead is more noticable when creating larger tensors. e.g.- `torch.empty([(5000,5000)])`)
- When using operations that will overwrite all values in the tensor anyway





In [None]:
## Empty tensors vs zeroz tensors benchmark
import time

sizes= [(100,100), (1000,1000), (5000,5000), (10000, 10000)]

for s in sizes:
    print(f'\nTesting size {s}')

    # Time take by torch.empty()
    start= time.time() # Current time in seconds
    for _ in range(10): # Generate 10 empty tensors
        t= torch.empty(s)
    empty_time= time.time() - start # time elapsed

    # Time taken by torch.zeros()
    start= time.time()
    for _ in range(10):
        t= torch.zeros(s)
    zeros_time= time.time()- start

    print(f'Empty: {empty_time:.2f}s')
    print(f'Zeros: {zeros_time:.2f}s')
    print(f'Ratio: {empty_time/zeros_time:.2f}x')
    # Empty tensors are faster to create than zeros tensors. Especially for larger tensors 



Testing size (100, 100)
Empty: 0.00s
Zeros: 0.00s
Ratio: 0.00x

Testing size (1000, 1000)
Empty: 0.00s
Zeros: 0.01s
Ratio: 0.00x

Testing size (5000, 5000)
Empty: 0.00s
Zeros: 0.13s
Ratio: 0.02x

Testing size (10000, 10000)
Empty: 0.00s
Zeros: 0.59s
Ratio: 0.01x


### Transposed Tensors
This refers to changing the dimensions of a tensor by swapping its axes i.e: 
- transposing a 2D tensor (matrix) involves swapping its rows and columns.
- transposing a 3D tensor (matrix) involves swapping its matrix, rows and columns....

and so on....Transposing is done using

In [None]:
tensor= torch.rand(3,4)
tensor_transposed= torch.transpose.T 
 # This method is better used on 2D tensors. For any higher order tensors, it reverses the order of the tensors. eg- shape (2,3,4) to shape (4,3,2)
 # For tensors with higher dimensions use `tensor.transpose(_, _)` or torch.transpose(tensor, _ ,_) where we specify the two dimensions to be swapped
 # Only two dimensions can be specified as transposing swaps exactly two dimensions

## Shape Change
print(f'Original Tensor: {tensor.shape}')
print(f'Transposed Tensor: {tensor_transposed.shape}')

## Checking whether underlying data is copied or shared
tensor_transposed[-1,-1]= 50
print('\nOriginal Tensor')
print(tensor)

print('\nTransposed Tensor')
print(tensor_transposed)

Original Tensor: torch.Size([3, 4])
Transposed Tensor: torch.Size([4, 3])

Original Tensor
tensor([[ 0.7433,  0.1548,  0.9901,  0.1524],
        [ 0.4109,  0.0889,  0.6286,  0.9992],
        [ 0.6108,  0.7678,  0.4348, 50.0000]])

Transposed Tensor
tensor([[ 0.7433,  0.4109,  0.6108],
        [ 0.1548,  0.0889,  0.7678],
        [ 0.9901,  0.6286,  0.4348],
        [ 0.1524,  0.9992, 50.0000]])


The last element for each tensor has been altered which means that both the original and transposed tensors share memory; we have just created a different view of the same data. To create an entirely new tensor, we would create a copy via the `.clone()` method.

In [11]:
tensor_3d= torch.rand(3,4,5)
tensor_3d_transposed= torch.transpose(tensor_3d, 1,2)
 # Transposing swaps exactly two dimensions
 # In the above, we swap dimensions 1 and 2

## Shape Change
print(f'Original Tensor: {tensor_3d.shape}')
print(f'Transposed Tensor: {tensor_3d_transposed.shape}')

Original Tensor: torch.Size([3, 4, 5])
Transposed Tensor: torch.Size([3, 5, 4])


### Range of tensors
Here we use `torch.arange(start, stop, skip)` to specify a rage of values between a start and stop value with an optional no. of values to skip within the range.

In [11]:
## Range of tensors 1-15
one_to_15= torch.arange(1,16)
one_to_15

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [12]:
## Tensor range of even number from 1-50
torch.arange(2, 50, 2)

tensor([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36,
        38, 40, 42, 44, 46, 48])

In [13]:
## Creating tensors like
fifteen_zeros= torch.zeros_like(input= one_to_15)
print(f'{fifteen_zeros.shape}, {fifteen_zeros.ndim}')
print(f'\n{fifteen_zeros}')
 # This type of tensor does not contain float numbers

torch.Size([15]), 1

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [14]:
## Zeros tensor (Conatins float numbers)
zeros_fifteen= torch.zeros(15)
print(f'{zeros_fifteen.shape}, {zeros_fifteen.ndim}')
print(f'\n{zeros_fifteen}')

torch.Size([15]), 1

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


### Tensor Datatypes

In [15]:
## Float32 Tensor
# The default datatype of tensors is Float32
float32_tensor= torch.tensor([3.0, 6.0, 9.0], dtype= None)
print(f'{float32_tensor.dtype}')
print(float32_tensor)


torch.float32
tensor([3., 6., 9.])


In [16]:
## Float16
float16_tensor= torch.tensor([3.0, 6.0, 9.0], 
                             dtype= torch.float16)

print(float16_tensor)

tensor([3., 6., 9.], dtype=torch.float16)


**Float Datatypes**

`float(16, 32, 64)` are floating point number formats where each number represents how many bits (binary digits) are used to store one number.

More bits means more precision (due to more decimal places) but also takes up more memory and slows down computation.

Lower bits means less precision (due to fewer decimal places) but also takes up less memory and speeds up computation. Therefore a tradeoff must be considered.


**Tensor Datatypes**

When dealing with tensor datatypes, we may run into either of three common problems:
- Tensors not right datatype *(in some cases)* (e.g.- Performing matrix multiplication using float32 and float64 tensors will bring an error)
    - Get datatype of tensor using `tensor.dtype`
- Tensors not right shape (e.g.- Performing matrix multiplication using a tensor of shape (2,3) and (3,2) will also bring an error)
    - Get shape of tensor using `tensor.shape` or `tensor.size()`
- Tensors not right device (e.g.- Performing matrix multiplication using a tensor on CPU and another on a GPU will also bring an error)
    - Get device of tensor using `tensor.device`



#### Tensor Device

In [17]:
cpu_tensor= torch.tensor([3,4,5], 
                         dtype= None, # Datatype of tensor
                         device= 'cpu', # What device the tensor is on
                         requires_grad= False) # Whether or not to track gradients with this tensor's operations
cpu_tensor.device

device(type='cpu')

#### Changing Datatypes 

In [18]:
## We have a float32 tensor
tensor1= torch.rand(10, 3)
tensor1

tensor([[0.2845, 0.5758, 0.6724],
        [0.5913, 0.1309, 0.9990],
        [0.6855, 0.4802, 0.8050],
        [0.3934, 0.1528, 0.5005],
        [0.2049, 0.4293, 0.1543],
        [0.2863, 0.2272, 0.0087],
        [0.2618, 0.0344, 0.4154],
        [0.4599, 0.3812, 0.2610],
        [0.0654, 0.7698, 0.4729],
        [0.3006, 0.3723, 0.9883]])

In [19]:
## We can change it to float64 tensor
float64_tensor= tensor1.type(torch.float64)
float64_tensor

tensor([[0.2845, 0.5758, 0.6724],
        [0.5913, 0.1309, 0.9990],
        [0.6855, 0.4802, 0.8050],
        [0.3934, 0.1528, 0.5005],
        [0.2049, 0.4293, 0.1543],
        [0.2863, 0.2272, 0.0087],
        [0.2618, 0.0344, 0.4154],
        [0.4599, 0.3812, 0.2610],
        [0.0654, 0.7698, 0.4729],
        [0.3006, 0.3723, 0.9883]], dtype=torch.float64)

### Manipulating Tensors (Tensor Operations)
Tensor operarions include
- Addition
- Subtraction
- Multiplication (element-wise)
- Division
- Matrix multiplication

A neural network will perform these operations on tensors to find patterns in the data and compute the output.

In [20]:
tensor1= torch.tensor([[1,2,3],
                       [4,4,6]])
tensor1

tensor([[1, 2, 3],
        [4, 4, 6]])

In [21]:
## Addition
tensor1 + 10

tensor([[11, 12, 13],
        [14, 14, 16]])

In [22]:
## Multiplication
tensor1 * 10

tensor([[10, 20, 30],
        [40, 40, 60]])

In [23]:
## Subtraction
tensor1 - 10

tensor([[-9, -8, -7],
        [-6, -6, -4]])

In [24]:
## Divsion
tensor1/10

tensor([[0.1000, 0.2000, 0.3000],
        [0.4000, 0.4000, 0.6000]])

We can also use PyTorch inbuilt functions to perform operations.

In [25]:
## Addition
torch.add(tensor1, 10)

tensor([[11, 12, 13],
        [14, 14, 16]])

In [26]:
## Multiplication
torch.multiply(tensor1, 10)

tensor([[10, 20, 30],
        [40, 40, 60]])

In [27]:
## Division
torch.divide(tensor1, 10)

tensor([[0.1000, 0.2000, 0.3000],
        [0.4000, 0.4000, 0.6000]])

**Multiplication Types**

Neural networks perform two types of multiplications:
- Element-wise multiplication
    - It entails multiplying each element of one matrix with the corresponding element of another matrix. Here matrices either have to be the same shape or one of them has to be a scalar as we have done above.
    - More common in CNNs
- Matrix-vector multiplication
    - Dot product of two vectors where the rows of the first matrix are multiplied by the columns of the second matrix and then summed up.
    - More common in standard fully connected neural networks
    - If matrix A has shape $(m,n)$ and matrix B has shape $(o,p)$, then matrix multiplication $A \cdot B$ is valid if $n=o$ (*inner dimensions*) (column shape= row shape)
    - The resulting matrix has the shape of the outer dimensions $(m,p)$

<img src="download.jpg" alt="alt text" style="width:450px; max-width:100%; height:auto;" />



In [28]:
tensor1

tensor([[1, 2, 3],
        [4, 4, 6]])

In [29]:
tensor2= torch.tensor([[1,2],
                       [3,4],
                       [5,6]])
tensor2

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [30]:
## Element-wise multiplication
tensor1 * tensor1

tensor([[ 1,  4,  9],
        [16, 16, 36]])

In [46]:
## Matrix multiplication is possible if the inner dimensions are equal 
 # (2,3), (3,2). Inner dimesnion 3
print('Tensor1:', tensor1, tensor1.shape)
print(f'\nTensor2: {tensor2, tensor2.shape}')
## Output is the outer dimension shape of the tensors involved
 # (2,3), (3,2). Outer Dimesion 2
matrix_tensor= torch.matmul(tensor1, tensor2)
print(f'\nMatrix_Tensor: {matrix_tensor, matrix_tensor.shape}')


Tensor1: tensor([[1, 2, 3],
        [4, 4, 6]]) torch.Size([2, 3])

Tensor2: (tensor([[1, 2],
        [3, 4],
        [5, 6]]), torch.Size([3, 2]))

Matrix_Tensor: (tensor([[22, 28],
        [46, 60]]), torch.Size([2, 2]))


Using PyTorch operator functions may be faster than using the operators directly.

In [33]:
tensor100= torch.rand(20, 30, 50)
tensor100.shape, tensor100.ndim


(torch.Size([20, 30, 50]), 3)

In [34]:
%%time
tensor100 * 2615

CPU times: total: 0 ns
Wall time: 3.1 ms


tensor([[[1858.7504,  510.1211, 1431.3658,  ...,  388.9445, 2400.6665,
          2179.8071],
         [1872.9313, 1396.6289, 1101.4708,  ...,  680.7652, 1156.2045,
          1156.1674],
         [1335.7968,  385.3945,  111.0113,  ..., 2576.9011,  504.4249,
           951.5404],
         ...,
         [2139.0793, 1446.9598,  869.7052,  ...,  540.5372, 1691.9453,
           119.7236],
         [1078.5128, 1187.0143, 2060.1650,  ..., 2388.7537, 1528.0359,
           620.5092],
         [1185.2776,  612.3404, 2484.0691,  ...,  233.6603, 1349.8766,
          1522.6404]],

        [[ 820.6760, 2599.3672, 1868.2236,  ...,  759.6648, 2524.7231,
          1446.9796],
         [2473.3376, 1657.0669,  209.1484,  ...,  957.4161,    3.9280,
          1967.7379],
         [1530.5225, 1111.9541, 1726.8674,  ..., 1318.6219, 1957.1090,
           993.6958],
         ...,
         [2041.5566,  474.8584, 1682.2051,  ..., 1845.2339,  828.3032,
          1160.3336],
         [2084.2478, 1349.4503,  663.523

In [35]:
%%time
torch.multiply(tensor1, 2615)

CPU times: total: 0 ns
Wall time: 0 ns


tensor([[ 2615,  5230,  7845],
        [10460, 10460, 15690]])

### Tensor Aggregation (Min, Max, Mean, Sum, etc)

In [36]:
## Tensor
x= torch.arange(50)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [37]:
## Min
torch.min(x), x.min()

(tensor(0), tensor(0))

In [38]:
## Max
torch.max(x), x.max()

(tensor(49), tensor(49))

In [39]:
## Mean
 # Torch mean function cannot work on int64 datatype (Tensor Long datatype)
 # Needs to be converted to float datatype
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(24.5000), tensor(24.5000))

In [40]:
## Sum
torch.sum(x), x.sum()

(tensor(1225), tensor(1225))

In [41]:
## Median
torch.median(x), x.median() 

(tensor(24), tensor(24))

In [42]:
## Std Dev
 # Supports only float datatype
torch.std(x.type(torch.float32)), x.type(torch.float32).std()

(tensor(14.5774), tensor(14.5774))

#### Positional Min & Max
Which index the min and max lies

In [43]:
y= torch.rand(15)
y

tensor([0.4653, 0.2891, 0.8656, 0.5751, 0.5546, 0.7963, 0.5492, 0.9161, 0.2914,
        0.4442, 0.9127, 0.1940, 0.4399, 0.0900, 0.8082])

In [44]:
## Index positon of min
torch.argmin(y), y.argmin()

(tensor(13), tensor(13))

In [45]:
## Index position of max
torch.argmax(y), y.argmax()

(tensor(7), tensor(7))