In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.0.1


In [2]:
torch.cuda.is_available()

True

In [3]:
!nvidia-smi

Sat Jul  1 22:09:59 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 532.03                 Driver Version: 532.03       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3080       WDDM | 00000000:0A:00.0  On |                  N/A |
|  0%   44C    P8               41W / 390W|    849MiB / 12288MiB |     18%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
# bracket notation helps to define the dimensions of a tensor in PyTorch
x = [[1, 2],[3, 4]]

In [5]:
x = torch.rand(5,3) # rand{0,1} 5 rows, 3 columns
print(x)
print(x.dtype)

tensor([[0.4964, 0.9329, 0.1361],
        [0.5665, 0.2957, 0.0744],
        [0.7036, 0.9525, 0.9620],
        [0.5882, 0.5303, 0.5702],
        [0.2858, 0.0828, 0.9421]])
torch.float32


### Why random tensors?
Random tensors are important because many neural networks are initialized with random numbers, i.e. parameters weights and biases, and adjust these random numbers to better represent the data.

In [6]:
# Example of bracket notation and concept of a 3dim-tensor: 2D RGB image
random_image_tensor = torch.rand(size=(3, 1280, 720)) # RGB color channel, height (pixel), width (pixel)
random_image_tensor.shape, random_image_tensor.ndim

(torch.Size([3, 1280, 720]), 3)

In [7]:
# Display random image
### Add code later ###

In [8]:
## Syntax notes ##
# torch.rand(X, Y, Z) == torch.rand(size=X, Y, Z), tensor of X * Y * Z dim

In [9]:
# Create a tensor of all zeros. 
zeros = torch.zeros(size=(3, 1280, 720))
zeros

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

### Notes - Zero tensors
Zero tensors can function as a mask i.e. setting a column, row, etc, or the entire tensor of a target tensor to 0, using matrix mult.

In [10]:
# Using zero tensor as a mask
zeros*random_image_tensor # ... the same result as the zeros tensor above

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [11]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 1280, 720))
ones

tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]],

        [[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]])

### Notes - Random, Zeros and Ones tensors
In practice, random tensors are used a lot, so are zeros tensor. Less commonly one tensors are used. But it stands to reason that Ones and Zeros tensors together could be use in conjunction to create manually masks for sparse solutions to a deep learning neural network, or to specify a neural network connectivity (graph) matrix.


In [12]:
# Use torch.arange(start, end 'end-1 index', step)
one_to_ten_tensor = torch.arange(1, 11)
print(one_to_ten_tensor)
ten_to_hundred_tensor = torch.arange(10, 101, 10) # == torch.arange(start=10, end=101, step=10)
print(ten_to_hundred_tensor)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
tensor([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])


In [13]:
# Creating tensors like e.g. zeros_like(input=X_tensor)
ten_zeros_tensor = torch.zeros_like(one_to_ten_tensor)
print(ten_zeros_tensor)
print(one_to_ten_tensor.shape)
print(ten_zeros_tensor.shape)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([10])
torch.Size([10])


In [118]:
# Tensor Datatypes
y1 = torch.ones(5,3, dtype=torch.int16, device="cuda")
y2 = torch.tensor([2.0, 4.0, 6.0, 8.0], 
               dtype=torch.int16, # What datatype the tensor is
               device="cuda", # What device is the tensor on i.e. select device "cpu", "gpu", "cuda", and so on...
               requires_grad=False) # Whether or not to track gradients with this tensor's operations
y3 = torch.tensor([2.0, 2.0, 4.0, 4.0], 
               dtype=torch.int32,
               device="cpu",
               requires_grad=False)
print(y1)
print(y1.dtype)
print()
print(y2)
print(y2.dtype)
print(y2.device)
print()
print(y3)
print(y3.dtype)
print(y3.device)
print()
print("Attempt y2*y3. Should throw an error due to different devices.")
try: print(y2*y3)
except Exception as e: print(e, ":O")

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]], device='cuda:0', dtype=torch.int16)
torch.int16

tensor([2, 4, 6, 8], device='cuda:0', dtype=torch.int16)
torch.int16
cuda:0

tensor([2, 2, 4, 4], dtype=torch.int32)
torch.int32
cpu

Attempt y2*y3. Should throw an error due to different devices.
Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! :O


In [119]:
# Changing a datatype
float_double_y2 = y2.type(torch.double) # == y2.type(torch.float64)
print(float_double_y2)
print(float_double_y2.shape, float_double_y2.size())

tensor([2., 4., 6., 8.], device='cuda:0', dtype=torch.float64)
torch.Size([4]) torch.Size([4])


### Notes - Tensor Data Types
There are many torch datatypes. Torch tensors are strictly-typed like Numpy arrays.
Refer to https://pytorch.org/docs/stable/tensors.html#torch-tensor for datatypes.
Recall the level of precision or max-min trade-off for the bit-size a datatype requires
Running a deep learning neural network on a smaller bit-size datatypes typically will be faster, so this embodies the precision vs. speed trade-off.


### Common sources of errors (check tensor attributes)
1. Tensors are not the correct/compatible datatype - check tensor.dtype
2. Tensors are not the correct shape - check tensor.shape (== tensor.size(), a method)
e.g. m x n * n x p shape required for matrix mult 
3. Tensors are not on the right device - check tensor.device
e.g. one tensor is on the "cpu" and another is on the "gpu", and you attempt to perform a matrix operation between these two tensors 

## Manipulating Tensors (Tensor operations)
Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix Multiplication

In [120]:
# Basic matrix operations
z = y1 - 0.5
print(z)
print((z - 0.5) * 2) # scale to -1 and 1 from {0,1} range 
#i.e. 0.5 on the range {0, 1} changes to 0 in range {-1,1}

tensor([[0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000]], device='cuda:0')
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], device='cuda:0')


In [121]:
torch.manual_seed(1157)
t1 = torch.rand(3,3)
t2 = torch.rand(3,3)
t3 = torch.rand(3,3)

In [122]:
print(2*z) # 2y : matrix*scalar
print(2*z + z - z - z) # 2z - z : matrix addition/subtraction 

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')
tensor([[0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000]], device='cuda:0')


In [123]:
print(f" z: {z}")
print(z.shape)
t4 = torch.rand(5,3, device="cuda")
print(f" t4: {t4}")
print(t4.shape)
t5 = torch.rand(3,5, device="cuda")
print(f" t5: {t5}")
print(t5.shape)
print(f" element-wise mult, z * t4: {z * t4}")
print("element-wise mult simply halves the entries of t4 because z is 0.5 in all entries")
print(f" mat-mult (dot product) z * t5: {torch.matmul(z, t5)}")
print("matrix multiplication (dot product) performs, in a 2D case, a row by col element-wise mult then summation ... ")
print("... for each entry in the result. E.g. the 1,1 entry is the element-wise multiplication and summation of...")
print("... the first row of the first tensor and the first column of the second tensor.")

 z: tensor([[0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000]], device='cuda:0')
torch.Size([5, 3])
 t4: tensor([[7.3699e-01, 1.9177e-01, 1.7897e-01],
        [1.8632e-01, 5.1089e-01, 9.2811e-01],
        [3.8807e-01, 3.9561e-01, 6.9177e-01],
        [5.3043e-04, 2.9116e-01, 6.7291e-01],
        [2.4199e-01, 8.6214e-01, 8.2421e-01]], device='cuda:0')
torch.Size([5, 3])
 t5: tensor([[0.4517, 0.2861, 0.8093, 0.7285, 0.9133],
        [0.3703, 0.2577, 0.7517, 0.5376, 0.6788],
        [0.6276, 0.5184, 0.7784, 0.8563, 0.1986]], device='cuda:0')
torch.Size([3, 5])
 element-wise mult, z * t4: tensor([[3.6849e-01, 9.5883e-02, 8.9483e-02],
        [9.3158e-02, 2.5545e-01, 4.6406e-01],
        [1.9403e-01, 1.9781e-01, 3.4589e-01],
        [2.6522e-04, 1.4558e-01, 3.3645e-01],
        [1.2100e-01, 4.3107e-01, 4.1210e-01]], device='cuda:0')
element-wise mult simply halves the entries of t4

In [124]:
tensor_a = z
tensor_b = t5

In [131]:
%%time
# 0ns
#torch.matmul(tensor_a, tensor_b) == torch.mm(tensor_a, tensor_b) shorthand version
torch.mm(tensor_a, tensor_b)

CPU times: total: 0 ns
Wall time: 0 ns


tensor([[0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954]], device='cuda:0')

In [133]:
%%time
 # 3.5~4.0ms using cuda
# final shape is m x n * n * p => m * p
res_m_rows = tensor_a.shape[0]
res_p_cols = tensor_b.shape[1]

output_tensor = torch.empty((res_m_rows, res_p_cols), device="cuda") 
# create an empty tensor of m * p dimensions for manual 2D matrix multiplication
# print(output_tensor.shape) torch.Size([5, 5])

for i, row_i in enumerate(tensor_a):
    temp_list_ = [] 
    # temporary list (a row in the final tensor) to store sums of element-wise row_i * col_j for each i,j combination
    for j, col_j in enumerate(torch.t(tensor_b)): 
        # uses transpose (torch.t) to flip the orientation of tensor_b i.e. rows -> cols, cols -> rows ...
        # ... 'for' loops over the first indexing major i.e. rows in Python by default (C: row-major). 
        # There are packages and methods that use Fortran-major (F: col major) and loop over cols by default.    
        val_ = torch.sum(row_i*col_j).item()
        # uses torch.sum to sum the individual element-wise multiplications of the elements from row_i * col_j
        temp_list_.append(val_) # append each sum to the temporary list
    if (len(temp_list_)==res_p_cols):
        output_tensor[i,:] = torch.tensor(temp_list_, device="cuda") 
        # convert temp list (a row) to tensor and insert into output tensor using row index (row i)
    else: break # stop if temporary list is of wrong dimension
        
output_tensor

CPU times: total: 0 ns
Wall time: 4.04 ms


tensor([[0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954],
        [0.7248, 0.5311, 1.1697, 1.0612, 0.8954]], device='cuda:0')

### Other Tensor functions

In [134]:
u = (torch.rand(3, 3) - 0.5) * 2 # scale values between -1 and 1
print('A random matrix, r:')
print(u)

# Common mathematical operations are supported:
print('\nAbsolute value of r:')
print(torch.abs(u))

# ...as are trigonometric functions:
print('\nInverse sine of r:')
print(torch.asin(u))

# ...and linear algebra operations like determinant and singular value decomposition
print('\nDeterminant of r:')
print(torch.det(u))
print('\nSingular value decomposition of r:')
print(torch.svd(u))

# ...and statistical and aggregate operations:
print('\nAverage and standard deviation of r:')
print(torch.std_mean(u))
print('\nMaximum value of r:')
print(torch.max(u))

A random matrix, r:
tensor([[ 0.4462, -0.6505,  0.2924],
        [ 0.3684, -0.4274, -0.1105],
        [-0.0149,  0.2337, -0.7636]])

Absolute value of r:
tensor([[0.4462, 0.6505, 0.2924],
        [0.3684, 0.4274, 0.1105],
        [0.0149, 0.2337, 0.7636]])

Inverse sine of r:
tensor([[ 0.4625, -0.7082,  0.2967],
        [ 0.3773, -0.4416, -0.1107],
        [-0.0149,  0.2359, -0.8688]])

Determinant of r:
tensor(-0.0037)

Singular value decomposition of r:
torch.return_types.svd(
U=tensor([[-0.7536, -0.2654,  0.6014],
        [-0.3748, -0.5782, -0.7247],
        [ 0.5401, -0.7715,  0.3362]]),
S=tensor([1.0887, 0.7005, 0.0048]),
V=tensor([[-0.4430, -0.4566, -0.7715],
        [ 0.7133,  0.3418, -0.6119],
        [-0.5431,  0.8214, -0.1743]]))

Average and standard deviation of r:
(tensor(0.4517), tensor(-0.0696))

Maximum value of r:
tensor(0.4462)
