In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.5.1+cu124


## Tensors

In [None]:
# A scalar is a 0-dimensional tensor wrapping a single number.
scalar = torch.tensor(5)
# Dimension count first, then the value unwrapped into a plain Python int.
print(scalar.ndim, scalar.item(), sep="\n")

0
5


In [None]:
# A vector is a 1-dimensional tensor; its shape lists the number of elements.
vector = torch.tensor([1, 2])
print(f"{vector.ndim}\n{vector.shape}")

1
torch.Size([2])


In [None]:
# A matrix is a 2-dimensional tensor: here two rows of two values each.
MATRIX = torch.tensor([
    [1, 2],
    [3, 4],
])
print(MATRIX.ndim, MATRIX.shape, sep="\n")

2
torch.Size([2, 2])


In [None]:
# One outer block wrapping a 3x3 grid, so the shape reads outermost-first: (1, 3, 3).
TENSOR = torch.tensor([[
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]])
print(TENSOR.ndim, TENSOR.shape, sep="\n")

3
torch.Size([1, 3, 3])


## Random Tensor

In [None]:
# Uniform random values in [0, 1), arranged as 3 blocks of 4 rows x 5 columns.
random_tensor = torch.rand(3, 4, 5)
print(random_tensor.dtype)  # default is float32
random_tensor

torch.float32


tensor([[[0.6911, 0.1069, 0.0724, 0.3907, 0.4336],
         [0.8064, 0.4750, 0.8217, 0.7795, 0.1267],
         [0.6605, 0.3173, 0.0203, 0.7254, 0.8942],
         [0.8080, 0.4985, 0.5169, 0.5292, 0.9619]],

        [[0.1449, 0.1932, 0.1917, 0.8675, 0.3733],
         [0.8026, 0.2773, 0.3358, 0.8137, 0.2311],
         [0.2099, 0.9394, 0.0568, 0.2975, 0.5876],
         [0.2661, 0.7114, 0.6820, 0.0858, 0.4019]],

        [[0.9727, 0.1632, 0.9795, 0.7074, 0.3520],
         [0.8169, 0.7634, 0.7523, 0.7895, 0.5567],
         [0.4425, 0.7151, 0.0885, 0.1523, 0.2546],
         [0.5488, 0.9412, 0.2176, 0.1257, 0.3787]]])

In [None]:
# 4x4 tensor filled with zeros (floating point by default).
zeros = torch.zeros(4, 4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# Two 4x4 blocks filled with ones (floating point by default).
ones = torch.ones(2, 4, 4)
ones

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

## Range of Tensors and Tensor-Like


In [None]:
# Values from 1 up to (but not including) 11 in steps of 2.
# NOTE: despite the variable name, this yields only the odd numbers 1, 3, 5, 7, 9.
one_to_ten = torch.arange(1, 11, 2)
one_to_ten

tensor([1, 3, 5, 7, 9])

In [None]:
# zeros_like copies the shape (and dtype) of the reference tensor and fills it with zeros.
five_zeros = torch.zeros_like(one_to_ten)
five_zeros

tensor([0, 0, 0, 0, 0])

In [None]:
# ones_like copies the shape (and dtype) of the reference tensor and fills it with ones.
five_ones = torch.ones_like(one_to_ten)
five_ones

tensor([1, 1, 1, 1, 1])

## Tensor Data Types

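Tensors that are combined in one operation must be on the same device and have compatible shapes. Many operations promote mixed data types automatically, but some (for example `torch.matmul`) require matching dtypes, so it is safest to keep the data types consistent as well.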

In [None]:
# The three most commonly adjusted options when creating a tensor.
f32_tensor = torch.tensor(
    [3.0, 4.0, 5.0],
    requires_grad=False,  # do not track gradients for this tensor
    device="cpu",         # device the tensor is stored on
    dtype=None,           # None -> data type is inferred from the values
)
f32_tensor.dtype

torch.float32

In [None]:
# Integer literals stored as half precision because the dtype is given explicitly.
f16_tensor = torch.tensor([3, 5, 7], device="cpu", dtype=torch.float16)
# The three attributes worth checking on any tensor: data type, shape, device.
print(f16_tensor.dtype, f16_tensor.shape, f16_tensor.device, sep="\n")

torch.float16
torch.Size([3])
cpu


## Operations on Tensors

In [None]:
tensor = torch.tensor([1,2,3])
tensor + 10 # the scalar is added to every element; a new tensor is returned and `tensor` is not modified

tensor([11, 12, 13])

In [None]:
torch.add(tensor,10) # function form of `tensor + 10`

tensor([11, 12, 13])

In [None]:
tensor2 = torch.tensor([4,5,6])
tensor + tensor2 # element-wise sum of two tensors with the same shape

tensor([5, 7, 9])

In [None]:
torch.mul(tensor, 10) # every element is multiplied by the scalar

tensor([10, 20, 30])

In [None]:
torch.mul(tensor, tensor2) # element-wise product (not a dot product)

tensor([ 4, 10, 18])

In [None]:
tensor * tensor2 # operator form of torch.mul(tensor, tensor2)

tensor([ 4, 10, 18])

### Matmul

In [None]:
torch.matmul(tensor, tensor2) # 1D and 1D gives the dot product (a single number): 1*4 + 2*5 + 3*6 = 32

tensor(32)

In [None]:
# Same two matrices, two different products: matrix multiplication vs element-wise multiplication.
tensor3 = torch.tensor([[1,2],[3,4]])
tensor4 = torch.tensor([[5,6],[7,8]])

print(torch.matmul(tensor3, tensor4)) # 2x2 and 2x2 make a 2x2 matrix (rows of tensor3 dotted with columns of tensor4)

torch.mul(tensor3, tensor4) # 2x2 matrix element-wise: each entry is multiplied by the entry in the same position


tensor([[19, 22],
        [43, 50]])


tensor([[ 5, 12],
        [21, 32]])

In [None]:
torch.matmul(torch.rand(3,4), torch.rand(4,5)) # results in 3x5 matrix
# Shapes must align: the number of columns of A must be the same as the number of rows of B

tensor([[0.7780, 1.5254, 2.2823, 1.5050, 0.8163],
        [0.7239, 1.3859, 1.9374, 1.2801, 0.8158],
        [0.7976, 0.7292, 1.0635, 0.8299, 0.6483]])

In [None]:
torch.mm(torch.rand(3,4), torch.rand(4,5)) # same result as matmul for 2-D matrices; unlike matmul, torch.mm accepts only 2-D tensors and does not broadcast

tensor([[1.1899, 0.6884, 1.9012, 0.9809, 1.7741],
        [0.9656, 0.4833, 1.1900, 0.9262, 1.6177],
        [1.0525, 0.5247, 1.4277, 0.6684, 1.3629]])

### Transpose

In [None]:
# Two independent 3x5 random matrices.
# They cannot be matrix-multiplied as they are: the inner dimensions (5 and 3) do not match.
tensor5, tensor6 = torch.rand(3, 5), torch.rand(3, 5)

print(tensor5, tensor6, sep="\n")

tensor([[0.2891, 0.2757, 0.2576, 0.3230, 0.5473],
        [0.4717, 0.0022, 0.4191, 0.0635, 0.4203],
        [0.4622, 0.6599, 0.0283, 0.0991, 0.5727]])
tensor([[0.3142, 0.6761, 0.5676, 0.4364, 0.3819],
        [0.0463, 0.5624, 0.9928, 0.1261, 0.2062],
        [0.1985, 0.3800, 0.6589, 0.6058, 0.9013]])


In [None]:
tensor5*tensor6 # element-wise product: works here because both operands have the same shape (3, 5)

tensor([[0.0909, 0.1864, 0.1462, 0.1410, 0.2090],
        [0.0218, 0.0012, 0.4161, 0.0080, 0.0867],
        [0.0917, 0.2508, 0.0187, 0.0600, 0.5162]])

In [None]:
# matmul needs the inner dimensions to agree, so the second operand is transposed:
# (3, 5) times (5, 3).
print(tensor6.shape, tensor6.T.shape, sep="\n")

torch.matmul(tensor5, tensor6.T)  # result is 3x3

torch.Size([3, 5])
torch.Size([5, 3])


tensor([[0.7735, 0.5778, 1.0209],
        [0.5758, 0.5338, 0.7879],
        [0.8694, 0.5512, 0.9374]])

## Tensor Aggregations

In [None]:
# Twenty uniform samples from [0, 1), scaled up by a factor of 100.
tensor = 100 * torch.rand(20)
tensor

tensor([55.5264, 43.3596, 26.5226, 54.9240, 27.2781, 68.2043, 24.7939, 88.6487,
        30.7357, 50.0397, 57.3596, 40.6783, 48.8555, 70.3824, 90.0316, 49.1853,
        50.4805, 22.2708,  6.0401,  6.8605])

In [None]:
tensor.max(), torch.max(tensor) # method form and function form give the same result

(tensor(90.0316), tensor(90.0316))

In [None]:
tensor.min(), torch.min(tensor) # method form and function form give the same result

(tensor(6.0401), tensor(6.0401))

In [None]:
tensor.mean(), torch.mean(tensor) # input must have a floating-point (or complex) dtype; an integer tensor has to be cast first

(tensor(45.6089), tensor(45.6089))

In [None]:
tensor.sum(), torch.sum(tensor) # total over all 20 elements

(tensor(912.1777), tensor(912.1777))

In [None]:
tensor.argmin() # index of the minimum value's location

tensor(18)

In [None]:
tensor.argmax() # index of the maximum value's location

tensor(14)

## Reshaping

In [None]:
# 3 rows x 4 columns = 12 elements in total; any reshape must keep that count.
tensor = torch.rand(size=(3, 4))
tensor, tensor.shape

(tensor([[0.2192, 0.5963, 0.9382, 0.3060],
         [0.2966, 0.8725, 0.9057, 0.3185],
         [0.3196, 0.0035, 0.0976, 0.3816]]),
 torch.Size([3, 4]))

In [None]:
reshaped_tensor = tensor.reshape(2,6) # not in-place: returns the same 12 values, in the same order, arranged as 2x6; `tensor` keeps its 3x4 shape
reshaped_tensor, reshaped_tensor.shape

(tensor([[0.2192, 0.5963, 0.9382, 0.3060, 0.2966, 0.8725],
         [0.9057, 0.3185, 0.3196, 0.0035, 0.0976, 0.3816]]),
 torch.Size([2, 6]))

In [None]:
tensor.reshape(1,12) # a single row holding all 12 values

tensor([[0.2192, 0.5963, 0.9382, 0.3060, 0.2966, 0.8725, 0.9057, 0.3185, 0.3196,
         0.0035, 0.0976, 0.3816]])

In [None]:
tensor.reshape(12,1) # a single column holding all 12 values

tensor([[0.2192],
        [0.5963],
        [0.9382],
        [0.3060],
        [0.2966],
        [0.8725],
        [0.9057],
        [0.3185],
        [0.3196],
        [0.0035],
        [0.0976],
        [0.3816]])

### View

In [None]:
# A view of a tensor shares the same memory as the tensor: no data is copied, only the shape differs

x = tensor.view(1, 12)
x, x.shape

(tensor([[0.2192, 0.5963, 0.9382, 0.3060, 0.2966, 0.8725, 0.9057, 0.3185, 0.3196,
          0.0035, 0.0976, 0.3816]]),
 torch.Size([1, 12]))

In [None]:
x[:, 0] = 5 # in-place write through the view: changing one changes the other, so tensor[0, 0] becomes 5 as well
x, tensor

(tensor([[5.0000e+00, 5.9632e-01, 9.3819e-01, 3.0596e-01, 2.9658e-01, 8.7252e-01,
          9.0565e-01, 3.1846e-01, 3.1959e-01, 3.4550e-03, 9.7637e-02, 3.8158e-01]]),
 tensor([[5.0000e+00, 5.9632e-01, 9.3819e-01, 3.0596e-01],
         [2.9658e-01, 8.7252e-01, 9.0565e-01, 3.1846e-01],
         [3.1959e-01, 3.4550e-03, 9.7637e-02, 3.8158e-01]]))

### Stacking

In [None]:
# The integers 0 through 9 as a 1-D tensor (start defaults to 0).
tensor = torch.arange(10)
tensor, tensor.shape

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), torch.Size([10]))

In [None]:
vstack = torch.stack([tensor, tensor, tensor], dim = 0) # stack along a new first dimension: each input becomes a row -> shape (3, 10)
vstack, vstack.shape

(tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
         [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
         [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]),
 torch.Size([3, 10]))

In [None]:
hstack = torch.stack([tensor, tensor, tensor], dim = 1) # stack along a new second dimension: each input becomes a column -> shape (10, 3); note that torch.hstack gives a different result for 1-D inputs
hstack, hstack.shape

(tensor([[0, 0, 0],
         [1, 1, 1],
         [2, 2, 2],
         [3, 3, 3],
         [4, 4, 4],
         [5, 5, 5],
         [6, 6, 6],
         [7, 7, 7],
         [8, 8, 8],
         [9, 9, 9]]),
 torch.Size([10, 3]))

In [None]:
torch.vstack([tensor, tensor, tensor]) # one row per input; for 1-D inputs this matches torch.stack(..., dim = 0)

tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [None]:
torch.hstack([tensor, tensor, tensor]) # 1-D inputs are joined end to end into one 30-element vector; no new dimension is created


tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3,
        4, 5, 6, 7, 8, 9])

### Squeeze and Unsqueeze

In [None]:
# Start from a tensor with two size-1 dimensions in front of the 5 values.
tensor = torch.rand(1,1,5)
print(tensor)

squeezed = tensor.squeeze() # removes every size-1 dimension and returns the result (not in-place): (1, 1, 5) -> (5,)
print(squeezed)

torch.squeeze(tensor) # same as tensor.squeeze()

tensor([[[0.9310, 0.3042, 0.2726, 0.6273, 0.4811]]])
tensor([0.9310, 0.3042, 0.2726, 0.6273, 0.4811])


tensor([0.9310, 0.3042, 0.2726, 0.6273, 0.4811])

In [None]:
print(squeezed.shape)
torch.unsqueeze(squeezed, dim = 0) # add a size-1 dimension at the front: (5,) -> (1, 5)

torch.Size([5])


tensor([[0.9310, 0.3042, 0.2726, 0.6273, 0.4811]])

In [None]:
torch.unsqueeze(squeezed, dim = 1) # add a size-1 dimension at the end: (5,) -> (5, 1)

tensor([[0.9310],
        [0.3042],
        [0.2726],
        [0.6273],
        [0.4811]])

### Permute

In [None]:
# Image-like tensor laid out as (height, width, channels).
tensor = torch.rand(size=(256, 256, 3))

# New order (2, 0, 1): old dim 2 moves to the front, giving (channels, height, width).
# permute only rearranges the dimensions; it returns a view that shares memory with `tensor`.
permuted = tensor.permute(2, 0, 1)

print(tensor.shape, permuted.shape, sep="\n")

torch.Size([256, 256, 3])
torch.Size([3, 256, 256])


## Indexing

Similar to NumPy

In [None]:
# Random integers in [0, 15): one block of 3 rows x 5 columns to practise indexing on.
tensor = torch.randint(0, 15, (1, 3, 5))
tensor

tensor([[[ 9, 14,  1, 10,  2],
         [14, 13,  1, 11,  2],
         [14,  0, 12, 11, 14]]])

In [None]:
tensor[0, 0, 0] # one index per dimension -> a single element (0-dim tensor)

tensor(9)

In [None]:
tensor[:, :, :] # Entire tensor: every dimension is kept in full

tensor([[[ 9, 14,  1, 10,  2],
         [14, 13,  1, 11,  2],
         [14,  0, 12, 11, 14]]])

In [None]:
tensor[0] # index 0 of the first dimension -> the 3x5 matrix inside

tensor([[ 9, 14,  1, 10,  2],
        [14, 13,  1, 11,  2],
        [14,  0, 12, 11, 14]])

In [None]:
tensor[0][0] # chained indexing: the first row of that 3x5 matrix

tensor([ 9, 14,  1, 10,  2])

In [None]:
tensor[0,0,:] # Same as tensor[0][0], written as a single index expression

tensor([ 9, 14,  1, 10,  2])

In [None]:
tensor[0][0][:] # Same as tensor[0][0]; the trailing [:] selects everything that is left

tensor([ 9, 14,  1, 10,  2])

In [None]:
# : means to select everything from that dimension

tensor[0, :, 0] # first column of the 3x5 matrix: one value from each row

tensor([ 9, 14, 14])

In [None]:
tensor[:, :, 4] # last column; the leading size-1 dimension is kept, so the result has shape (1, 3)

tensor([[ 2,  2, 14]])

## PyTorch vs NumPy

In [None]:
# Data often starts out in NumPy and then has to be converted to a tensor (and sometimes back again)

In [None]:
# NumPy to Tensor
array = np.arange(1.0, 8.0) # float values 1.0 .. 7.0; NumPy's default float type is float64
tensor = torch.from_numpy(array) # keeps the array's dtype, so the tensor is float64 rather than torch's usual float32
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
array.dtype, tensor.dtype # the dtype carried over unchanged from NumPy to torch

(dtype('float64'), torch.float64)

In [None]:
# Convert from NumPy, then cast float64 -> float32 so the tensor uses torch's default float type.
tensor = torch.from_numpy(array).to(torch.float32)
tensor, tensor.dtype

(tensor([1., 2., 3., 4., 5., 6., 7.]), torch.float32)

In [None]:
array = tensor.numpy() # Tensor to NumPy: the array takes the tensor's dtype (float32); `array` no longer refers to the earlier float64 array
array

array([1., 2., 3., 4., 5., 6., 7.], dtype=float32)

## Random Reproducibility

In [None]:
# Reproducible experiments need controlled randomness
# Setting a random seed makes the "random" numbers repeatable

In [None]:
# Two unseeded draws: each call advances the random generator, so the values differ.
tensorA, tensorB = torch.rand(3, 3), torch.rand(3, 3)

print(tensorA, tensorB, sep="\n")
tensorA == tensorB

tensor([[0.0227, 0.1168, 0.0561],
        [0.2620, 0.2196, 0.5718],
        [0.9271, 0.7009, 0.8585]])
tensor([[0.4799, 0.1706, 0.8212],
        [0.2671, 0.5357, 0.3940],
        [0.7657, 0.4972, 0.4701]])


tensor([[False, False, False],
        [False, False, False],
        [False, False, False]])

In [None]:
RANDOM_SEED = 42

# Seeding resets the generator to a known state, so the draw that follows is repeatable.
torch.manual_seed(RANDOM_SEED)
tensorA = torch.rand(3, 3)

# The seed must be set again before every draw that should repeat the same values;
# otherwise the generator simply continues from where the previous draw left off.
torch.manual_seed(RANDOM_SEED)
tensorB = torch.rand(3, 3)

print(tensorA, tensorB, sep="\n")
tensorA == tensorB

tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009],
        [0.2566, 0.7936, 0.9408]])
tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009],
        [0.2566, 0.7936, 0.9408]])


tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

## PyTorch Objects on GPU

In [None]:
import torch
import numpy as np
print(torch.__version__)

2.5.1+cu124


### Device agnostic code

In [None]:
torch.cuda.is_available() # whether PyTorch can use a CUDA GPU on this machine

True

In [None]:
# Shell command (the leading `!` runs it outside Python): shows the GPU, driver and memory usage reported by the NVIDIA driver
!nvidia-smi

Mon Feb  3 13:44:59 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   50C    P8             11W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu" # use the GPU when there is one, otherwise fall back to the CPU so the same code runs on either
device

'cuda'

In [None]:
torch.cuda.device_count() # number of CUDA GPUs visible to PyTorch

1

In [None]:
tensor = torch.tensor([1,2,3]) # Not on gpu: no device was given, so the tensor is created on the CPU
tensor.device

device(type='cpu')

In [None]:
# Move to the selected device (the GPU when `device` is "cuda")

tensor_gpu = tensor.to(device) # .to() returns a tensor on `device`; the result is bound to a new name rather than changing `tensor`
tensor_gpu

tensor([1, 2, 3], device='cuda:0')

In [None]:
# Move to CPU (if we need to make np.ndarray from tensor)

#tensor_gpu.numpy() # Gives an error when tensor_gpu is on the GPU: NumPy can only work with CPU memory

tensor_gpu.cpu().numpy() # Works; .cpu() first copies a GPU tensor to host memory, so the array does not share memory with the GPU tensor

array([1, 2, 3])