## 00. PyTorch Fundamentals

In [1]:
import torch
# Sanity check: greet, then report which PyTorch build this kernel imported.
print("Hello sir", torch.__version__, sep="\n")

Hello sir
2.9.1+cu128


In [2]:
# Prints True when PyTorch can see a CUDA device, False otherwise.
print(
    torch.cuda.is_available()
)

True


In [3]:
# IPython shell escape: a leading "!" hands the rest of the line to the system shell.
# nvidia-smi prints the driver/CUDA versions plus each GPU's memory use and utilisation.
!nvidia-smi # This is to run terminal code. Cool.

Thu Dec 25 20:24:39 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.105.08             Driver Version: 580.105.08     CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5080        On  |   00000000:01:00.0  On |                  N/A |
|  0%   37C    P1             49W /  360W |     724MiB /  16303MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

### Intro to Tensors

#### Creating Tensors

In [4]:
# Scalar vs. one-element vector.

# Wrapping a bare Python int gives a zero-dimensional (scalar) tensor.
scalar1 = torch.tensor(7)
# Wrapping a one-element list gives a ONE-dimensional tensor, i.e. not a scalar.
# Built with the torch.tensor factory instead of the legacy torch.Tensor constructor
# (which treats a bare int argument as a size, not a value); the 7.0 literal keeps
# the float32 dtype that the legacy constructor produced.
not_a_scalar2 = torch.tensor([7.0])
print(scalar1)
print(not_a_scalar2)

tensor(7)
tensor([7.])


In [5]:
# .dim() and .ndim both report the number of dimensions: 0 for the scalar,
# 1 for the one-element vector.
print(scalar1.dim(), scalar1.ndim, sep="\n")
print(not_a_scalar2.dim(), not_a_scalar2.ndim, sep="\n")

# A zero-dimensional tensor has an empty shape.
print(scalar1.shape)

0
0
1
1
torch.Size([])


In [6]:
# .item() unwraps a one-element tensor into a plain Python number
# (an int for scalar1, a float for not_a_scalar2).
print(scalar1.item(), not_a_scalar2.item(), sep="\n")

7
7.0


In [7]:
# Vector: a one-dimensional tensor built from a flat sequence of values.
vector = torch.tensor((7, 7))
print(vector)

tensor([7, 7])


In [8]:
# Number of dimensions (attribute form, then method form) followed by the shape.
print(vector.ndim, vector.dim(), vector.shape, sep="\n")

1
1
torch.Size([2])


In [9]:
# Matrix: a two-dimensional tensor, here three rows by two columns.
MATRIX = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
print(MATRIX.ndim, MATRIX.shape, sep="\n")

2
torch.Size([3, 2])


In [10]:
# Indexing the first axis of MATRIX yields one row at a time. For rows 0, 1 and 2
# show the row itself, its number of dimensions, and its shape.
for row_index in (0, 1, 2):
    row = MATRIX[row_index]
    print(row, row.ndim, row.shape, sep="\n")

tensor([1, 2])
1
torch.Size([2])
tensor([3, 4])
1
torch.Size([2])
tensor([5, 6])
1
torch.Size([2])


In [11]:
# Tensor: three dimensions here, i.e. two stacked 2x2 matrices.
TENSOR = torch.tensor([
    [[1, 2], [2, 3]],
    [[5, 6], [6, 7]],
])
print(TENSOR, TENSOR.ndim, TENSOR.shape, sep="\n")

tensor([[[1, 2],
         [2, 3]],

        [[5, 6],
         [6, 7]]])
3
torch.Size([2, 2, 2])


In [12]:
# Both rows of the first 2x2 matrix inside TENSOR.
print(TENSOR[0, 0], TENSOR[0, 1], sep="\n")

tensor([1, 2])
tensor([2, 3])


#### Random Tensors

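Why random tensors? Many neural networks start from tensors full of random numbers and then adjust those numbers during training, so random tensors are the usual way to initialize a model before we train it.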

In [13]:
# Random tensor: a 3x4 matrix filled by torch.rand.
# No seed is set, so the values differ on every run.
random_tensor = torch.rand(size=(3, 4))
print(random_tensor, random_tensor.ndim, random_tensor.shape, sep="\n")

tensor([[0.3830, 0.8872, 0.8191, 0.7175],
        [0.9657, 0.2923, 0.4781, 0.8531],
        [0.4115, 0.0829, 0.0151, 0.2562]])
2
torch.Size([3, 4])


In [14]:
# torch.rand accepts any number of sizes: this one is four-dimensional (2x3x4x5).
random_tensor2 = torch.rand(size=(2, 3, 4, 5))
print(random_tensor2, random_tensor2.ndim, random_tensor2.shape, sep="\n")

tensor([[[[0.3682, 0.0345, 0.6099, 0.8006, 0.7852],
          [0.4547, 0.3167, 0.2314, 0.7399, 0.3384],
          [0.9167, 0.0775, 0.5277, 0.5798, 0.7505],
          [0.5674, 0.1810, 0.0693, 0.4567, 0.9047]],

         [[0.5730, 0.9183, 0.1725, 0.7644, 0.7973],
          [0.2032, 0.7728, 0.3553, 0.0650, 0.0569],
          [0.3100, 0.2192, 0.8465, 0.2114, 0.5103],
          [0.1661, 0.0479, 0.4734, 0.4903, 0.7507]],

         [[0.7726, 0.9839, 0.6413, 0.7893, 0.9875],
          [0.2086, 0.7421, 0.1963, 0.6371, 0.5192],
          [0.3661, 0.6332, 0.4912, 0.8826, 0.9359],
          [0.2712, 0.4597, 0.4365, 0.6678, 0.1995]]],


        [[[0.6585, 0.4416, 0.7593, 0.7866, 0.0806],
          [0.1925, 0.4010, 0.0260, 0.6084, 0.3882],
          [0.8621, 0.9567, 0.7728, 0.8632, 0.2657],
          [0.7992, 0.5016, 0.9397, 0.5081, 0.1856]],

         [[0.2592, 0.1725, 0.6085, 0.9180, 0.9281],
          [0.9352, 0.1729, 0.6268, 0.4446, 0.6032],
          [0.1058, 0.9471, 0.4765, 0.0200, 0.6932],
  

In [15]:
# Random tensors with image-like shapes, in two common axis orders.

# Channels-last layout: (height, width, color-channels).
random_image_size_tensor1 = torch.rand(640, 480, 3)
print(random_image_size_tensor1.shape, random_image_size_tensor1.ndim)

# Channels-first layout: (color-channels, height, width).
random_image_size_tensor2 = torch.rand(3, 640, 480)
print(random_image_size_tensor2.shape, random_image_size_tensor2.ndim)

torch.Size([640, 480, 3]) 3
torch.Size([3, 640, 480]) 3


In [16]:
# Zero tensor: every entry is 0.
# The size may be written as a list or as a tuple; both spellings work.
zero = torch.zeros(size=[3, 4])
print(zero)
print(zero.ndim, zero.shape, sep=" , ")

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
2 , torch.Size([3, 4])


In [17]:
# Ones tensor: every entry is 1.
ones = torch.ones(size=[2, 5])
print(ones)
print(ones.ndim, ones.shape, sep=" , ")

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
2 , torch.Size([2, 5])


In [18]:
# Multiplying two tensors of the same shape is element-wise (this is what the *
# operator does), so multiplying by the all-zeros tensor zeroes every entry.

torch.mul(random_tensor, zero)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [19]:
# Datatypes so far: rand/zeros/ones give float32, while a tensor built from a
# Python int is int64.
print(random_tensor.dtype, zero.dtype, ones.dtype, scalar1.dtype, sep="\n")

torch.float32
torch.float32
torch.float32
torch.int64


#### Creating a range

In [21]:
# Integers from 1 up to, but not including, 10.
range_tensor = torch.arange(start=1, end=10)
print(range_tensor)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])


In [24]:
# Number of dimensions and shape of the range, displayed together as a tuple.
(range_tensor.dim(), range_tensor.size())

(1, torch.Size([9]))

In [27]:
# A fractional step also works: 1.0 up to (but excluding) 2.0 in steps of 0.1.
range_tensor2 = torch.arange(start=1, end=2, step=0.1)
range_tensor2

tensor([1.0000, 1.1000, 1.2000, 1.3000, 1.4000, 1.5000, 1.6000, 1.7000, 1.8000,
        1.9000])

In [28]:
# Datatype torch.arange picked for the fractional-step range (float32 in the output below).
range_tensor2.dtype

torch.float32

In [29]:
# Same idea with a coarser step, passing start/end/step positionally.
range_tensor3 = torch.arange(1, 2, 0.2)
range_tensor3

tensor([1.0000, 1.2000, 1.4000, 1.6000, 1.8000])

In [33]:
# ones_like builds an all-ones tensor shaped like its input.
one_tensor3 = torch.ones_like(range_tensor3)
one_tensor3

tensor([1., 1., 1., 1., 1.])

#### Tensor datatypes
***Note***: Three tensor attributes to be careful about, since mismatches between them are a common source of errors:
1. shape
2. device
3. datatype (e.g. float precision)

In [34]:
# Ask for half precision explicitly; torch.half is another name for torch.float16.
fl16_tensor = torch.tensor([1, 2, 3, 4, 5], dtype=torch.half)
fl16_tensor

tensor([1., 2., 3., 4., 5.], dtype=torch.float16)

In [35]:
# The same constructor with the other commonly used keyword arguments spelled out.
fl16_tensor2 = torch.tensor(
    [1, 2, 3],
    dtype=torch.float16,   # element datatype
    device=None,           # None: leave placement to PyTorch's default device
    requires_grad=False,   # do not track gradients for this tensor
)
fl16_tensor2

tensor([1., 2., 3.], dtype=torch.float16)

In [36]:
# Inspect the resulting attributes: gradient tracking flag and the device the tensor lives on.
fl16_tensor2.requires_grad, fl16_tensor2.device

(False, device(type='cpu'))

In [42]:
# Create a tensor directly on the GPU when one is available, and fall back to the
# CPU otherwise so this cell (and everything after it) still runs on machines
# without CUDA instead of raising.
# NOTE: despite the "fl16" prefix, no dtype is given, so the dtype is inferred from
# the Python ints (int64). The name is kept because later cells refer to it.
fl16_tensor3 = torch.tensor(
    [5, 6],
    device="cuda" if torch.cuda.is_available() else "cpu",
)
fl16_tensor3

tensor([5, 6], device='cuda:0')

In [44]:
# Convert datatypes with .type(): int64 -> float16, then float16 -> half.
# torch.half is another name for torch.float16, so the second conversion
# leaves the dtype unchanged.
fl16_tensor4 = fl16_tensor3.type(torch.float16)
fl16_tensor5 = fl16_tensor4.type(torch.half)
print(fl16_tensor3.dtype, fl16_tensor4.dtype, fl16_tensor5.dtype, sep="\n")

torch.int64
torch.float16
torch.float16


In [45]:
# Element-wise product of an int64 tensor and a float16 tensor; the result is float16.
torch.mul(fl16_tensor3, fl16_tensor4)

tensor([25., 36.], device='cuda:0', dtype=torch.float16)

In [50]:
# Multiply tensors of different dtypes (int32 x float32) and report the
# attributes of the result.
int32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
float32_tensor = torch.tensor([3, 6, 9], dtype=torch.float32)
mult_tensor = torch.mul(int32_tensor, float32_tensor)
print(
    f"Tensor datatype = {mult_tensor.dtype}",
    f"Tensor device = {mult_tensor.device}",
    f"Tensor requires_grad = {mult_tensor.requires_grad}",
    f"Tensor shape = {mult_tensor.shape}",
    sep="\n",
)

Tensor datatype = torch.float32
Tensor device = cpu
Tensor requires_grad = False
Tensor shape = torch.Size([3])


#### Manipulating tensors (tensor operations)
These include:
- Addition
- Subtraction
- Multiplication
- Division
- Matrix-Mult

In [52]:
# Arithmetic between a tensor and a plain number applies to every element.
test1 = torch.tensor([1, 2, 3])
print(
    test1 + 10,  # add 10 to each element
    test1 * 10,  # multiply each element by 10
    test1 - 1,   # subtract 1 from each element
    test1 / 2,   # divide each element by 2 (the result is floating point)
    sep="\n",
)

tensor([11, 12, 13])
tensor([10, 20, 30])
tensor([0, 1, 2])
tensor([0.5000, 1.0000, 1.5000])


In [55]:
# The same arithmetic through the torch.* function forms of the operators.
print(
    torch.mul(test1, 10),
    torch.add(test1, 10),
    torch.sub(test1, 1),
    torch.div(test1, 2),
    sep="\n",
)

tensor([10, 20, 30])
tensor([11, 12, 13])
tensor([0, 1, 2])
tensor([0.5000, 1.0000, 1.5000])


In [56]:
# Operands for matrix multiplication: a 3x2 matrix and a 2x3 matrix, so the inner
# dimensions line up whichever one comes first.
a1 = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
a2 = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])
print(a1.shape, a2.shape, sep="\n")

torch.Size([3, 2])
torch.Size([2, 3])


In [58]:
# Matrix products in both orders: (3x2)@(2x3) is 3x3, while (2x3)@(3x2) is 2x2.
a1a2 = a1 @ a2
print(a1a2.shape, a1a2, sep="\n")
a2a1 = a2 @ a1
print(a2a1.shape, a2a1, sep="\n")

torch.Size([3, 3])
tensor([[ 9, 12, 15],
        [19, 26, 33],
        [29, 40, 51]])
torch.Size([2, 2])
tensor([[22, 28],
        [49, 64]])


In [72]:
# matmul of two 1-D tensors is their dot product, returned as a 0-d tensor.
a3 = torch.tensor(list(range(1, 10)))
a3_square = a3 @ a3
print(a3_square, a3_square.shape, sep="\n")

tensor(285)
torch.Size([])


In [73]:
# With explicit 2-D operands matmul is a true matrix product:
# a 1x3 row times a 3x1 column is 1x1, and the reverse order is 3x3.
a4 = torch.tensor([[1, 2, 3]])
a5 = torch.tensor([[1], [2], [3]])
print(a4.shape, a5.shape, (a4 @ a5).shape, (a5 @ a4).shape, sep="\n")

torch.Size([1, 3])
torch.Size([3, 1])
torch.Size([1, 1])
torch.Size([3, 3])


In [76]:
%%time
# Hand-rolled dot product of a3 with itself: add a3[i] * a3[i] one index at a time.
# NOTE(review): presumably a deliberately naive baseline to time against torch.matmul.
# A single %%time run on a vector this small is dominated by noise; %%timeit on a
# much larger tensor would make the comparison meaningful.
value = 0  # starts as a Python int; the printed result shows it ends up a tensor
for i in range(len(a3)):
    value += (a3[i]*a3[i])
print(value)

tensor(285)
CPU times: user 1.37 ms, sys: 85 μs, total: 1.45 ms
Wall time: 1.05 ms


In [77]:
%%time
# The dot product of a3 with itself as a single torch.matmul call, timed by %%time.
print(torch.matmul(a3,a3))

tensor(285)
CPU times: user 814 μs, sys: 511 μs, total: 1.33 ms
Wall time: 1.1 ms
