In [285]:
# Import PyTorch and check CUDA availability
import torch
print(torch.__version__)  # Print PyTorch version
print(torch.cuda.is_available())  # Check if CUDA (GPU support) is available
print(torch.cuda.device_count())  # Number of available CUDA devices

2.5.1+cu121
True
1


# **BASICS**
This section covers the basics of PyTorch tensors, including scalars, vectors, matrices, and higher-dimensional tensors.

![Scalar, Vector, Matrix, Tensor](https://hadrienj.github.io/assets/images/2.1/scalar-vector-matrix-tensor.png)

In [286]:
# Scalar tensor (0-dimensional)
scalar = torch.tensor(7)
scalar  # Output: tensor(7)

tensor(7)

In [287]:
# Get the number of dimensions of the scalar tensor
scalar.ndim  # Output: 0

0

In [288]:
# Get the shape of the scalar tensor
scalar.shape  # Output: torch.Size([])

torch.Size([])

In [289]:
# Get the Python number from the scalar tensor
scalar.item()  # Output: 7

7

In [290]:
# Vector tensor (1-dimensional)
vector = torch.tensor([7, 7])
vector  # Output: tensor([7, 7])

tensor([7, 7])

In [291]:
# Get the number of dimensions of the vector tensor
vector.ndim  # Output: 1

1

In [292]:
# Get the shape of the vector tensor
vector.shape  # Output: torch.Size([2])

torch.Size([2])

In [293]:
# Matrix tensor (2-dimensional)
matrix = torch.tensor([[7, 8, 8],
                       [9, 10, 9]])
matrix  # Output: 2x3 matrix

tensor([[ 7,  8,  8],
        [ 9, 10,  9]])

In [294]:
# Get the number of dimensions of the matrix tensor
matrix.ndim  # Output: 2

2

In [295]:
# Get the shape of the matrix tensor
matrix.shape  # Output: torch.Size([2, 3])

torch.Size([2, 3])

In [296]:
# 3D Tensor (3-dimensional)
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 4, 5]]])
TENSOR  # Output: 1x3x3 tensor

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [297]:
# Get the number of dimensions of the 3D tensor
TENSOR.ndim  # Output: 3

3

In [298]:
# Get the shape of the 3D tensor
TENSOR.shape  # Output: torch.Size([1, 3, 3])

torch.Size([1, 3, 3])

In [299]:
# 4D Tensor example
TENSOR_2 = torch.tensor([[[ 1, 2, 3],
                          [ 4, 5, 6],
                          [ 1, 2, 3],
                          [ 4, 5, 6]],

                         [[10, 11, 12],
                          [13, 14, 15],
                          [10, 11, 12],
                          [13, 14, 15]]])
TENSOR_2  # Output: 2x4x3 tensor

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 1,  2,  3],
         [ 4,  5,  6]],

        [[10, 11, 12],
         [13, 14, 15],
         [10, 11, 12],
         [13, 14, 15]]])

In [300]:
# Get the number of dimensions of the 4D tensor
TENSOR_2.ndim  # Output: 3

3

In [301]:
# Get the shape of the 4D tensor
TENSOR_2.shape  # Output: torch.Size([2, 4, 3])

torch.Size([2, 4, 3])

In [302]:
# Loop through the first dimension of TENSOR_2 and print each sub-tensor
for i in range(len(TENSOR_2)):
    print(TENSOR_2[i])

tensor([[1, 2, 3],
        [4, 5, 6],
        [1, 2, 3],
        [4, 5, 6]])
tensor([[10, 11, 12],
        [13, 14, 15],
        [10, 11, 12],
        [13, 14, 15]])


In [303]:
# Loop through the first and second dimensions of TENSOR_2 and print each row
for i in range(len(TENSOR_2)):
    for j in range(len(TENSOR_2[i])):
        print(TENSOR_2[i][j])

tensor([1, 2, 3])
tensor([4, 5, 6])
tensor([1, 2, 3])
tensor([4, 5, 6])
tensor([10, 11, 12])
tensor([13, 14, 15])
tensor([10, 11, 12])
tensor([13, 14, 15])


In [304]:
# Loop through all elements of TENSOR_2 and print each value
for i in range(len(TENSOR_2)):
    for j in range(len(TENSOR_2[i])):
        for k in range(len(TENSOR_2[i][j])):
            print(TENSOR_2[i][j][k])

tensor(1)
tensor(2)
tensor(3)
tensor(4)
tensor(5)
tensor(6)
tensor(1)
tensor(2)
tensor(3)
tensor(4)
tensor(5)
tensor(6)
tensor(10)
tensor(11)
tensor(12)
tensor(13)
tensor(14)
tensor(15)
tensor(10)
tensor(11)
tensor(12)
tensor(13)
tensor(14)
tensor(15)


# **RANDOM TENSORS**
This section will cover how to create tensors with random values.

In [305]:
## why random tensors?
# Random tensors are useful for initializing weights in neural networks, data augmentation, and testing models.
# They help in creating diverse datasets and avoiding overfitting by introducing variability.



# **A. Uniform Distribution**


In [306]:
#Uniform Distribution
# 1.torch.rand(*size)

random_tensor= torch.rand(2, 3, 4)  # Create a random tensor with values uniformly distributed between 0 and 1

random_tensor  # Output: 2x3x4 tensor with random values

tensor([[[0.0549, 0.4944, 0.2001, 0.9065],
         [0.1843, 0.4180, 0.7188, 0.0878],
         [0.7205, 0.3300, 0.3622, 0.5796]],

        [[0.4292, 0.0443, 0.4221, 0.0352],
         [0.6698, 0.1253, 0.6560, 0.5416],
         [0.9315, 0.2388, 0.9832, 0.6727]]])

In [307]:
random_tensor.shape

torch.Size([2, 3, 4])

In [308]:
random_tensor.ndim

3

In [309]:
#torch.randint(low, high, size)

# Create a random tensor with integer values between 0 and 10
random_int_tensor = torch.randint(0, 10, (2, 3, 4))  # 2x3x4 tensor with random integers
random_int_tensor


tensor([[[6, 8, 1, 1],
         [3, 5, 9, 9],
         [4, 2, 5, 9]],

        [[8, 2, 0, 9],
         [2, 8, 7, 5],
         [0, 9, 7, 7]]])

In [310]:
random_image_size_tensor=torch.rand(3,224,224)

random_image_size_tensor.shape  , random_image_size_tensor.ndim

(torch.Size([3, 224, 224]), 3)

In [311]:
#torch.rand_like(input)

# Create a random tensor with the same shape as another tensor (ensure dtype is float)
random_tensor_like = torch.rand_like(random_int_tensor, dtype=torch.float)

random_tensor_like

tensor([[[0.9416, 0.0735, 0.9227, 0.0393],
         [0.8484, 0.6823, 0.3721, 0.0014],
         [0.8630, 0.2822, 0.1260, 0.3181]],

        [[0.6472, 0.4358, 0.0290, 0.2847],
         [0.0447, 0.1374, 0.5808, 0.7987],
         [0.1672, 0.1141, 0.1320, 0.6030]]])

# **B. Normal (Gaussian) Distribution**

In [312]:
# torch.randn(*size)

# Create a random tensor with values from a normal distribution (mean=0, std=1)

x = torch.randn(3, 4)
x

tensor([[-1.4807, -0.9390,  0.0172,  0.1326],
        [-0.8235, -1.4217, -0.4019, -1.9991],
        [-1.0845,  1.0989, -0.9898, -0.1510]])

In [313]:
# x = torch.randn(3, 4)


In [314]:
# torch.normal(mean, std, size)
x_normal = torch.normal(mean=0., std=1., size=(3, 3))

x_normal


tensor([[-0.0788, -1.6242, -0.2751],
        [ 0.3969, -1.3153,  0.8843],
        [ 1.1825, -0.1379,  0.7743]])

## **ZEROS & ONES**
**Creating tensors filled with 0s or 1s is fundamental for:**

- Weight or bias initialization.

- Placeholder tensors for results.

- Masks or binary flags.

- Identity operations (multiplicative identity: ones; additive identity: zeros).

- Pre-allocating space to be filled later (performance-friendly).



In [315]:
x = torch.zeros(3)       # 1D vector: [0, 0, 0]
y = torch.zeros(2, 3)    # 2x3 matrix
z = torch.zeros(2, 3, 4)  # 2x3x4 tensor

x, y, z  # Output: 1D vector, 2D matrix, and 3D tensor of zeros

(tensor([0., 0., 0.]),
 tensor([[0., 0., 0.],
         [0., 0., 0.]]),
 tensor([[[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]],
 
         [[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]]]))

In [316]:
torch.zeros(2, 2, dtype=torch.float64, device='cuda', requires_grad=True)


tensor([[0., 0.],
        [0., 0.]], device='cuda:0', dtype=torch.float64, requires_grad=True)

In [317]:
x = torch.ones(3)        # [1, 1, 1]
y = torch.ones(2, 3)     # 2x3 of ones
z = torch.ones(2, 3, 4)   # 2x3x4 tensor of ones
x, y, z  # Output: 1D vector, 2D matrix, and 3D tensor of ones

(tensor([1., 1., 1.]),
 tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],
 
         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]]))

In [318]:
torch.ones(2, 2, dtype=torch.float64, device='cuda', requires_grad=True)


tensor([[1., 1.],
        [1., 1.]], device='cuda:0', dtype=torch.float64, requires_grad=True)

In [319]:
# torch.zeros_like / torch.ones_like
x_zeros_like = torch.zeros_like(x, dtype=torch.float64, device='cuda')
x_ones_like = torch.ones_like(y, dtype=torch.float64, device='cuda')
x_zeros_like, x_ones_like  # Output: Tensors with the same shape as x and y, filled with zeros and ones respectively

(tensor([0., 0., 0.], device='cuda:0', dtype=torch.float64),
 tensor([[1., 1., 1.],
         [1., 1., 1.]], device='cuda:0', dtype=torch.float64))

In [320]:
# torch.full
## When you want to fill with any constant, use torch.full:

x = torch.full((2, 3), 7)  # 2x3 tensor filled with 7s
x  # Output: 2x3 tensor filled with 7s

tensor([[7, 7, 7],
        [7, 7, 7]])

# **RANGE OF TENSORS**

In [321]:
# torch.arange(start, end, step)
# Create a tensor with values from 0 to 10 with a step of 2
x = torch.arange(0, 11, 2)
x  # Output: tensor([0, 2, 4, 6, 8])

tensor([ 0,  2,  4,  6,  8, 10])

# **TENSOR DATA TYPES**

📌 1️⃣ What is dtype?
dtype = data type descriptor for a tensor.

Defines what kind of data is stored (integers, floats, bool, complex).

Controls:

- Precision (bits per element)

- Storage size (RAM/VRAM footprint)

- Valid operations (some ops only work for floats)



| Dtype            | PyTorch                          | Bits  | Typical Use                 |
| ---------------- | -------------------------------- | ----- | --------------------------- |
| 32-bit float     | `torch.float32` (`torch.float`)  | 32    | Default for DL, fast on GPU |
| 64-bit float     | `torch.float64` (`torch.double`) | 64    | High-precision math, stats  |
| 16-bit float     | `torch.float16` (`torch.half`)   | 16    | Mixed precision training    |
| BF16             | `torch.bfloat16`                 | 16    | Mixed precision (CPU, TPU)  |
| 8-bit float      | `torch.float8_e4m3fn`            | 8     | Experimental, quantization  |
| 8-bit int        | `torch.uint8`                    | 8     | Masks, binary flags         |
| 8-bit int signed | `torch.int8`                     | 8     | Quantized models            |
| 16-bit int       | `torch.int16` (`torch.short`)    | 16    | Rare                        |
| 32-bit int       | `torch.int32` (`torch.int`)      | 32    | Indices, labels             |
| 64-bit int       | `torch.int64` (`torch.long`)     | 64    | Default for indices         |
| Bool             | `torch.bool`                     | 1-bit | Masks, binary conditions    |
| Complex64        | `torch.complex64`                | 64    | FFTs, signal processing     |
| Complex128       | `torch.complex128`               | 128   | High precision complex      |


In [322]:
x = torch.tensor([1.0, 2.0])
print(x.dtype)  # torch.float32


torch.float32


In [323]:
x = torch.zeros(2, 2, dtype=torch.float64)
x

tensor([[0., 0.],
        [0., 0.]], dtype=torch.float64)

In [324]:
x = torch.randn(2, 2)

x_fp64 = x.to(torch.float64)
print(x_fp64.dtype)  # torch.float64
x_int = x.to(torch.int32)
print(x_int.dtype)  # torch.int32
x_fp64 = x.type(torch.float64)  # same
print(x_fp64.dtype)  # torch.float64


torch.float64
torch.int32
torch.float64


# **✅ Note: `.to()` can also set device at the same time:**

In [325]:
x = x.to(dtype=torch.float16, device='cuda')
x


tensor([[-0.0507,  1.5439],
        [-0.5356, -0.8599]], device='cuda:0', dtype=torch.float16)

| When                                        | Recommended dtype                   |
| ------------------------------------------- | ----------------------------------- |
| General DL training                         | `torch.float32`                     |
| High-precision numerics (e.g., eigenvalues) | `torch.float64`                     |
| Mixed precision (AMP)                       | `torch.float16` or `torch.bfloat16` |
| Masks                                       | `torch.bool`                        |
| Labels for classification                   | `torch.long`                        |
| One-hot / binary mask                       | `torch.uint8` or `torch.bool`       |


In [326]:
torch.tensor(data=[1,2],
             dtype=None,
             device=None,
             requires_grad=False)


tensor([1, 2])

| Parameter       | Type           | Purpose                               | Example               |
| --------------- | -------------- | ------------------------------------- | --------------------- |
| `data`          | array-like     | Input data (list, tuple, NumPy array) | `[1, 2, 3]`           |
| `dtype`         | `torch.dtype`  | Explicitly set the tensor’s data type | `torch.float32`       |
| `device`        | `torch.device` | CPU or CUDA device                    | `'cpu'` or `'cuda:0'` |
| `requires_grad` | `bool`         | Track gradients for autograd          | `True`                |


### Tensor datatypes 

**Note:** Tensor datatypes is one of the 3 big errors you'll run into with PyTorch & deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device



In [327]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # what datatype is the tensor (e.g. float32 or float16)
                               device=None, # What device is your tensor on
                               requires_grad=False) # whether or not to track gradients with this tensors operations
float_32_tensor

tensor([3., 6., 9.])

In [328]:
float_32_tensor.dtype

torch.float32

In [329]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [330]:
multi=float_16_tensor * float_32_tensor
multi,multi.dtype

(tensor([ 9., 36., 81.]), torch.float32)

In [331]:
int_32_temsor = torch.tensor([3.0, 6.0, 9.0],
                             dtype=torch.int32,
                             device=0)
int_32_temsor

tensor([3, 6, 9], device='cuda:0', dtype=torch.int32)

In [332]:
float_32_tensor*int_32_temsor

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [333]:
int_32_temsor=int_32_temsor.to(device="cpu")
int_32_temsor

tensor([3, 6, 9], dtype=torch.int32)

In [334]:
float_32_tensor*int_32_temsor

tensor([ 9., 36., 81.])

In [335]:
print(f"Shape of float_16_tensor: {float_16_tensor.shape}")
print(f"Dtype of float_16_tensor: {float_16_tensor.dtype}")
print(f"Device of float_16_tensor: {float_16_tensor.device}")

print(f"Size of float_16_tensor: {float_16_tensor.size()}")
print(f"Number of elements: {float_16_tensor.numel()}")
print(f"Sum of elements: {float_16_tensor.sum().item()}")
print(f"Mean of elements: {float_16_tensor.mean().item()}")


Shape of float_16_tensor: torch.Size([3])
Dtype of float_16_tensor: torch.float16
Device of float_16_tensor: cpu
Size of float_16_tensor: torch.Size([3])
Number of elements: 3
Sum of elements: 18.0
Mean of elements: 6.0


# **Manipulating Tensors (Tensor Operations)**

### **The primary tensor operations discussed here are:**

- **Addition:** Combining tensors element-wise by adding corresponding elements.
- **Subtraction:** Combining tensors element-wise by subtracting corresponding elements.
- **Multiplication (Element-wise)**: Multiplying corresponding elements of tensors.
- **Division (Element-wise):** Dividing corresponding elements of tensors.
- **Matrix Multiplication:** A linear algebra operation that combines tensors (typically 2D) to produce a new tensor.

In [336]:
A = torch.tensor([[1, 2], [3, 4]])
B = torch.tensor([[10, 20], [30, 40]])

In [337]:
A_10= A + 10  # Add 10 to each element of
print(A_10)

A__10=A*10  # Multiply each element of A by 10
print(A__10)

_10_A=A -10 # Subtract 10 from each element of A
print(_10_A)

tensor([[11, 12],
        [13, 14]])
tensor([[10, 20],
        [30, 40]])
tensor([[-9, -8],
        [-7, -6]])


In [338]:
A_10 = A.add(10)  # Add 10 to each element of A
print(A_10)

A__10=A.mul(10)  # Multiply each element of A by 10
print(A__10)

_10_A=A.sub(10)  # Subtract 10 from each element of A
print(_10_A)


tensor([[11, 12],
        [13, 14]])
tensor([[10, 20],
        [30, 40]])
tensor([[-9, -8],
        [-7, -6]])


In [342]:
# Check shapes: A.shape and B.shape — understand what will broadcast.
C = torch.add(A, B)
print(C)  # Output: tensor([[11, 22], [33, 44]])



# Use in-place only if you really need it:
A.add_(B)  # modifies A in place
A
# Note: In-place ops can break gradients. Avoid in-place operations in gradient flow unless you know the implications.

tensor([[21, 42],
        [63, 84]])


tensor([[21, 42],
        [63, 84]])

In [None]:
# Element-wise addition
A_plus_B = A + B  # Add corresponding elements of A and B
print(A_plus_B)  # Output: tensor([[11, 22], [33, 44]])

# Element-wise subtraction
A_minus_B = A - B  # Subtract corresponding elements of A and B
print(A_minus_B)  # Output: tensor([[-9, -18], [-27, -36]])

# Element-wise multiplication
A_times_B = A * B  # Multiply corresponding elements of A and B
print(A_times_B)  # Output: tensor([[10, 40], [90, 160]])

# Element-wise division
A_div_B = A / B  # Divide corresponding elements of A by B
print(A_div_B)  # Output: tensor([[0.1000, 0.1000], [0.1000, 0.1000]])

tensor([[11, 22],
        [33, 44]])
tensor([[ -9, -18],
        [-27, -36]])
tensor([[ 10,  40],
        [ 90, 160]])
tensor([[0.1000, 0.1000],
        [0.1000, 0.1000]])


### Broadcasting Example:

In [None]:
A = torch.tensor([[1, 2], [3, 4]])  # shape (2, 2)
B = torch.tensor([10, 20])          # shape (2,) --> broadcast to (2, 2)

C = A + B  # [[11, 22], [13, 24]]


### Element-wise Multiplication

In [344]:
A = torch.tensor([2, 3])
B = torch.tensor([4, 5])

C = A * B  # tensor([8, 15])
print(C)

"""
⚠️ Pitfalls
Common bug: * is element-wise, not matrix multiplication.
For matrices, A * B ≠ A @ B.
"""

tensor([ 8, 15])


!["dot_product](https://www.mathsisfun.com/algebra/images/matrix-multiply-a.svg)

### (1, 2, 3) • (7, 9, 11) = 1×7 + 2×9 + 3×11 = 58

![alt text](https://www.mathsisfun.com/algebra/images/matrix-multiply-b.svg)
### (1, 2, 3) • (8, 10, 12) = 1×8 + 2×10 + 3×12 = 64

In [370]:
# Matrix Multiplication(dot product)
A = torch.tensor([[1, 2,3], [4,5, 6]]) # shape (2, 3)
B = torch.tensor([[4, 8], [9, 10],[11,12]])  # shape (3, 2)

C = torch.matmul(A, B)
# or
C = A @ B
print(C)  # Output: tensor([[19, 22], [43, 50]])

tensor([[ 55,  64],
        [127, 154]])


In [371]:
import time

start_time = time.time()
result = torch.matmul(A, B)
end_time = time.time()

print(result)
print(f"Execution time: {end_time - start_time:.12f} seconds")

tensor([[ 55,  64],
        [127, 154]])
Execution time: 0.000000000000 seconds


In [372]:

start_time = time.time()
# simulate matrix multiplication by for loop
for i in range(len(A)):
    for j in range(len(B[0])):
        dot_product = 0
        for k in range(len(B)):
            dot_product += A[i][k] * B[k][j]
        print(f"Dot product of row {i} of A and column {j} of B: {dot_product}")

end_time = time.time()
print(f"Execution time: {end_time - start_time:.12f} seconds")

Dot product of row 0 of A and column 0 of B: 55
Dot product of row 0 of A and column 1 of B: 64
Dot product of row 1 of A and column 0 of B: 127
Dot product of row 1 of A and column 1 of B: 154
Execution time: 0.001122236252 seconds


## **1️⃣ Theoretical Rule**

**Definition:**
Given:

* Matrix **A** with shape $(m \times k)$
* Matrix **B** with shape $(k \times n)$

The matrix product:

$$
C = AB
\quad \text{has shape} \quad
(m \times n)
$$

**Key condition:**
✔ The **inner dimensions must match**: the **number of columns** of A = **number of rows** of B.

---

## 📌 **Matrix Product Element-wise**

Each element $c_{ij}$ in $C$ is:

$$
c_{ij} = \sum_{p=1}^{k} a_{ip} b_{pj}
$$

---

## ✅ **2️⃣ Practical Example**

**Example:**
A: (2 × 3)
B: (3 × 4)

Result: C is (2 × 4)

```python
import torch

A = torch.rand(2, 3)
B = torch.rand(3, 4)

C = torch.matmul(A, B)
print(C.shape)  # torch.Size([2, 4])
```

---

## ⚡ **3️⃣ Rules in Plain Language**

| Rule             | Description                     |
| ---------------- | ------------------------------- |
| Inner Dimensions | Must match (`A.cols == B.rows`) |
| Output Shape     | `(A.rows, B.cols)`              |
| Non-Commutative  | `AB` ≠ `BA` in general          |
| Associative      | `(AB)C = A(BC)`                 |
| Distributive     | `A(B + C) = AB + AC`            |
| Scalar Multiple  | `k(AB) = (kA)B = A(kB)`         |

---

## 🔍 **4️⃣ Visual Intuition**

Think of multiplying:

* **Rows of A** by **columns of B**.

**Example:**
If A is (2 × 3) → it has 2 row vectors of length 3.
If B is (3 × 4) → it has 4 column vectors of length 3.
Each output element is a dot product between a row of A and a column of B.

---

## ⚠️ **5️⃣ Common Pitfalls**

🚫 **Confusing `*` and `@`**:
`*` is element-wise, `@` is matrix product.

🚫 **Shape Mismatch**:
If `A` is (3 × 2) and `B` is (3 × 4) → cannot multiply! Must transpose or reshape.

---

## ✅ **6️⃣ Best Practices**

✔ Always check `A.shape` and `B.shape` before `A @ B`.

✔ Use `.T` to transpose if needed:

```python
B_T = B.T  # or B.transpose(0, 1)
```

✔ For batched matmuls:

* Use `torch.bmm` for `(b × m × k)` @ `(b × k × n)`.
* Or `torch.matmul` — supports broadcasting for batch dimensions.

✔ Use `torch.einsum` for complex contraction:

```python
torch.einsum('ik,kj->ij', A, B)
```

---

## 🔬 **7️⃣ Special Notes**

* **Square Matrices:**
  If A and B are both (n × n) → multiplication is valid.

* **Dot Product:**
  A vector dot product is a special case:

  $$
  \text{dot}(x, y) = x^T y
  $$

* **Outer Product:**

  $$
  uv^T = \text{matrix}
  \quad (m \times 1) \times (1 \times n) = m \times n
  $$

---
## ✅ **Summary — Rules Cheat Sheet**

| Concept       | Rule                                           |
| ------------- | ---------------------------------------------- |
| Shapes        | `(m × k)` @ `(k × n)` → `(m × n)`              |
| Order         | `AB ≠ BA`                                      |
| Associative   | `(AB)C = A(BC)`                                |
| Dot product   | Special case: `(1 × k)` @ `(k × 1)` → scalar   |
| Outer product | `(m × 1)` @ `(1 × n)` → `(m × n)`              |
| Broadcasting  | Not automatic — only batch dim can broadcast   |
| Operation     | Use `@` or `torch.matmul` for correct behavior |

---
## ✅**Golden Tip**

**Before multiplying matrices:**

1. Print their shapes.
2. Double-check the inner dimensions.
3. Use `.T` wisely.
4. Write down the shape math on paper if unsure!


In [375]:
A.shape[1]== B.shape[0]

True

In [376]:
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32) #shape (3, 2)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32) #shape (3, 2)

torch.matmul(tensor_A, tensor_B) # (this will error)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [381]:
print("tensor_A:")
print(tensor_A)
print("\ntensor_A Transposed:")
print(tensor_A.T)
print("\ntensor_B:")
print(tensor_B)
print("\ntensor_B Transposed:")
print(tensor_B.T)

tensor_A:
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

tensor_A Transposed:
tensor([[1., 3., 5.],
        [2., 4., 6.]])

tensor_B:
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])

tensor_B Transposed:
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [380]:
result = torch.matmul(tensor_A, tensor_B.T)
print(result)
print(result.shape)  # torch.Size([3, 3])

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])
torch.Size([3, 3])


In [383]:
print(f"tensor_A shape: {tensor_A.shape}, tensor_B shape: {tensor_B.shape}")
print(f"tensor_A Transposed shape: {tensor_A.T.shape}, tensor_B Transposed shape: {tensor_B.T.shape}")



tensor_A shape: torch.Size([3, 2]), tensor_B shape: torch.Size([3, 2])
tensor_A Transposed shape: torch.Size([2, 3]), tensor_B Transposed shape: torch.Size([2, 3])


In [385]:
print(tensor_A @ tensor_B.T)
print((tensor_A @ tensor_B.T).shape)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])
torch.Size([3, 3])


## **MEAN,MAX,MIN,AGGREGATION  ........**

| Category          | PyTorch Ops                                                              |
| ----------------- | ------------------------------------------------------------------------ |
| Summation         | `torch.sum`, `torch.cumsum`, `torch.nansum`                              |
| Product           | `torch.prod`, `torch.cumprod`                                            |
| Mean & Statistics | `torch.mean`, `torch.std`, `torch.var`, `torch.median`, `torch.quantile` |
| Min/Max           | `torch.min`, `torch.max`, `torch.amin`, `torch.amax`                     |
| Argmin/Argmax     | `torch.argmin`, `torch.argmax`                                           |
| Norms             | `torch.norm`                                                             |
| Logical           | `torch.any`, `torch.all`                                                 |
| Count nonzero     | `torch.count_nonzero`                                                    |
| Unique            | `torch.unique`                                                           |


In [393]:
# torch.sum(input, dim=None, keepdim=False)
x = torch.arange(10).reshape(2, 5).float()  # Create a 2x5 tensor with values from 0 to 9
print("Original tensor x:")
print(x)

# Sum all elements in the tensor
total_sum = torch.sum(x)
print("\nSum of all elements in x:", total_sum)  # Output: 45.0

# Sum along columns (dim=0): sums over rows for each column
sum_dim0 = torch.sum(x, dim=0)
print("\nSum along columns (dim=0):", sum_dim0)  # Output: tensor([ 5.,  7.,  9., 11., 13.])

# Sum along rows (dim=1): sums over columns for each row
sum_dim1 = torch.sum(x, dim=1)
print("\nSum along rows (dim=1):", sum_dim1)  # Output: tensor([10., 35.])

# Sum along rows (dim=1) but keep the dimension (result is column vector)
sum_dim1_keepdim = torch.sum(x, dim=1, keepdim=True)
print("\nSum along rows (dim=1) with keepdim=True:\n", sum_dim1_keepdim)  # Output: tensor([[10.], [35.]])


Original tensor x:
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

Sum of all elements in x: tensor(45.)

Sum along columns (dim=0): tensor([ 5.,  7.,  9., 11., 13.])

Sum along rows (dim=1): tensor([10., 35.])

Sum along rows (dim=1) with keepdim=True:
 tensor([[10.],
        [35.]])


In [394]:
# Cumulative sum of all elements (flattened)
cumsum_all = torch.cumsum(x.flatten(), dim=0)
print("Cumulative sum of all elements (flattened):", cumsum_all)

# Cumulative sum along columns (dim=0)
cumsum_dim0 = torch.cumsum(x, dim=0)
print("\nCumulative sum along columns (dim=0):\n", cumsum_dim0)

# Cumulative sum along rows (dim=1)
cumsum_dim1 = torch.cumsum(x, dim=1)
print("\nCumulative sum along rows (dim=1):\n", cumsum_dim1)


Cumulative sum of all elements (flattened): tensor([ 0.,  1.,  3.,  6., 10., 15., 21., 28., 36., 45.])

Cumulative sum along columns (dim=0):
 tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  7.,  9., 11., 13.]])

Cumulative sum along rows (dim=1):
 tensor([[ 0.,  1.,  3.,  6., 10.],
        [ 5., 11., 18., 26., 35.]])


In [431]:
# torch.nansum(input, dim=None, keepdim=False)
# nansum computes the sum of all elements, treating NaNs as zero

x_with_nan = torch.tensor([[0., 1., float('nan'), 3., 4.],
                           [5., 6., 7., float('nan'), 9.]])

print("Tensor with NaNs:")
print(x_with_nan)

nansum_total = torch.nansum(x_with_nan)
print("\nSum of all elements in x_with_nan (ignoring NaNs):", nansum_total)

nansum_dim0 = torch.nansum(x_with_nan, dim=0)
print("\nSum along columns (dim=0, ignoring NaNs):", nansum_dim0)

nansum_dim1 = torch.nansum(x_with_nan, dim=1)
print("\nSum along rows (dim=1, ignoring NaNs):", nansum_dim1)


Tensor with NaNs:
tensor([[0., 1., nan, 3., 4.],
        [5., 6., 7., nan, 9.]])

Sum of all elements in x_with_nan (ignoring NaNs): tensor(35.)

Sum along columns (dim=0, ignoring NaNs): tensor([ 5.,  7.,  7.,  3., 13.])

Sum along rows (dim=1, ignoring NaNs): tensor([ 8., 27.])


In [432]:
# torch.mean(input, dim=None, keepdim=False, dtype=None)
x = torch.arange(10).reshape(2, 5).float()  # 2x5 tensor with values 0-9
print("Original tensor x:")
print(x)

# Mean of all elements
mean_total = torch.mean(x)
print("\nMean of all elements in x:", mean_total)  # Output: 4.5

# Mean along columns (dim=0)
mean_dim0 = torch.mean(x, dim=0)
print("\nMean along columns (dim=0):", mean_dim0)  # Output: tensor([2.5, 3.5, 4.5, 5.5, 6.5])

# Mean along rows (dim=1)
mean_dim1 = torch.mean(x, dim=1)
print("\nMean along rows (dim=1):", mean_dim1)  # Output: tensor([2., 7.])

# Mean along rows (dim=1) with keepdim=True
mean_dim1_keepdim = torch.mean(x, dim=1, keepdim=True)
print("\nMean along rows (dim=1) with keepdim=True:\n", mean_dim1_keepdim)  # Output: tensor([[2.], [7.]])


Original tensor x:
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

Mean of all elements in x: tensor(4.5000)

Mean along columns (dim=0): tensor([2.5000, 3.5000, 4.5000, 5.5000, 6.5000])

Mean along rows (dim=1): tensor([2., 7.])

Mean along rows (dim=1) with keepdim=True:
 tensor([[2.],
        [7.]])


In [433]:
# torch.std(input, dim=None, unbiased=True, keepdim=False)
# torch.var(input, dim=None, unbiased=True, keepdim=False)

print("Original tensor x:")
print(x)

# Standard deviation of all elements
std_total = torch.std(x)
print("\nStandard deviation of all elements in x:", std_total)

# Variance of all elements
var_total = torch.var(x)
print("Variance of all elements in x:", var_total)

# Standard deviation along columns (dim=0)
std_dim0 = torch.std(x, dim=0)
print("\nStandard deviation along columns (dim=0):", std_dim0)

# Variance along columns (dim=0)
var_dim0 = torch.var(x, dim=0)
print("Variance along columns (dim=0):", var_dim0)

# Standard deviation along rows (dim=1)
std_dim1 = torch.std(x, dim=1)
print("\nStandard deviation along rows (dim=1):", std_dim1)

# Variance along rows (dim=1)
var_dim1 = torch.var(x, dim=1)
print("Variance along rows (dim=1):", var_dim1)


Original tensor x:
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

Standard deviation of all elements in x: tensor(3.0277)
Variance of all elements in x: tensor(9.1667)

Standard deviation along columns (dim=0): tensor([3.5355, 3.5355, 3.5355, 3.5355, 3.5355])
Variance along columns (dim=0): tensor([12.5000, 12.5000, 12.5000, 12.5000, 12.5000])

Standard deviation along rows (dim=1): tensor([1.5811, 1.5811])
Variance along rows (dim=1): tensor([2.5000, 2.5000])


In [434]:
print("Original tensor x:")
print(x)
# torch.median(input) returns the median of all elements (flattened)
median_total = torch.median(x)
print("Median of all elements in x:", median_total)

# torch.median(input, dim=0) returns (values, indices) along columns
median_dim0, indices_dim0 = torch.median(x, dim=0)
print("Median along columns (dim=0):", median_dim0)
print("Indices of medians along columns (dim=0):", indices_dim0)

# torch.median(input, dim=1) returns (values, indices) along rows
median_dim1, indices_dim1 = torch.median(x, dim=1)
print("Median along rows (dim=1):", median_dim1)
print("Indices of medians along rows (dim=1):", indices_dim1)


Original tensor x:
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])
Median of all elements in x: tensor(4.)
Median along columns (dim=0): tensor([0., 1., 2., 3., 4.])
Indices of medians along columns (dim=0): tensor([0, 0, 0, 0, 0])
Median along rows (dim=1): tensor([2., 7.])
Indices of medians along rows (dim=1): tensor([2, 2])


In [435]:
# torch.min and torch.max (two forms)

print("Original tensor x:")
print(x)

# Scalar min and max (over all elements)
min_total = torch.min(x)
max_total = torch.max(x)
print("\nMin of all elements in x:", min_total)
print("Max of all elements in x:", max_total)

# Min and max along columns (dim=0): returns (values, indices)
min_dim0, min_indices_dim0 = torch.min(x, dim=0)
max_dim0, max_indices_dim0 = torch.max(x, dim=0)
print("\nMin along columns (dim=0):", min_dim0)
print("Indices of min along columns (dim=0):", min_indices_dim0)
print("Max along columns (dim=0):", max_dim0)
print("Indices of max along columns (dim=0):", max_indices_dim0)

# torch.amin and torch.amax (value only, no indices)
amin_total = torch.amin(x)
amax_total = torch.amax(x)
print("\nAmin of all elements in x:", amin_total)
print("Amax of all elements in x:", amax_total)

# torch.argmin and torch.argmax
argmin_total = torch.argmin(x)
argmax_total = torch.argmax(x)
print("\nArgmin of all elements in x (flattened):", argmin_total)
print("Argmax of all elements in x (flattened):", argmax_total)

# Argmin/argmax along columns (dim=0)
argmin_dim0 = torch.argmin(x, dim=0)
argmax_dim0 = torch.argmax(x, dim=0)
print("\nArgmin along columns (dim=0):", argmin_dim0)
print("Argmax along columns (dim=0):", argmax_dim0)


Original tensor x:
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

Min of all elements in x: tensor(0.)
Max of all elements in x: tensor(9.)

Min along columns (dim=0): tensor([0., 1., 2., 3., 4.])
Indices of min along columns (dim=0): tensor([0, 0, 0, 0, 0])
Max along columns (dim=0): tensor([5., 6., 7., 8., 9.])
Indices of max along columns (dim=0): tensor([1, 1, 1, 1, 1])

Amin of all elements in x: tensor(0.)
Amax of all elements in x: tensor(9.)

Argmin of all elements in x (flattened): tensor(0)
Argmax of all elements in x (flattened): tensor(9)

Argmin along columns (dim=0): tensor([0, 0, 0, 0, 0])
Argmax along columns (dim=0): tensor([1, 1, 1, 1, 1])


## **1️. Reshaping — `torch.reshape` & `.view`**

### 📘 **Theory**

Reshaping reinterprets the same **underlying data buffer** with a **new shape** — no data copy if possible.

* `tensor.reshape(shape)`
* `tensor.view(shape)` (older style, stricter: works only if the tensor is contiguous in memory)

**Condition:**
The **total number of elements** must stay the same.

$$
\text{Shape A} \quad \Rightarrow \quad \text{Shape B} \quad 
\text{only if} \quad \prod A = \prod B
$$


In [436]:
x = torch.arange(1.,11.)
print(x.shape)

y = x.reshape(5,2)
print(y)


torch.Size([10])
tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])


In [437]:
x.reshape(2, -1)


tensor([[ 1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10.]])

In [438]:
# .view() errors out if the tensor is not contiguous → fix with .contiguous().

z= x.view(5,2)  # Reshape x to 2 rows, automatically infer columns
print(z)
z[0][1]=10



tensor([[ 1.,  2.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])


In [439]:
print(z)
print(x)

tensor([[ 1., 10.],
        [ 3.,  4.],
        [ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])
tensor([ 1., 10.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])


In [448]:
x = torch.rand(1, 3, 1, 5)
print(x)
print(x.shape)  # torch.Size([1, 3, 1, 5])

y = torch.squeeze(x)
print(y)
print(y.shape)  # torch.Size([3, 5])

# Only squeeze dim=2 (which is size 1)
torch.squeeze(x, dim=2).shape  # (1, 3, 5)
print(x)


tensor([[[[0.0962, 0.1468, 0.9028, 0.8003, 0.7529]],

         [[0.0500, 0.7613, 0.8566, 0.3957, 0.9235]],

         [[0.6141, 0.1628, 0.9465, 0.3997, 0.9280]]]])
torch.Size([1, 3, 1, 5])
tensor([[0.0962, 0.1468, 0.9028, 0.8003, 0.7529],
        [0.0500, 0.7613, 0.8566, 0.3957, 0.9235],
        [0.6141, 0.1628, 0.9465, 0.3997, 0.9280]])
torch.Size([3, 5])
tensor([[[[0.0962, 0.1468, 0.9028, 0.8003, 0.7529]],

         [[0.0500, 0.7613, 0.8566, 0.3957, 0.9235]],

         [[0.6141, 0.1628, 0.9465, 0.3997, 0.9280]]]])


In [451]:
x = torch.tensor([1, 2, 3])  # (3,)
y = torch.unsqueeze(x, dim=0)  # (1, 3)
z = torch.unsqueeze(x, dim=1)  # (3, 1)
print("Original tensor x:", x)
print("Unsqueezed tensor y (dim=0):", y)
print("Unsqueezed tensor z (dim=1):", z)

Original tensor x: tensor([1, 2, 3])
Unsqueezed tensor y (dim=0): tensor([[1, 2, 3]])
Unsqueezed tensor z (dim=1): tensor([[1],
        [2],
        [3]])


In [452]:
a = torch.tensor([1, 2])
b = torch.tensor([3, 4])
c = torch.stack([a, b], dim=0)
print(c)  # [[1, 2], [3, 4]] → shape (2, 2)

d = torch.stack([a, b], dim=1)
print(d)  # [[1, 3], [2, 4]] → shape (2, 2)


tensor([[1, 2],
        [3, 4]])
tensor([[1, 3],
        [2, 4]])


## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way


## PyTorch tensors & NumPy

NumPy is a popular scientific Python numerical computing library.

And because of this, PyTorch has functionality to interact with it.

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [454]:
# NumPy array to tensor
import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # warning: when converting from numpy -> pytorch, pytorch reflects numpy's default datatype of float64 unless specified otherwise
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [455]:
# Change the value of array, what will this do to `tensor`?
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [456]:
# Tensor to NumPy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [457]:
# Change the tesnor, what happens to `numpy_tensor`?
tensor = tensor + 1
tensor, numpy_tensor


(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

# **Reproducbility**
- Reproducibility means that someone (including you) can run the same code or experiment again and get the same result, within a reasonable margin of error

In [509]:
random_tensor_a =torch.rand(3,4)
random_tensor_b =torch.rand(3,4)


print("Random Tensor A:")
print(random_tensor_a)
print("Random Tensor B:")
print(random_tensor_b)

print("- Are they equal??\n", random_tensor_a== random_tensor_b)

Random Tensor A:
tensor([[0.5343, 0.3798, 0.8840, 0.8208],
        [0.9589, 0.8026, 0.4933, 0.6605],
        [0.7398, 0.1719, 0.5888, 0.0170]])
Random Tensor B:
tensor([[0.0110, 0.7976, 0.9729, 0.5154],
        [0.4507, 0.6798, 0.6274, 0.1614],
        [0.4737, 0.8413, 0.8910, 0.3652]])
- Are they equal??
 tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [513]:
RANDOM_SEED=42
torch.manual_seed(RANDOM_SEED)

random_tensor_c= torch.rand(3,4)
random_tensor_d = torch.rand(3,4)
print("Random Tensor C:")
print(random_tensor_c)
print("Random Tensor D:")
print(random_tensor_d)

print("- Are they equal??\n", random_tensor_c== random_tensor_d)

Random Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
Random Tensor D:
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
- Are they equal??
 tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [514]:
RANDOM_SEED=42

torch.manual_seed(RANDOM_SEED)
random_tensor_c= torch.rand(3,4)

torch.manual_seed(RANDOM_SEED)
random_tensor_d = torch.rand(3,4)

print("Random Tensor C:")
print(random_tensor_c)
print("Random Tensor D:")
print(random_tensor_d)

print("- Are they equal??\n", random_tensor_c== random_tensor_d)

Random Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
Random Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
- Are they equal??
 tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


In [554]:
import random
import numpy as np
import torch

def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using multi-GPU

    # Force deterministic operations
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print(f"[INFO] Random seed set to {seed}.")

seed_everything(42)

random_seed_e=torch.rand(3,4)
random_seed_f= torch.rand(3,4)

print("Random Seed E:")
print(random_seed_e)
print("Random Seed F:")
print(random_seed_f)
print("- Are they equal??\n", random_seed_e== random_seed_f)

[INFO] Random seed set to 42.
Random Seed E:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
Random Seed F:
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
- Are they equal??
 tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [575]:
def get_device(prefer_gpu=True, index=0, verbose=True):
    """
    Get the computing device (GPU if available and preferred, else CPU),
    and print concise hardware info.
    """
    if prefer_gpu and torch.cuda.is_available():
        device = torch.device(f'cuda:{index}')
        if verbose:
            print(f"[INFO] Using GPU {index}: {torch.cuda.get_device_name(index)}")
            print(f"[INFO] CUDA Runtime Version: {torch.version.cuda}")
            print(f"[INFO] cuDNN Version: {torch.backends.cudnn.version()}")
    else:
        device = torch.device('cpu')
        if verbose:
            print("[INFO] Using CPU.")
    if verbose:
        print(f"[INFO] PyTorch Version: {torch.__version__}")
    return device

# Example usage
device = get_device(prefer_gpu=True, index=0, verbose=True)


[INFO] Using GPU 0: NVIDIA GeForce RTX 3070 Laptop GPU
[INFO] CUDA Runtime Version: 12.1
[INFO] cuDNN Version: 90100
[INFO] PyTorch Version: 2.5.1+cu121


In [576]:
x = torch.randn(3, 3, device=device)

y = torch.randn(3, 3)
y = y.to(device)

z = x + y
print(z.device)


cuda:0


In [577]:
x = torch.randn(3, 3)

y = torch.randn(3, 3)

z = x + y
print(z.device)

cpu


In [578]:
x = torch.randn(3, 3, device=device)

y = torch.randn(3, 3)

z = x + y
print(z.device)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [580]:
print(f"Device of x: {x.device}")
transform_numpy= x.numpy()  # Convert tensor to NumPy array

Device of x: cuda:0


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [583]:
transform_numpy_correct= x.to(device="cpu").numpy()
transform_numpy_correct

array([[-0.6973659 , -1.8688258 , -0.8831874 ],
       [-1.6627042 , -0.43243223,  0.9504958 ],
       [ 0.6620458 ,  0.04456767,  0.57203025]], dtype=float32)

In [584]:
transform_numpy_correct= x.cpu().numpy()
transform_numpy_correct

array([[-0.6973659 , -1.8688258 , -0.8831874 ],
       [-1.6627042 , -0.43243223,  0.9504958 ],
       [ 0.6620458 ,  0.04456767,  0.57203025]], dtype=float32)