In [None]:
import numpy as np
import torch

In [None]:
!pip list | grep torch

torch                                 2.6.0+cu124
torchao                               0.10.0
torchaudio                            2.6.0+cu124
torchdata                             0.11.0
torchsummary                          1.5.1
torchtune                             0.6.1
torchvision                           0.21.0+cu124


In [None]:
!pip list | grep numpy

numpy                                 2.0.2


# Fundamentals

## Tensor Creation

### Create tensor from list

In [None]:
# Nested Python list (2 rows x 2 columns) used as raw input for tensor creation.
data = [
    [1, 2],
    [3, 4],
]

In [None]:
# Build a tensor from the nested Python list `data`; the values are copied
# and the dtype is inferred from them (integers here).
tensor_fromList = torch.tensor(data)
print(tensor_fromList)

tensor([[1, 2],
        [3, 4]])


### Create tensor from numpy array

In [None]:
# 2x2 integer NumPy array that is converted to a tensor in the next cell.
np_array = np.array(
    [
        [1, 2],
        [3, 4],
    ]
)

In [None]:
# Wrap the NumPy array as a tensor. Per the PyTorch docs, torch.from_numpy
# shares memory with `np_array` instead of copying it, so in-place changes
# to one are visible in the other.
tensor_from_numpy = torch.from_numpy(np_array)
print(tensor_from_numpy)

tensor([[1, 2],
        [3, 4]])


### Create special tensors

In [None]:
# 2x3 tensor filled with zeros (shape passed as a single tuple).
zeros_tensor = torch.zeros((2, 3))
print(zeros_tensor)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [None]:
# 2x3 tensor filled with ones (shape passed as a single tuple).
ones_tensor = torch.ones((2, 3))
print(ones_tensor)

tensor([[1., 1., 1.],
        [1., 1., 1.]])


#### Normal Distribution
* Creates a 2×3 matrix of random numbers.
* Each number is sampled from a normal distribution (mean = 0, standard deviation = 1).
* We use it when we want to test or **initialize models with random inputs**.

In [None]:
# 2x3 tensor of samples from the standard normal distribution; values differ on every run.
random_tensor = torch.randn((2, 3))
print(random_tensor)

tensor([[ 1.4036,  2.1154,  0.0914],
        [ 0.5143, -0.0195,  0.3732]])


#### Uniform Distribution
* Creates a 2×3 matrix with random numbers between 0 and 1.
* The numbers come from a uniform distribution.
* We use it to test or **initialize models when we want values in a known range**.

In [None]:
# 2x3 tensor of samples drawn uniformly from [0, 1); values differ on every run.
uniform_tensor = torch.rand((2, 3))
print(uniform_tensor)

tensor([[0.7950, 0.3591, 0.3056],
        [0.8326, 0.7021, 0.7701]])


### Create sequence tensors

In [None]:
# Evenly stepped integers: start is inclusive, end is exclusive.
range_tensor = torch.arange(start=0, end=10, step=2)
print(range_tensor)

tensor([0, 2, 4, 6, 8])


In [None]:
# Five evenly spaced points covering [0, 1]; both endpoints are included.
linspace_tensor = torch.linspace(start=0, end=1, steps=5)
print(linspace_tensor)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


## Tensor Operations

In [None]:
# Two 2x2 float32 operands shared by the arithmetic and matrix examples.
tensor_a = torch.tensor(
    [[1, 2],
     [3, 4]],
    dtype=torch.float32,
)
tensor_b = torch.tensor(
    [[5, 6],
     [7, 8]],
    dtype=torch.float32,
)
print(tensor_a, tensor_b, sep="\n")

tensor([[1., 2.],
        [3., 4.]])
tensor([[5., 6.],
        [7., 8.]])


### Basic Arithmetic Operations

In [None]:
# Element-wise sum; torch.add is the functional form of the `+` operator.
addition_res = torch.add(tensor_a, tensor_b)
print(addition_res)

tensor([[ 6.,  8.],
        [10., 12.]])


In [None]:
# Element-wise difference; torch.sub is the functional form of the `-` operator.
substraction_res = torch.sub(tensor_a, tensor_b)
print(substraction_res)

tensor([[-4., -4.],
        [-4., -4.]])


In [None]:
# Element-wise (Hadamard) product, not matrix multiplication; torch.mul mirrors `*`.
multiplication_res = torch.mul(tensor_a, tensor_b)
print(multiplication_res)

tensor([[ 5., 12.],
        [21., 32.]])


In [None]:
# Element-wise true division; torch.div mirrors the `/` operator.
division_res = torch.div(tensor_a, tensor_b)
print(division_res)

tensor([[0.2000, 0.3333],
        [0.4286, 0.5000]])


### Matrix Operation

#### Multiplication

```
[[1, 2],   @   [[5, 6],   =   [[1·5 + 2·7, 1·6 + 2·8],   =   [[19, 22],
 [3, 4]]        [7, 8]]        [3·5 + 4·7, 3·6 + 4·8]]        [43, 50]]
```
    


##### [Broadcasting semantics](https://docs.pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics)
✅ Broadcasting Rules (PyTorch)
Two tensors are broadcastable if:
1. Both tensors have at least one dimension.
2. For each dimension (**starting from the last**):
  * The sizes are equal, or
  * One of the sizes is 1, or
  * The dimension does not exist in one tensor (i.e., it will be padded with 1).

```python
# same shapes are always broadcastable (i.e. the above rules always hold)
x=torch.empty(5,7,3)
y=torch.empty(5,7,3)

x=torch.empty((0,))
y=torch.empty(2,2)
# x and y are not broadcastable, because x does not have at least 1 dimension

# can line up trailing dimensions
x=torch.empty(5,3,4,1)
y=torch.empty(  3,1,1)
# x and y are broadcastable.
# 1st trailing dimension: both have size 1
# 2nd trailing dimension: y has size 1
# 3rd trailing dimension: x size == y size
# 4th trailing dimension: y dimension doesn't exist

x=torch.empty(5,2,4,1)
y=torch.empty(  3,1,1)
# x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3
```



##### torch.mm vs. torch.matmul

| Feature              | `torch.mm`                              | `torch.matmul`                                               |
| -------------------- | --------------------------------------- | ------------------------------------------------------------ |
| Supported dimensions | **Only works on 2D tensors**            | Works on **1D, 2D, and higher-dimensional tensors**          |
| Broadcasting         | ❌ Not supported                         | ✅ Supported (for batched matrix multiplication)              |
| Flexibility          | Fixed input shapes: `(m, n)` × `(n, p)` | Flexible: supports vector × matrix, matrix × matrix, batched |
| Typical use case     | Simple matrix multiplication            | Batched matrix multiplication or vector-matrix operations    |


In [None]:
# Matrix product of the two 2x2 operands (method form of torch.matmul).
matrix_mul_res = tensor_a.matmul(tensor_b)
print(matrix_mul_res)

tensor([[19., 22.],
        [43., 50.]])


#### Dot Product
* Computes the dot product of two 1D tensors.
```
a = [a₁, a₂, ..., aₙ]
b = [b₁, b₂, ..., bₙ]
a • b = a₁·b₁ + a₂·b₂ + ... + aₙ·bₙ
```

In [None]:
# torch.dot only accepts 1-D inputs, so flatten the matrix first and
# show the shape before and after.
tensor_a_flat = torch.flatten(tensor_a)
print(tensor_a.shape, tensor_a_flat.shape, sep="\n")

torch.Size([2, 2])
torch.Size([4])


In [None]:
# Same flattening for the second operand; shape shown before and after.
tensor_b_flat = torch.flatten(tensor_b)
print(tensor_b.shape, tensor_b_flat.shape, sep="\n")

torch.Size([2, 2])
torch.Size([4])


In [None]:
# Sum of element-wise products of the two flattened vectors (method form of torch.dot).
dot_product_res = tensor_a_flat.dot(tensor_b_flat)
print(dot_product_res)

tensor(70.)


### Statistical Operations

In [None]:
# Sum over every element, returned as a 0-dim tensor.
print(tensor_a.sum())

tensor(10.)


In [None]:
# Arithmetic mean over every element.
print(tensor_a.mean())

tensor(2.5000)


In [None]:
# Largest element of the whole tensor (no dim given, so a 0-dim tensor is returned).
print(tensor_a.max())

tensor(4.)


In [None]:
# Smallest element of the whole tensor (no dim given, so a 0-dim tensor is returned).
print(tensor_a.min())

tensor(1.)


##### ✅ torch.std(tensor_a)

```python
torch.std(tensor_a)
```

* Flattens the tensor to: `[1, 2, 3, 4]`
* Calculates **standard deviation** with `unbiased=True` (i.e. divides by `N - 1`)

### 📐 Calculation:

* Total elements: `N = 4`

* Mean:

  $$
  \bar{x} = \frac{1 + 2 + 3 + 4}{4} = 2.5
  $$

* Sum of squared differences:

  $$
  (1 - 2.5)^2 + (2 - 2.5)^2 + (3 - 2.5)^2 + (4 - 2.5)^2 = 5.0
  $$

* Standard deviation (unbiased):

  $$
  \sqrt{\frac{5.0}{3}} ≈ 1.291
  $$

```python
print(torch.std(tensor_a))  # tensor(1.2910)
```

---

## ✅ Compute Standard Deviation by Dimension

### `dim=0` – Column-wise

```python
torch.std(tensor_a, dim=0)
```

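* Column 0: \[1, 3] → mean = 2, N = 2 (so divide by N − 1 = 1)

  $$
  \sqrt{\frac{(1 - 2)^2 + (3 - 2)^2}{2 - 1}} = \sqrt{2} \approx 1.4142
  $$
* Column 1: \[2, 4] → mean = 3, N = 2 (so divide by N − 1 = 1)

  $$
  \sqrt{\frac{(2 - 3)^2 + (4 - 3)^2}{2 - 1}} = \sqrt{2} \approx 1.4142
  $$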

```python
tensor([1.4142, 1.4142])
```

---

### `dim=1` – Row-wise

```python
torch.std(tensor_a, dim=1)
```

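* Row 0: \[1, 2] → mean = 1.5, N = 2 (so divide by N − 1 = 1)

  $$
  \sqrt{\frac{(1 - 1.5)^2 + (2 - 1.5)^2}{2 - 1}} = \sqrt{0.5} \approx 0.7071
  $$
* Row 1: \[3, 4] → mean = 3.5, N = 2 (so divide by N − 1 = 1)

  $$
  \sqrt{\frac{(3 - 3.5)^2 + (4 - 3.5)^2}{2 - 1}} = \sqrt{0.5} \approx 0.7071
  $$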

```python
tensor([0.7071, 0.7071])
```

---

## 📝 Notes

* `torch.std(tensor_a)` uses `unbiased=True` by default (sample standard deviation)
* Use `unbiased=False` for population standard deviation:

  ```python
  torch.std(tensor_a, unbiased=False)
  ```

##### ✅ What is "degrees of freedom"?

**Degrees of freedom (DoF)** means:

> "How many numbers can vary freely when you calculate something."

##### 🎯 Why is it **N − 1** when calculating standard deviation?

When you calculate standard deviation from a sample, you **first** compute the **mean** (average).
Once the mean is fixed, **only (N − 1)** numbers can vary freely.

📌 Example:

Imagine you have 3 numbers, but you already calculated the mean.

```python
a + b + c = 30   →  mean = 10
```

If you choose **any two** numbers (say, `a = 8`, `b = 9`),
then the **third** number `c` is already fixed:

```
c = 30 - a - b = 30 - 8 - 9 = 13
```

➡️ Only **2 numbers** can vary freely.
So: **degrees of freedom = 3 − 1 = 2**

---

## 🧠 Why does this matter?

When calculating variance or standard deviation:

* You are using the **sample mean**, not the true mean.
* That **uses up one degree of freedom**.
* So, divide by **N − 1** to correct for that.

This helps your result better **match the true population**.

---

✅ Summary

| Concept               | Meaning                            |
| --------------------- | ---------------------------------- |
| Degrees of Freedom    | How many values can vary freely    |
| In standard deviation | Subtract 1 because mean is used    |
| So use N−1            | To make the estimate more accurate |

##### 📊 Variance vs Standard Deviation in PyTorch

| Feature                | **Variance** (`torch.var`)             | **Standard Deviation** (`torch.std`)    |
| ---------------------- | -------------------------------------- | --------------------------------------- |
| **Meaning**            | Average squared distance from the mean | Square root of variance                 |
| **Measures**           | How spread out the data is             | Same, but in original units             |
| **Formula**            | $\frac{1}{N-1} \sum (x_i - \bar{x})^2$ | $\sqrt{\text{variance}}$                |
| **Biased vs Unbiased** | Controlled by `unbiased=True/False`    | Same                                    |
| **Output Unit**        | Squared unit (e.g., cm², dollars²)     | Same as input (e.g., cm, dollars)       |
| **Use in Practice**    | Used in math/stats formulas            | Used when we want human-readable spread |
| **In PyTorch**         | `torch.var(x)`                         | `torch.std(x)`                          |

In [None]:
# Sample standard deviation over all four elements (divides by N - 1 by default),
# matching the hand calculation above: sqrt(5 / 3) ≈ 1.2910.
print(torch.std(tensor_a))

tensor(1.2910)


### Tensor Shape Operations

In [None]:
# Random 3-D tensor of shape (2, 3, 4) = 24 elements, used by the reshape examples.
# No seed is set, so the values differ on every run.
input_tensor = torch.randn(2, 3, 4)
print(input_tensor)

tensor([[[ 1.3921, -0.0109,  0.9984, -1.5708],
         [-0.6491,  1.5410,  0.1815,  0.1415],
         [-0.2805, -0.3807,  0.3349, -2.4612]],

        [[-0.9851,  1.1713,  0.9401, -0.7365],
         [ 0.4465,  0.2034, -1.0256, -1.3213],
         [ 0.8487,  1.4109,  0.5559, -0.4591]]])


#### Reshape Tensor

##### Tensor.view(*shape) → Tensor
* Returns a new tensor with the same data as the self tensor but of a different shape.

In [None]:
# Reinterpret the (2, 3, 4) tensor as 6 rows of 4 values; the element count (24)
# is unchanged and the element order is preserved, as the printed rows show.
reshaped_tensor = input_tensor.view(6, 4)
print(reshaped_tensor)

tensor([[ 1.3921, -0.0109,  0.9984, -1.5708],
        [-0.6491,  1.5410,  0.1815,  0.1415],
        [-0.2805, -0.3807,  0.3349, -2.4612],
        [-0.9851,  1.1713,  0.9401, -0.7365],
        [ 0.4465,  0.2034, -1.0256, -1.3213],
        [ 0.8487,  1.4109,  0.5559, -0.4591]])


In [None]:
# Same reshape as view(6, 4), but letting PyTorch compute the row count:
# 24 elements / 4 columns = 6 rows.
reshaped_tensor = input_tensor.view(-1, 4) # the size -1 is inferred from other dimensions
print(reshaped_tensor)

tensor([[ 1.3921, -0.0109,  0.9984, -1.5708],
        [-0.6491,  1.5410,  0.1815,  0.1415],
        [-0.2805, -0.3807,  0.3349, -2.4612],
        [-0.9851,  1.1713,  0.9401, -0.7365],
        [ 0.4465,  0.2034, -1.0256, -1.3213],
        [ 0.8487,  1.4109,  0.5559, -0.4591]])


##### Tensor.reshape(*shape) → Tensor
* Returns a tensor with the same data and number of elements as self but with the specified shape.



In [None]:
# reshape() gives the same 24 elements as 3 rows of 8; unlike view(), it is
# documented to fall back to a copy when a view is not possible.
reshaped_safe_tensor = input_tensor.reshape(3, 8)
print(reshaped_safe_tensor)

tensor([[ 1.3921, -0.0109,  0.9984, -1.5708, -0.6491,  1.5410,  0.1815,  0.1415],
        [-0.2805, -0.3807,  0.3349, -2.4612, -0.9851,  1.1713,  0.9401, -0.7365],
        [ 0.4465,  0.2034, -1.0256, -1.3213,  0.8487,  1.4109,  0.5559, -0.4591]])


##### tensor.view() vs tensor.reshape()
| Feature                | `tensor.view()`                       | `tensor.reshape()`                          |
| ---------------------- | ------------------------------------- | ------------------------------------------- |
| **Purpose**            | Change the shape of a tensor          | Also changes the shape of a tensor          |
| **Must be contiguous** | ✅ Yes, **requires contiguous memory** | ❌ No, will create a copy if needed          |
| **Returns**            | A view (no copy, same memory)         | May return a view or a copy                 |
| **Speed**              | Faster (if tensor is contiguous)      | Slightly slower (handles more cases)        |
| **When to use**        | When you're sure tensor is contiguous | Safer if you're unsure or doing complex ops |


##### 📘 What does "contiguous" mean?
* A contiguous tensor **stores its elements in memory in the same order as its logical (row-major) layout**, with no gaps or reordering.
* Some operations (like **transpose**) make tensors **non-contiguous**.

In [None]:
# A freshly created tensor is laid out row-major in memory, so it is contiguous.
x = torch.randn(2,3)
print(x)
print(x.shape)
# The conditional expression must be parenthesised: `+` binds tighter than
# `if/else`, so without the parentheses a non-contiguous tensor would print a
# bare "False" with no label.
print("x is contigious: " + ("True" if x.is_contiguous() else "False"))

tensor([[ 0.8279,  1.1723, -0.1284],
        [-2.5598, -0.5642,  1.3806]])
torch.Size([2, 3])
x is contigious: True


In [None]:
# Transposing returns a view whose shape is reversed while the underlying
# storage is untouched, so the result is no longer contiguous.
y = x.T
print(y, y.shape, f"y is contigious: {y.is_contiguous()}", sep="\n")

tensor([[ 0.8279, -2.5598],
        [ 1.1723, -0.5642],
        [-0.1284,  1.3806]])
torch.Size([3, 2])
y is contigious: False


#### Transpose

In [None]:
# Fresh random (2, 3, 4) tensor for the transpose example; this rebinds
# `input_tensor`, replacing the one used in the reshape cells.
input_tensor = torch.randn(2,3,4)
print(input_tensor)

tensor([[[ 0.1172,  0.9665,  2.7091, -0.5076],
         [ 1.3727, -0.5623,  0.1293, -1.0053],
         [ 0.2673, -0.2771, -1.0179,  0.4450]],

        [[-1.7280,  0.8289,  0.5729,  0.2220],
         [-0.6247, -0.1284, -1.5718,  1.4067],
         [-0.5254, -0.7060,  1.8072, -0.9697]]])


#####  Tensor.transpose(dim0, dim1) → Tensor
Returns a tensor that is a transposed version of input. The given dimensions **dim0 and dim1 are swapped**.



In [None]:
# Swap the first and last axes: shape (2, 3, 4) becomes (4, 3, 2).
# The result is reported together with its contiguity flag.
transposed_tensor = input_tensor.transpose(0, 2)
print(
    transposed_tensor,
    "----",
    f"is contigious: {transposed_tensor.is_contiguous()}",
    sep="\n",
)

tensor([[[ 0.1172, -1.7280],
         [ 1.3727, -0.6247],
         [ 0.2673, -0.5254]],

        [[ 0.9665,  0.8289],
         [-0.5623, -0.1284],
         [-0.2771, -0.7060]],

        [[ 2.7091,  0.5729],
         [ 0.1293, -1.5718],
         [-1.0179,  1.8072]],

        [[-0.5076,  0.2220],
         [-1.0053,  1.4067],
         [ 0.4450, -0.9697]]])
----
is contigious: False


#### Squeeze and unsqueeze dimensions

##### tensor.squeeze()
* Returns a tensor with all specified dimensions of input of size 1 removed.

[![Screenshot-2025-07-20-at-12-42-47-PM.png](https://i.postimg.cc/yNrbw5YR/Screenshot-2025-07-20-at-12-42-47-PM.png)](https://postimg.cc/xkM5m6mj)

In [None]:
# 5-D tensor of ones with two singleton axes (dims 1 and 4) for the squeeze demos.
x = torch.ones((2, 1, 2, 2, 1))
print(x.shape, x, sep="\n")

torch.Size([2, 1, 2, 2, 1])
tensor([[[[[1.],
           [1.]],

          [[1.],
           [1.]]]],



        [[[[1.],
           [1.]],

          [[1.],
           [1.]]]]])


In [None]:
# With no argument, every axis of size 1 is dropped: (2, 1, 2, 2, 1) -> (2, 2, 2).
y = x.squeeze()
print(y.shape, y, sep="\n")

torch.Size([2, 2, 2])
tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])


In [None]:
# Dim 0 has size 2 (not 1), so squeeze(0) is a no-op and the shape is unchanged.
y = x.squeeze(0)
print(y.shape)

torch.Size([2, 1, 2, 2, 1])


In [None]:
# Dim 1 has size 1, so only that axis is removed: (2, 1, 2, 2, 1) -> (2, 2, 2, 1).
y = x.squeeze(1)
print(y.shape)

torch.Size([2, 2, 2, 1])


In [None]:
# A tuple of dims is accepted; of the listed dims only those with size 1
# (dims 1 and 4 here) are removed, the others are left as they are.
y = x.squeeze((1,2,3,4))
print(y.shape)

torch.Size([2, 2, 2])


##### tensor.unsqueeze(dim)
* dim (int) – the index at which to insert the singleton dimension
* Returns a new tensor with a dimension of size 1 inserted at the specified position.
* The returned tensor shares the same underlying data with this tensor.
* Useful for **reshaping tensors for broadcasting** or neural nets.
* Opposite of squeeze() (which removes dimensions of size 1).

In [None]:
# 1-D tensor with three elements; starting point for the unsqueeze example.
x = torch.tensor([1, 2, 3])
print(x.shape)

torch.Size([3])


In [None]:
# Insert a size-1 axis at position 0: shape (3,) -> (1, 3), i.e. a single-row matrix.
y = x.unsqueeze(0)
print(y.shape)
print(y)

torch.Size([1, 3])
tensor([[1, 2, 3]])


In [None]:
# 2-D input for the second unsqueeze example (rebinds `x`).
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])   # shape: (2, 3)

In [None]:
# Functional form of unsqueeze: a size-1 axis is inserted between the
# existing two axes.
y = torch.unsqueeze(x, 1)  # shape: (2, 1, 3)
print(y.shape)

torch.Size([2, 1, 3])


#### Concatenate Tensors

In [None]:
# Two integer matrices of shape (2, 3) used as inputs for torch.cat.
# Note: this rebinds tensor_a / tensor_b from the earlier arithmetic section.
tensor_a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(tensor_a.shape)
print(tensor_a)

print("---")

tensor_b = torch.tensor([[7, 8, 9], [10, 11, 12]])
# Report tensor_b's own shape; the cell previously printed tensor_a.shape a
# second time, which only looked right because both shapes happen to match.
print(tensor_b.shape)
print(tensor_b)

torch.Size([2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])
---
torch.Size([2, 3])
tensor([[ 7,  8,  9],
        [10, 11, 12]])


In [None]:
# Concatenate along dimension 0
cat_dim0 = torch.cat([tensor_a, tensor_b], dim=0)
print(cat_dim0.shape)
print(cat_dim0)

torch.Size([4, 3])
tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])


In [None]:
# Join the two matrices side by side (along dim 1): columns are appended, rows stay at 2.
cat_dim1 = torch.cat((tensor_a, tensor_b), dim=1)
print(cat_dim1.shape, cat_dim1, sep="\n")

torch.Size([2, 6])
tensor([[ 1,  2,  3,  7,  8,  9],
        [ 4,  5,  6, 10, 11, 12]])


### Tensor Indexing and Slicing

In [None]:
# Preview of the raw 1-D sequence 0..23 before it is reshaped in the next cell.
torch.arange(24)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23])

In [None]:
# Lay the integers 0..23 out as a 4x6 matrix; this is the running example
# for the indexing and slicing cells.
input_tensor = torch.arange(24).reshape((4, 6))
print(input_tensor.shape, input_tensor, sep="\n")

torch.Size([4, 6])
tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23]])


#### Basic Indexing

In [None]:
# A single integer index selects along dim 0, i.e. the first row.
first_row_tensor = input_tensor[0]
print(first_row_tensor)

tensor([0, 1, 2, 3, 4, 5])


In [None]:
# `:` keeps every row and `0` picks the first column. The selected elements
# are 6 apart in the original storage, so the result is not contiguous.
first_col_tensor = input_tensor[:, 0]
print(
    first_col_tensor,
    f"is contigious: {first_col_tensor.is_contiguous()}",
    sep="\n",
)

tensor([ 0,  6, 12, 18])
is contigious: False


In [None]:
# Row 1, column 2 -> a 0-dim tensor holding the single value 8.
specific_element = input_tensor[1,2]
print(specific_element)

tensor(8)


#### Slicing Operation

In [None]:
# Rows 0 and 1. Whole leading rows form one unbroken run of the storage,
# so this slice stays contiguous.
first_two_rows_tensor = input_tensor[:2]
print(
    first_two_rows_tensor,
    f"is contigious: {first_two_rows_tensor.is_contiguous()}",
    sep="\n",
)

tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])
is contigious: True


In [None]:
# Columns 0 and 1 of every row. Each row skips the remaining four columns
# in storage, so the slice is not contiguous.
first_two_cols_tensor = input_tensor[:, :2]
print(
    first_two_cols_tensor,
    f"is contigious: {first_two_cols_tensor.is_contiguous()}",
    sep="\n",
)

tensor([[ 0,  1],
        [ 6,  7],
        [12, 13],
        [18, 19]])
is contigious: False


In [None]:
# ✅ Left-inclusive, right-exclusive
submatrix_tensor = input_tensor[1:3, 2:4] # rows 1 and 2 (but not 3)
print(submatrix_tensor)
print("is contigious: " + ("True" if submatrix_tensor.is_contiguous() else "False"))

tensor([[ 8,  9],
        [14, 15]])
is contigious: False


#### Boolean indexing
* This selects only the elements where the mask is True
* The result is a **1D tensor** of just the odd numbers

In [None]:
# Element-wise comparison: produces a boolean tensor with the same shape as
# input_tensor, True wherever the value is odd.
input_tensor % 2 == 1

tensor([[False,  True, False,  True, False,  True],
        [False,  True, False,  True, False,  True],
        [False,  True, False,  True, False,  True],
        [False,  True, False,  True, False,  True]])

In [None]:
# Boolean mask: True where the element is odd.
mask = input_tensor % 2 == 1
# Indexing with the mask keeps only the True positions and returns them as a 1-D tensor.
input_tensor[mask]

tensor([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23])

#### Advanced Indexing

In [None]:
# Re-display the 4x6 example matrix for reference before indexing it with a tensor.
input_tensor

tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23]])

In [None]:
# Index with a tensor of row numbers: rows 1 and 2 are gathered (in that order)
# into a new (2, 6) tensor.
indices = torch.tensor([1, 2])
selected_rows = input_tensor[indices]
print(selected_rows.shape)
print(selected_rows)

torch.Size([2, 6])
tensor([[ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17]])


### Devices and Data Types

#### Data Types

In [None]:
# 32-bit integer tensor; the explicitly requested dtype is shown in the printed repr.
int_tensor = torch.tensor((1, 2, 3), dtype=torch.int32)
print(int_tensor)

tensor([1, 2, 3], dtype=torch.int32)


In [None]:
# Single-precision (32-bit) floating-point tensor, shown together with its dtype.
float_tensor = torch.tensor((1.0, 2.0, 3.0), dtype=torch.float32)
print(float_tensor, float_tensor.dtype, sep="\n")

tensor([1., 2., 3.])
torch.float32


In [None]:
# Double-precision (64-bit) floating-point tensor, shown together with its dtype.
double_tensor = torch.tensor((1.0, 2.0, 3.0), dtype=torch.float64)
print(double_tensor, double_tensor.dtype, sep="\n")

tensor([1., 2., 3.], dtype=torch.float64)
torch.float64


#### Type Conversion

In [None]:
# Cast the int32 tensor to float32; .to(torch.float32) is the explicit
# spelling of the .float() shorthand.
converted_tensor = int_tensor.to(torch.float32)
print(converted_tensor, converted_tensor.dtype, sep="\n")

tensor([1., 2., 3.])
torch.float32


#### Device Management

In [None]:
# No device argument is given, so the tensor lives on the CPU
# (as the printed device confirms).
cpu_tensor = torch.randn(3, 3)
print(f"CPU tensor devide: {cpu_tensor.device}")

CPU tensor devide: cpu


#### Check if CUDA is available

In [None]:
# Demonstrate moving tensors between devices; everything GPU-related is
# guarded so the cell also runs on CPU-only machines.
if torch.cuda.is_available():
  # .cuda() must be *called*. Without the parentheses gpu_tensor would be the
  # bound method itself, and the .device / .cpu() accesses below would raise
  # AttributeError.
  gpu_tensor = cpu_tensor.cuda() # move to GPU (returns a copy on the CUDA device)
  print(f"GPU tensor device: {gpu_tensor.device}")

  # create tensor on GPU directly, avoiding a CPU allocation followed by a transfer
  gpu_direct_tensor = torch.randn(2, 3, device='cuda')
  print(f"Created directly on GPU: {gpu_direct_tensor.device}")

  # move back to CPU
  back_to_cpu_tensor = gpu_tensor.cpu()
  print(f"Moved back to CPU: {back_to_cpu_tensor.device}")
else:
  print("CUDA not available")

CUDA not available
