# 01 · PyTorch Basics

This notebook covers the fundamental building blocks of PyTorch:
- Tensor creation and properties
- Tensor operations (arithmetic, indexing, reshaping)
- Broadcasting
- Autograd (automatic differentiation)
- Moving tensors between CPU and GPU

In [None]:
import torch
import numpy as np

# Environment report: the installed PyTorch build and whether a CUDA device
# is usable from this kernel. Both lines go out through a single print call.
print(
    f'PyTorch version : {torch.__version__}',
    f'CUDA available  : {torch.cuda.is_available()}',
    sep='\n',
)

## 1. Tensor Creation

In [None]:
# --- Literal data: build a 1-D tensor straight from a Python list ---
a = torch.tensor([1.0, 2.0, 3.0])
print('from list       :', a)

# --- Constant-filled tensors: all zeros, all ones, identity matrix ---
zeros_2x3 = torch.zeros(2, 3)
ones_2x3 = torch.ones(2, 3)
eye_3 = torch.eye(3)
print('zeros(2,3)      :\n', zeros_2x3)
print('ones(2,3)       :\n', ones_2x3)
print('eye(3)          :\n', eye_3)

# --- Random tensors ---
# Seed first so the two draws below are reproducible on every run.
# The draw order matters: uniform first, then normal.
torch.manual_seed(42)
uniform_2x3 = torch.rand(2, 3)    # uniform on [0, 1)
normal_2x3 = torch.randn(2, 3)    # standard normal
print('rand(2,3)       :\n', uniform_2x3)
print('randn(2,3)      :\n', normal_2x3)

# --- NumPy interop ---
# from_numpy keeps the array's dtype (float64 here) and shares its memory,
# so in-place edits to `arr` are visible through `b`.
arr = np.array([4.0, 5.0, 6.0])
b = torch.from_numpy(arr)
print('from numpy      :', b)

## 2. Tensor Properties

In [None]:
# Inspect the metadata every tensor carries, using a 3x4 random tensor.
x = torch.randn(3, 4)

# (label, value) pairs, printed in order; expected output in the comments.
properties = (
    ('shape   :', x.shape),       # torch.Size([3, 4])
    ('ndim    :', x.ndim),        # 2
    ('dtype   :', x.dtype),       # torch.float32
    ('device  :', x.device),      # cpu
    ('numel   :', x.numel()),     # 12
)
for label, value in properties:
    print(label, value)

## 3. Arithmetic & Reduction Operations

In [None]:
# Two small 2x2 operands so every result can be checked by hand.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])

# Binary operations: `*` is element-wise, `@` is the matrix product.
elementwise_sum = a + b
elementwise_product = a * b
matrix_product = a @ b
a_transposed = a.T

print('a + b  :\n', elementwise_sum)
print('a * b  :\n', elementwise_product)
print('a @ b  :\n', matrix_product)
print('a.T    :\n', a_transposed)

# Reductions: without `dim` they collapse the whole tensor to a scalar;
# dim=0 collapses the rows, leaving one value per column.
reductions = (
    ('sum    :', a.sum()),
    ('sum(0) :', a.sum(dim=0)),
    ('mean   :', a.mean()),
    ('max    :', a.max()),
)
for label, value in reductions:
    print(label, value)

## 4. Indexing & Reshaping

In [None]:
# Twelve consecutive floats, 0.0 .. 11.0, as a flat vector.
x = torch.arange(12, dtype=torch.float32)
print('arange  :', x)

# Same twelve values laid out as 3 rows by 4 columns.
x2d = x.reshape(3, 4)
print('reshape(3,4):\n', x2d)

# --- Indexing: whole row, whole column, single element ---
first_row = x2d[0]
second_col = x2d[:, 1]
element_1_2 = x2d[1, 2]
print('row 0   :', first_row)
print('col 1   :', second_col)
print('[1,2]   :', element_1_2)

# --- Flatten back to 1-D; add and then remove a leading size-1 axis ---
print('flatten :', x2d.flatten())
batched = x2d.unsqueeze(0)
print('unsqueeze(0):', batched.shape)   # (1,3,4)
print('squeeze      :', batched.squeeze(0).shape)  # (3,4)

## 5. Broadcasting

In [None]:
# Broadcasting works as in NumPy: size-1 axes are stretched to match
# the other operand.
a = torch.ones(3, 1)   # column, shape (3,1)
b = torch.ones(1, 4)   # row, shape (1,4)
broadcast_sum = a + b
print('(3,1) + (1,4) =', broadcast_sum.shape)   # (3,4)

# Worked example: centre each of 5 features over 100 samples.
data = torch.randn(100, 5)
# keepdim=True keeps the reduced axis as size 1, giving shape (1,5),
# which then broadcasts against the 100 rows of `data`.
mean = data.mean(dim=0, keepdim=True)
centered = data - mean                  # (100,5)
# After centring, every per-feature mean should be zero up to float error.
largest_residual_mean = centered.mean(dim=0).abs().max().item()
print('centered mean ≈ 0:', largest_residual_mean < 1e-5)

## 6. Autograd — Automatic Differentiation

In [None]:
# Scalar autograd demo.
#   f(x)  = 3x² + 2x + 1
#   f'(x) = 6x + 2
# requires_grad=True makes autograd record every operation applied to x.
x = torch.tensor(2.0, requires_grad=True)

# Build the polynomial term by term (same evaluation order as 3x² + 2x + 1).
quadratic_term = 3 * x**2
linear_term = 2 * x
f = quadratic_term + linear_term + 1

# Backpropagate from the scalar f; the derivative is stored in x.grad.
f.backward()
print(
    f'f(2)        = {f.item():.1f}',         # 17.0
    f'df/dx at 2  = {x.grad.item():.1f}',   # 14.0  (6*2 + 2)
    sep='\n',
)

In [None]:
# Matrix autograd demo: mean-squared error of a linear model X @ w against y.
# Only w is tracked; X and y are plain tensors.
# The three draws stay in this order so seeded runs are unchanged.
w = torch.randn(3, 1, requires_grad=True)   # weights
X = torch.randn(10, 3)                      # 10 samples, 3 features
y = torch.randn(10, 1)                      # targets

residual = X @ w - y                        # (10,1) prediction error
loss = (residual ** 2).mean()               # scalar MSE
loss.backward()
# The gradient has the same shape as the parameter it belongs to.
print('w.grad shape:', w.grad.shape)   # (3,1)

In [None]:
# Inside torch.no_grad() operations are not recorded for autograd, which is
# what you want at inference time. `X` and `w` come from the previous cell.
with torch.no_grad():
    pred = X @ w

# w requires grad, but the result computed under no_grad does not.
tracks_grad = pred.requires_grad
print('requires_grad inside no_grad:', tracks_grad)  # False

## 7. Device Management (CPU ↔ GPU)

In [None]:
# Choose the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

# Create on the CPU, then transfer to the selected device.
t = torch.randn(3, 3)
t_dev = t.to(device)
print('tensor device:', t_dev.device)

# NumPy only works with CPU tensors, so bring the data back before converting.
t_cpu = t_dev.cpu()
arr = t_cpu.numpy()
print('as numpy array shape:', arr.shape)

## Summary

| Concept | Key function(s) |
|---|---|
| Create tensors | `torch.tensor`, `torch.zeros`, `torch.randn` |
| Shape / type | `.shape`, `.dtype`, `.device` |
| Reshape | `.reshape`, `.view`, `.unsqueeze`, `.squeeze` |
| Math | `+`, `*`, `@`, `.sum()`, `.mean()` |
| Autograd | `requires_grad=True`, `.backward()`, `.grad` |
| No grad | `torch.no_grad()` |
| Device | `.to(device)`, `.cpu()`, `.cuda()` |