# NumPy Broadcasting

Broadcasting enables arithmetic operations between arrays of different shapes.

In [None]:
import numpy as np
# Limit printed floating-point values to 3 digits of precision so array output stays compact.
np.set_printoptions(precision=3)

## 1. Basic Broadcasting

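Broadcasting lets NumPy combine arrays of different shapes: the smaller operand (a scalar or a lower-dimensional array) is treated as if it were repeated to match the larger one, without actually copying its data.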

In [None]:
# Scalar with array: the scalar is combined with every element of the array.
arr = np.array([1, 2, 3])

shifted = arr + 10  # 10 is added to each element
doubled = arr * 2   # each element is multiplied by 2

print(f"arr + 10 = {shifted}")
print(f"arr * 2 = {doubled}")

In [None]:
# 1D with 2D: the (3,) row is added to each of the two rows of the matrix.
matrix = np.array([[1, 2, 3], [4, 5, 6]])
row = np.array([10, 20, 30])

# (2, 3) + (3,) -> (2, 3)
matrix_plus_row = matrix + row

print(f"Matrix (2x3):\n{matrix}\n")
print(f"Row (3,): {row}\n")
print(f"Matrix + Row:\n{matrix_plus_row}")

## 2. Broadcasting Rules

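NumPy compares the two shapes dimension by dimension, starting from the trailing (rightmost) dimension and working left. A shape with fewer dimensions is treated as if it were padded with 1s on the left. For each pair of dimensions:

1. If the dimensions are equal, they are compatible
2. If one of the dimensions is 1, it is stretched to match the other
3. Otherwise, the shapes are incompatible and NumPy raises a `ValueError`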

In [None]:
# Example: (3, 4) and (4,) -> compatible
# Align the shapes on their trailing dimension:
#   A: 3 x 4
#   b:     4
#   ---------
#      3 x 4   (4 matches 4; b has no dimension to pair with 3, so it is broadcast)

A = np.ones(shape=(3, 4))
b = np.array([1, 2, 3, 4])

# b is added to every row of A
total = A + b

print(f"A shape: {A.shape}")
print(f"b shape: {b.shape}")
print(f"A + b shape: {total.shape}")
print(f"A + b:\n{total}")

In [None]:
# Example: (3, 1) and (1, 4) -> (3, 4)
# Both operands are stretched: the column across 4 columns, the row down 3 rows.
col = np.array([1, 2, 3]).reshape(3, 1)  # Shape (3, 1)
row = np.array([10, 20, 30, 40]).reshape(1, 4)  # Shape (1, 4)

grid = col + row

print(f"Column:\n{col}")
print(f"Row:\n{row}")
print(f"Column + Row:\n{grid}")

In [None]:
# Incompatible example: the trailing dimensions are 4 and 3, and neither is 1.
A = np.ones((3, 4))
b = np.ones((3,))  # Can't broadcast with (3, 4)

try:
    # Only the addition can fail, so it is the only statement guarded here.
    result = A + b
except ValueError as err:
    print(f"Error: {err}")

## 3. Common ML Use Cases

In [None]:
# Use Case 1: Feature Normalization
# Shift every feature (column) to zero mean and scale it to unit standard deviation.

X = np.random.randn(100, 5)  # 100 samples, 5 features

# Per-feature statistics: reducing over axis 0 (the samples) leaves one value per column
mean = X.mean(axis=0)  # Shape (5,)
std = X.std(axis=0)    # Shape (5,)

print(f"X shape: {X.shape}")
print(f"Mean shape: {mean.shape}")

# Broadcasting: (100, 5) - (5,) -> (100, 5)
# The (5,) statistics line up with the trailing dimension, so they apply to every row.
X_normalized = (X - mean) / std

print("After normalization:")
print(f"Mean: {X_normalized.mean(axis=0)}")
print(f"Std: {X_normalized.std(axis=0)}")

In [None]:
# Use Case 2: Adding Bias in Neural Networks
# output = Wx + b

batch_size, n_neurons = 32, 10

# Pre-activation values, one row per sample: (batch_size, n_neurons)
z = np.random.randn(batch_size, n_neurons)

# One bias value per neuron: (n_neurons,)
bias = np.random.randn(n_neurons)

# (batch_size, n_neurons) + (n_neurons,): the same bias vector is added to every sample
output = np.add(z, bias)

print(f"z shape: {z.shape}")
print(f"bias shape: {bias.shape}")
print(f"output shape: {output.shape}")

In [None]:
# Use Case 3: Computing Pairwise Distances
# Useful for K-Nearest Neighbors

# 5 points in 2D
points = np.array([[0, 0], [1, 0], [0, 1], [1, 1], [0.5, 0.5]])

# Insert a length-1 axis so that every point is paired with every other point:
#   points[:, np.newaxis, :] -> shape (5, 1, 2)
#   points[np.newaxis, :, :] -> shape (1, 5, 2)
#   their difference         -> shape (5, 5, 2)
as_rows = points[:, np.newaxis, :]
as_cols = points[np.newaxis, :, :]
diff = as_rows - as_cols

# Euclidean distance: square each coordinate difference, sum over the
# coordinate axis, then take the square root.
squared_dist = (diff ** 2).sum(axis=2)
distances = np.sqrt(squared_dist)

print("Pairwise distance matrix:")
print(distances)

In [None]:
# Use Case 4: Softmax Function
def softmax(x, axis=1):
    """Compute softmax along one axis with numerical stability.

    Parameters
    ----------
    x : array_like
        Input scores (logits). Lists and nested lists are accepted.
    axis : int, optional
        Axis along which the values are normalized. The default, 1,
        normalizes each row of a 2D batch. Pass ``axis=0`` or ``axis=-1``
        for a 1D input.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose values along ``axis``
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large inputs. keepdims=True retains the
    # reduced axis with length 1 so the maximum broadcasts back against x.
    x_shifted = x - x.max(axis=axis, keepdims=True)
    exp_x = np.exp(x_shifted)
    # Same broadcasting pattern: each slice along `axis` is divided by its own sum.
    return exp_x / exp_x.sum(axis=axis, keepdims=True)

# Batch of logits: softmax turns each row into values that sum to 1
logits = np.array([[2.0, 1.0, 0.1], [1.0, 2.0, 3.0]])

probs = softmax(logits)
row_totals = probs.sum(axis=1)

print(f"Probabilities:\n{probs}")
print(f"Sum per row: {row_totals}")

## 4. Broadcasting with `np.newaxis`

In [None]:
# Outer product via broadcasting
a = np.array([1, 2, 3])
b = np.array([4, 5])

# As they stand, a (3,) and b (2,) cannot be broadcast together.
# Giving each a length-1 axis makes them compatible:
#   a[:, np.newaxis] -> (3, 1)
#   b[np.newaxis, :] -> (1, 2)
#   product          -> (3, 2)
a_col = a[:, np.newaxis]
b_row = b[np.newaxis, :]
outer = a_col * b_row
print(f"Outer product:\n{outer}")

# The built-in np.outer gives the same result:
reference = np.outer(a, b)
print(f"np.outer:\n{reference}")

## 5. Performance Benefits

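Broadcasting is much faster than explicit Python loops and more memory-efficient than replicating the smaller array (e.g. with `np.tile`), because the stretched values are never actually copied.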

In [None]:
import time

# Benchmark: subtract the per-column mean from a (10000, 100) matrix, once with
# broadcasting and once with an explicit Python loop over the rows.
X = np.random.randn(10000, 100)
mean = X.mean(axis=0)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() follows the wall clock, which can be
# adjusted while the measurement is running.

# Method 1: Broadcasting (efficient)
start = time.perf_counter()
for _ in range(100):
    result1 = X - mean
broadcast_time = time.perf_counter() - start

# Method 2: Explicit loop (slow)
start = time.perf_counter()
for _ in range(100):
    result2 = np.zeros_like(X)
    # One Python-level iteration per row instead of a single vectorized call
    for i in range(X.shape[0]):
        result2[i] = X[i] - mean
loop_time = time.perf_counter() - start

print(f"Broadcasting: {broadcast_time:.4f}s")
print(f"Loop: {loop_time:.4f}s")
print(f"Speedup: {loop_time/broadcast_time:.1f}x")

## 6. Exercises

In [None]:
# Exercise 1: Center each row of a matrix to have zero mean
X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Your code here
# keepdims=True keeps the means as a (3, 1) column, so each row's own mean
# is subtracted from that row; a plain (3,) result would line up with columns.
row_means = np.mean(X, axis=1, keepdims=True)
X_centered = X - row_means

centered_row_means = X_centered.mean(axis=1)
print(f"Centered:\n{X_centered}")
print(f"Row means: {centered_row_means}")

In [None]:
# Exercise 2: Compute cosine similarity between all pairs of vectors
vectors = np.array([[1, 0], [0, 1], [1, 1]])

# Scale every vector to unit length. keepdims=True keeps the norms as a
# (3, 1) column so each row is divided by its own norm.
norms = np.linalg.norm(vectors, axis=1, keepdims=True)
normalized = vectors / norms

# For unit vectors the dot product equals the cosine of the angle between
# them, so one matrix product yields every pairwise similarity.
similarity = np.matmul(normalized, normalized.T)
print(f"Cosine similarity matrix:\n{similarity}")