<img src='https://theaiengineer.dev/tae_logo_gw_flat.png' alt='The Python Quants' width='35%' align='right'>


# Chapter 22 — Embeddings & Attention as Linear Algebra

This notebook mirrors the chapter’s NumPy formulations with tiny, fast checks.

In [None]:
import numpy as np

# Display-only setting: print floats with 3 digits of precision and suppress
# scientific notation for small values. Computed values are not affected.
np.set_printoptions(precision=3, suppress=True)


In [None]:
def softmax_rows(S):
    """Numerically stable softmax taken over the last axis of ``S``.

    For a 2-D score matrix this normalizes every row into a probability
    distribution (same result as reducing over ``axis=1``). 1-D vectors and
    batched inputs of shape ``(..., n)`` are handled the same way.

    Parameters
    ----------
    S : array_like
        Scores / logits with at least one dimension.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``S`` whose entries along the last axis are
        non-negative and sum to 1.
    """
    S = np.asarray(S)
    # Subtracting the per-row maximum leaves the softmax unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    shifted = S - S.max(axis=-1, keepdims=True)
    E = np.exp(shifted)
    # The row maximum maps to exp(0) == 1, so each row sum is >= 1 and the
    # division is always safe. No epsilon is needed, and adding one would make
    # the rows sum to slightly less than 1.
    return E / E.sum(axis=-1, keepdims=True)


In [None]:
def attention(Q, K, V, causal=False):
    """Single-head scaled dot-product attention.

    Computes the weights ``A = softmax(Q @ K.T / sqrt(d))`` row by row and the
    attended values ``O = A @ V``.

    Parameters
    ----------
    Q : numpy.ndarray, shape (n_q, d)
        Query vectors, one per row.
    K : numpy.ndarray, shape (n_k, d)
        Key vectors, one per row.
    V : numpy.ndarray, shape (n_k, d_v)
        Value vectors, one per key.
    causal : bool, default False
        If True, query ``i`` may only attend to keys ``0..i``; every key with
        index ``j > i`` receives zero weight.

    Returns
    -------
    O : numpy.ndarray, shape (n_q, d_v)
        Attention output ``A @ V``.
    A : numpy.ndarray, shape (n_q, n_k)
        Attention weights; each row sums to 1.
    """
    d = Q.shape[1]
    # Scale by sqrt(d) so the score magnitude does not grow with the
    # embedding dimension.
    S = (Q @ K.T) / np.sqrt(d)
    if causal:
        # The strict upper triangle (column j > row i) marks "future" keys.
        # Building the mask from S.shape rather than assuming a square matrix
        # keeps this valid when the number of keys differs from the number of
        # queries.
        future = np.triu(np.ones(S.shape, dtype=bool), k=1)
        # S is a fresh temporary from the division above, so writing into it
        # cannot modify the caller's arrays. Column 0 is never masked, so each
        # row keeps a finite entry and exp(-inf) == 0 gives the masked
        # positions exactly zero weight.
        S[future] = -np.inf
    A = softmax_rows(S)
    O = A @ V
    return O, A


In [None]:
# Reproducible toy problem: n tokens with d-dimensional embeddings and
# dv-dimensional values.
rs = np.random.default_rng(22)
n, d, dv = 6, 4, 2

# Token embeddings and the three projection matrices. The draw order is fixed
# so the seeded values stay the same from run to run.
X = rs.normal(size=(n, d))
Wq = rs.normal(size=(d, d))
Wk = rs.normal(size=(d, d))
Wv = rs.normal(size=(d, dv))

# Queries and keys are (n, d); values are (n, dv).
Q, K, V = X @ Wq, X @ Wk, X @ Wv

O_nc, A_nc = attention(Q, K, V, causal=False)
O_c, A_c = attention(Q, K, V, causal=True)

# Hard checks, so a regression fails under Restart & Run All instead of only
# changing a printed number.
assert O_nc.shape == (n, dv) and O_c.shape == (n, dv)
assert A_nc.shape == (n, n) and A_c.shape == (n, n)
# Every row of the attention weights is a probability distribution.
np.testing.assert_allclose(A_nc.sum(axis=1), 1.0, atol=1e-9)
np.testing.assert_allclose(A_c.sum(axis=1), 1.0, atol=1e-9)
# With the causal mask, no weight lands strictly above the diagonal.
assert not np.triu(A_c, 1).any()

print('rowsum(non-causal) →', np.round(A_nc.sum(1)[:3], 6))
print('future mass (causal) →', float(np.triu(A_c,1).sum()))


<img src='https://theaiengineer.dev/tae_logo_gw_flat.png' alt='The Python Quants' width='35%' align='right'>
