# TP4 Nano — Word2Vec (CBOW)

**Paper:** [Feature Learning in Infinite-Width Neural Networks](https://arxiv.org/abs/2011.14522)
**Code reference:** [edwardjhu/TP4](https://github.com/edwardjhu/TP4)

Nano Word2Vec: CBOW with negative sampling, trained at finite widths 64 and 256 and in the infinite-width μP limit.

In [None]:
import sys
from pathlib import Path

# Locate the directory that contains tp/tp4_cbow.py: try the current working
# directory first, then its parent. If neither has it, fall back to "..".
_cwd = Path.cwd().resolve()
for _candidate in (_cwd, _cwd.parent):
    if (_candidate / "tp" / "tp4_cbow.py").exists():
        _root = _candidate
        break
else:
    _root = Path("..").resolve()

# Put the chosen root at the front of the import path so `tp` is importable.
sys.path.insert(0, str(_root))

import torch
from tp.tp4_cbow import train_word2vec_nano

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

In [None]:
# Toy corpus: one short sentence, split on whitespace and repeated 500 times.
_sentence = (
    "the cat sat on the mat the dog ran on the mat "
    "the cat and the dog sat "
)
tiny_corpus = _sentence.split() * 500

# Hyperparameters shared by every run, so the runs differ only in
# `width` / `inf_width`.
_train_kwargs = dict(
    epochs=5,
    lr=0.05,
    wd=0.001,
    max_tokens=15_000,
    max_vocab=1500,
    device=device,
)

# Maps a plot label to the loss sequence returned for that run.
results_w2v = {}

# Finite-width runs.
for width in (64, 256):
    model, losses = train_word2vec_nano(
        tiny_corpus,
        width=width,
        inf_width=False,
        **_train_kwargs,
    )
    results_w2v[f"width={width}"] = losses
    print(f"Width {width}: final loss = {losses[-1]:.4f}")

# Infinite-width run: no finite width is passed.
model_inf, losses_inf = train_word2vec_nano(
    tiny_corpus,
    width=None,
    inf_width=True,
    **_train_kwargs,
)
results_w2v["inf (μP)"] = losses_inf
print(f"Inf width (μP): final loss = {losses_inf[-1]:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Draw one loss curve per run, labelled with its key in results_w2v.
for label, losses in results_w2v.items():
    plt.plot(losses, label=label)

# NOTE(review): the x-axis label assumes train_word2vec_nano returns one loss
# value per epoch — confirm against its implementation.
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()