## 5.1

### 5.1.1

In [7]:
import torch
from model import GPTModel

In [2]:
# Hyperparameters passed to GPTModel for the 124M-parameter configuration.
GPT_CONFIG_124M = dict(
    vocab_size=50257,   # size of the token-embedding table
    context_len=256,    # lowered from 1024 to 256 so the model runs on ordinary machines
    emb_dim=768,        # width of the token and positional embeddings
    n_heads=12,
    n_layers=12,
    # Dropout probabilities; presumably embedding / shortcut / attention
    # dropout respectively -- confirm against model.py.
    drop_rate_emb=0.1,
    drop_rate_sc=0.1,
    drop_rate_mha=0.1,
    qkv_bias=False,     # query/key/value projections are created without a bias term
)

In [3]:
# Seed torch's global RNG before the model is built so that the freshly
# initialised parameters are reproducible across runs (assumes GPTModel draws
# its initial weights from that RNG -- confirm in model.py).
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)
# Put the model in evaluation mode so its Dropout layers are inactive during
# generation. This is the cell's last expression, so the returned module is
# what the notebook displays.
model.eval()

GPTModel(
  (tok_emb): Embedding(50257, 768)
  (pos_emb): Embedding(256, 768)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (mha): MultiHeadAttention(
        (W_q): Linear(in_features=768, out_features=768, bias=False)
        (W_k): Linear(in_features=768, out_features=768, bias=False)
        (W_v): Linear(in_features=768, out_features=768, bias=False)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (dropout_sc): Dropout(p=0.1, inplace=False)
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU()
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (1): TransformerBlock(
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (mha): MultiHeadAttenti

In [4]:
def generate_text_simple(model, idx, max_new_tokens, context_size):
    """Greedily append ``max_new_tokens`` tokens to ``idx``.

    At every step the most recent ``context_size`` tokens are fed to ``model``
    and the highest-probability token at the final position is appended.

    Args:
        model: Callable mapping a ``(batch, n_tokens)`` tensor of token ids to
            logits indexed as ``(batch, n_tokens, vocab_size)``. It is called
            under ``torch.no_grad()``; this function does not change its
            train/eval mode.
        idx: Tensor of token ids with shape ``(batch, n_tokens)``; it is not
            modified.
        max_new_tokens: Number of tokens to generate.
        context_size: Maximum number of trailing tokens shown to the model.

    Returns:
        A tensor of shape ``(batch, n_tokens + max_new_tokens)`` holding the
        original ids followed by the generated ones.
    """
    sequence = idx
    for _step in range(max_new_tokens):
        # Crop to the trailing window the model can attend to.
        window = sequence[:, -context_size:]  # (batch, <= context_size)

        with torch.no_grad():
            # Keep only the prediction for the final position.
            last_logits = model(window)[:, -1, :]  # (batch, vocab_size)

        # Softmax is monotonic, so the argmax picks the most likely token.
        next_token = torch.softmax(last_logits, dim=-1).argmax(dim=-1, keepdim=True)  # (batch, 1)
        sequence = torch.cat((sequence, next_token), dim=1)

    return sequence

In [5]:
import tiktoken

def text_to_token_ids(text: str, tokenizer: tiktoken.Encoding) -> torch.Tensor:
    """Tokenize ``text`` and return the ids as a batch containing one sequence.

    Args:
        text: Text to encode. The literal ``<|endoftext|>`` is permitted and is
            passed to the tokenizer as an allowed special token.
        tokenizer: Encoder whose ``encode(text, allowed_special=...)`` returns
            a flat list of integer token ids.

    Returns:
        A ``torch.long`` tensor of shape ``(1, n_tokens)``.
    """
    encoded = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
    # Pin the dtype explicitly: for an empty encoding torch.tensor([]) would
    # infer float32, which is not a valid index dtype for an embedding lookup.
    encoded_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0)  # add batch dim
    return encoded_tensor

def token_ids_to_text(token_ids, tokenizer: tiktoken.Encoding):
    """Decode a single-sequence batch of token ids back into text.

    Args:
        token_ids: Tensor of token ids; a leading dimension of size 1 (the
            batch dimension) is removed before decoding.
        tokenizer: Decoder whose ``decode`` accepts a list of token ids.

    Returns:
        The decoded string.
    """
    # squeeze(0) only drops the first dimension when its size is 1.
    return tokenizer.decode(token_ids.squeeze(0).tolist())

In [6]:
# Smoke-test generation: encode a short prompt, greedily extend it by ten
# tokens, and show both the raw ids and the decoded text. No weights are
# loaded in the cells shown here, so an incoherent continuation is expected.
tokenizer = tiktoken.get_encoding("gpt2")
start_context = "Every effort moves you"

prompt_ids = text_to_token_ids(start_context, tokenizer)
token_ids = generate_text_simple(
    model,
    prompt_ids,
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_len"],
)

print(token_ids)
print(token_ids_to_text(token_ids, tokenizer))

tensor([[ 6109,  3626,  6100,   345, 34245,  5139,  2492, 25405, 17434, 17853,
          5308,  3398, 13174, 43071]])
Every effort moves you rentingetic wasnم refres RexMeCHicular stren


### 5.1.3