## Synthetic pair generation (new)
Generate **brand-new**, roughly valid levels, corrupt them with random token noise, and export the (corrupted, repaired) pairs as JSONL for training.

In [None]:
import json, random
from pathlib import Path

# Seed the RNG up front so the generated dataset is reproducible.
random.seed(17)

# ---------------- Config ----------------
# Number of (corrupted, repaired) pairs written to each split.
N_TRAIN = 200
N_VAL = 40

# Level grid size: W columns by H rows.
W = 64
H = 16

# Output files live under OUT_DIR, which is created here if missing.
OUT_DIR = Path("data")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_TRAIN = OUT_DIR / "train_pairs.jsonl"
OUT_VAL = OUT_DIR / "val_pairs.jsonl"

# Token vocabulary: every symbol a level cell may hold (also the pool that
# corruption draws replacement/inserted tokens from).
VOCAB = [
    'M', 'F', 'y', 'Y', 'E', 'g', 'G', 'k', 'K', 'r',
    'X', '#', '%', '|', '*', 'B', 'b', '?', '@', 'Q',
    '!', '1', '2', 'D', 'S', 'C', 'U', 'L', 'o', 't',
    'T', '<', '>', '[', ']',
]
# Symbol used for empty cells.
BACKGROUND = '|'

def blank_level():
    """Return a new H-row by W-column grid with every cell set to BACKGROUND.

    Each row is its own list, so rows can be mutated independently.
    """
    return [[BACKGROUND] * W for _ in range(H)]

def add_ground(level, min_h=1):
    """Overwrite the bottom `min_h` rows of `level` with 'X' tiles, in place.

    Rows are filled starting from the last row (H - 1) and moving upwards.
    """
    bottom_row = H - 1
    for depth in range(min_h):
        for col in range(W):
            level[bottom_row - depth][col] = 'X'

def add_platforms(level, n=8):
    """Draw `n` horizontal runs of 'S' tiles at random positions, in place.

    Each run sits on a row in [5, H - 4], starts at a column in [2, W - 8],
    and is 3 to 8 cells long, clipped so it never reaches column W - 1.
    Runs may overlap each other.
    """
    for _ in range(n):
        row = random.randint(5, H - 4)
        start = random.randint(2, W - 8)
        span = random.randint(3, 8)
        stop = min(W - 1, start + span)
        for col in range(start, stop):
            level[row][col] = 'S'

def add_coins(level, n=60):
    """Try `n` times to drop an 'o' token on a random cell, in place.

    Candidate cells have column in [1, W - 2] and row in [3, H - 5]; an
    attempt is skipped when the chosen cell is not BACKGROUND, so fewer than
    `n` tokens may end up placed.
    """
    for _ in range(n):
        col = random.randint(1, W - 2)
        row = random.randint(3, H - 5)
        if level[row][col] != BACKGROUND:
            continue
        level[row][col] = 'o'

def add_enemies(level, n=20, background=None):
    """Try `n` times to place a random enemy token in `level`, in place.

    For each attempt a column in [2, width - 3] is chosen; the candidate cell
    starts on the second-to-last row and falls while the cell below it is
    background, stopping at the bottom row at the latest. The enemy is written
    only if the landing cell is background or an 'o' token.

    The grid dimensions are taken from `level` itself, so the function works
    for any rectangular grid.

    Args:
        level: Grid as a list of rows (lists of single-token strings).
        n: Number of placement attempts.
        background: Token treated as empty space; defaults to the module-level
            BACKGROUND.

    Raises:
        ValueError: If the grid has fewer than 5 columns (no valid column range).
    """
    if not level or not level[0]:
        return  # nothing to populate
    if background is None:
        background = BACKGROUND

    height = len(level)
    width = len(level[0])
    enemy_tokens = ['E', 'g', 'G', 'k', 'K', 'r']

    for _ in range(n):
        x = random.randint(2, width - 3)
        y = height - 2
        # Bound the fall by the grid height: the previous `y > 1` guard never
        # stopped the descent and indexed past the last row when a column had
        # no ground under it.
        while y + 1 < height and level[y + 1][x] == background:
            y += 1
        if level[y][x] in (background, 'o'):
            level[y][x] = random.choice(enemy_tokens)

def add_pipe(level):
    """Stamp one two-column pipe into `level`, in place.

    The pipe's left column is chosen in [6, W - 8]. A body of 2 to 4 rows of
    '[' / ']' is stacked upwards from row H - 2, capped by a '<' / '>' row.
    With probability 0.2 a 'T' token is written directly above the left cap.
    Existing cell contents are overwritten.
    """
    left = random.randint(6, W - 8)
    right = left + 1
    base_row = H - 2
    body_height = random.randint(2, 4)
    cap_row = base_row - body_height

    # Body rows run from base_row up to the row just below the cap.
    for row in range(base_row, cap_row, -1):
        level[row][left] = '['
        level[row][right] = ']'

    level[cap_row][left], level[cap_row][right] = '<', '>'

    if random.random() < 0.2:
        level[cap_row - 1][left] = 'T'

def place_M_and_F(level, background=None):
    """Write one 'M' token near the left edge and one 'F' token near the right edge.

    'M' goes in a random column in [1, 4] and 'F' in a random column in
    [width - 6, width - 3]. In each column the token starts on the
    second-to-last row and falls while the cell below is background, stopping
    at the bottom row at the latest; whatever occupies the landing cell is
    overwritten.

    The grid dimensions are taken from `level` itself.

    Args:
        level: Grid as a non-empty list of rows (lists of single-token strings).
        background: Token treated as empty space; defaults to the module-level
            BACKGROUND.
    """
    if background is None:
        background = BACKGROUND
    height = len(level)
    width = len(level[0])

    def landing_row(x):
        """Return the row where a token dropped into column `x` comes to rest."""
        y = height - 2
        # Bound the fall by the grid height: the previous `y > 0` guard never
        # stopped the descent and indexed past the last row when a column had
        # no ground under it.
        while y + 1 < height and level[y + 1][x] == background:
            y += 1
        return y

    mx = random.randint(1, 4)
    level[landing_row(mx)][mx] = 'M'

    fx = random.randint(width - 6, width - 3)
    level[landing_row(fx)][fx] = 'F'

def gen_clean_level():
    """Build and return one randomly populated level grid.

    Layers are applied in a fixed order, each able to overwrite earlier ones:
    ground, 6-10 platforms, 40-80 coin attempts, 10-25 enemy attempts,
    1-3 pipes, and finally the 'M' and 'F' tokens.
    """
    grid = blank_level()
    add_ground(grid, min_h=1)

    platform_count = random.randint(6, 10)
    add_platforms(grid, n=platform_count)

    coin_attempts = random.randint(40, 80)
    add_coins(grid, n=coin_attempts)

    enemy_attempts = random.randint(10, 25)
    add_enemies(grid, n=enemy_attempts)

    pipe_count = random.randint(1, 3)
    for _ in range(pipe_count):
        add_pipe(grid)

    place_M_and_F(grid)
    return grid

def flatten_tokens(level):
    """Serialize `level` as one space-separated string in row-major order.

    Reads exactly H rows of W cells each (top row first, left to right).
    """
    row_major = [level[row][col] for row in range(H) for col in range(W)]
    return " ".join(row_major)

def corrupt_tokens(tokens, drop_p=0.02, flip_p=0.05, insert_p=0.01, remove_M_prob=0.25, remove_F_prob=0.20):
    """Return a noisy copy of `tokens` for use as the corrupted side of a pair.

    Per token, in this order:
      * an 'M' is removed with probability `remove_M_prob`, an 'F' with
        probability `remove_F_prob`;
      * otherwise the token is dropped with probability `drop_p`;
      * otherwise it is replaced by a random VOCAB token with probability `flip_p`;
      * otherwise it is kept, and with probability `insert_p` a random VOCAB
        token is inserted right after it.

    If every token ends up removed, the original `tokens` object is returned
    unchanged rather than an empty list.
    """
    flip_cutoff = drop_p + flip_p
    noisy = []
    for token in tokens:
        # One roll decides drop / flip / keep; it is drawn before the M/F check.
        roll = random.random()

        if token == 'M':
            removed = random.random() < remove_M_prob
        elif token == 'F':
            removed = random.random() < remove_F_prob
        else:
            removed = False
        if removed:
            continue

        if roll < drop_p:
            continue
        if roll < flip_cutoff:
            noisy.append(random.choice(VOCAB))
            continue

        noisy.append(token)
        if random.random() < insert_p:
            noisy.append(random.choice(VOCAB))

    return noisy if noisy else tokens

def gen_pair():
    """Generate one training pair as `(corrupted_text, clean_text)`.

    Both elements are space-separated token strings; the first is the
    corrupted version of the freshly generated level in the second.
    """
    level = gen_clean_level()
    target_tokens = flatten_tokens(level).split()
    noisy_tokens = corrupt_tokens(target_tokens)
    corrupted_text = " ".join(noisy_tokens)
    clean_text = " ".join(target_tokens)
    return corrupted_text, clean_text

def write_pairs(n, path):
    """Generate `n` pairs and write them to `path` as JSON Lines.

    Each line is a JSON object with keys "corrupted" and "repaired". The file
    is overwritten if it exists, and a one-line summary is printed at the end.
    """
    with path.open("w") as out_file:
        for _ in range(n):
            noisy, target = gen_pair()
            record = {"corrupted": noisy, "repaired": target}
            out_file.write(json.dumps(record) + "\n")
    print(f"Wrote {n} pairs -> {path}")

# Write the training split first, then the validation split.
for split_size, split_path in ((N_TRAIN, OUT_TRAIN), (N_VAL, OUT_VAL)):
    write_pairs(split_size, split_path)
print("Synthetic pair generation complete.")

## Training (compat)
Version-adaptive training cell that works across older/newer `transformers` installs.


In [None]:

from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling,
    __version__ as TR_VER
)
from inspect import signature

# Log the installed version: the argument handling below adapts to it.
print("Transformers version:", TR_VER)

# Model identifier passed to the tokenizer/model `from_pretrained` calls below.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Directory that receives checkpoints, the final model and the tokenizer.
OUTPUT_DIR = "out/llm-sft-pairs-compat"

# JSONL files written by the synthetic pair-generation cell.
train_file = "data/train_pairs.jsonl"
val_file   = "data/val_pairs.jsonl"

# JSONL keys holding the model input (corrupted level) and the target (clean level).
INSTR_FIELD = "corrupted"
RESP_FIELD  = "repaired"
# Maximum number of tokens kept per example when tokenizing (longer texts are truncated).
# NOTE(review): a 64x16 level flattens to 1024 symbols, so one pair holds 2048+ symbols;
# at 512 tokens the truncation probably cuts off the whole response — confirm.
BLOCK_SIZE  = 512
# Target number of tokens per optimizer step; used to derive gradient accumulation.
BATCH_TOKENS = 4096

def to_text(example):
    """Add a "text" field holding the full instruction/response prompt.

    The corrupted and repaired strings are stripped of surrounding whitespace
    and laid out as::

        <s>Instruction:
        {corrupted}

        Response:
        {repaired}</s>

    Real newline characters separate the sections; the previous version used
    ``\\n`` inside the f-string, which emitted a literal backslash followed by
    "n" into the training text.

    Args:
        example: Mapping with the INSTR_FIELD and RESP_FIELD keys.

    Returns:
        The same `example` object, with "text" added.
    """
    instr = str(example[INSTR_FIELD]).strip()
    resp = str(example[RESP_FIELD]).strip()
    # NOTE(review): the tokenizer may also prepend its own BOS token, which
    # would duplicate the literal <s> — confirm against the tokenizer config.
    example["text"] = f"<s>Instruction:\n{instr}\n\nResponse:\n{resp}</s>"
    return example

# Load each JSONL file as its own dataset and add the formatted "text" column.
# split="train" is used for both files — presumably because the "json" loader
# exposes a single data file under that split name; confirm against the datasets docs.
train_ds = load_dataset("json", data_files=train_file, split="train").map(to_text)
val_ds   = load_dataset("json", data_files=val_file,   split="train").map(to_text)

tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

def tok_fn(examples):
    """Tokenize the "text" entries of a batch, truncating each to BLOCK_SIZE tokens."""
    texts = examples["text"]
    return tok(texts, truncation=True, max_length=BLOCK_SIZE)

# Tokenize both splits in batches; the raw "text" column is dropped from the result.
train_tok = train_ds.map(tok_fn, batched=True, remove_columns=["text"])
val_tok   = val_ds.map(tok_fn,   batched=True, remove_columns=["text"])

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
# mlm=False: collate for causal (next-token) language modeling rather than masked LM.
collator = DataCollatorForLanguageModeling(tokenizer=tok, mlm=False)

per_device_train_batch_size = 1
# Accumulate enough micro-batches to reach about BATCH_TOKENS tokens per optimizer
# step (treating every example as BLOCK_SIZE tokens long); never fewer than 1.
gradient_accumulation_steps = max(1, BATCH_TOKENS // (per_device_train_batch_size * BLOCK_SIZE))
per_device_eval_batch_size  = 1

# Parameters accepted by TrainingArguments in the installed transformers version.
sig = signature(TrainingArguments.__init__).parameters


def supports(name):
    """Return True if the installed TrainingArguments accepts a parameter called `name`."""
    return name in sig

# Arguments passed to TrainingArguments regardless of the installed version.
# NOTE(review): with 200 training pairs, batch size 1 and 8 accumulation steps a
# single-device run has roughly 75 optimizer steps, so warmup_steps=200,
# save_steps=1000 and eval_steps=500 would never be reached — confirm and tune.
# NOTE(review): fp16=True presumably requires a GPU — confirm for CPU-only runs.
kwargs = dict(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    learning_rate=2e-5,
    warmup_steps=200,
    weight_decay=0.01,
    logging_steps=50,
    save_steps=1000,
    eval_steps=500,
    gradient_accumulation_steps=gradient_accumulation_steps,
    fp16=True,
)

# Batch size args (per_device vs per_gpu)
if supports("per_device_train_batch_size"):
    kwargs["per_device_train_batch_size"] = per_device_train_batch_size
    kwargs["per_device_eval_batch_size"] = per_device_eval_batch_size
elif supports("per_gpu_train_batch_size"):
    kwargs["per_gpu_train_batch_size"] = per_device_train_batch_size
    if supports("per_gpu_eval_batch_size"):
        kwargs["per_gpu_eval_batch_size"] = per_device_eval_batch_size

# Scheduler + reporting
if supports("lr_scheduler_type"): kwargs["lr_scheduler_type"] = "cosine"
if supports("save_total_limit"):  kwargs["save_total_limit"] = 2
if supports("report_to"):         kwargs["report_to"] = "none"

# Evaluation control. Probe the current name first: recent transformers releases
# call this argument `eval_strategy`; `evaluation_strategy` and
# `evaluate_during_training` are the older spellings.
set_eval_after = False
if supports("eval_strategy"):
    kwargs["eval_strategy"] = "steps"
elif supports("evaluation_strategy"):
    kwargs["evaluation_strategy"] = "steps"
elif supports("evaluate_during_training"):
    kwargs["evaluate_during_training"] = True
else:
    # No in-training evaluation is available in this version.
    set_eval_after = True

# Save strategy if available
if supports("save_strategy"):
    kwargs["save_strategy"] = "steps"

args = TrainingArguments(**kwargs)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=collator,
    train_dataset=train_tok,
    eval_dataset=val_tok,
)

trainer.train()

# Always report final validation metrics: in-training evaluation only fires
# every `eval_steps` optimizer steps, which a short run may never reach.
if set_eval_after:
    print("Running post-hoc evaluation (no evaluation_strategy supported by this version)...")
metrics = trainer.evaluate(eval_dataset=val_tok)
print("Eval metrics:", metrics)

# Persist the final model weights and the tokenizer side by side.
trainer.save_model(OUTPUT_DIR)
tok.save_pretrained(OUTPUT_DIR)

print("Training (compat) finished. Saved to:", OUTPUT_DIR)


  from .autonotebook import tqdm as notebook_tqdm


Transformers version: 4.56.1


Map: 100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 686.08 examples/s]


Step,Training Loss
