In [121]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, set_peft_model_state_dict

# Load a base model and tokenizer
base_model_name = "gpt2"  # You can choose any model
model = AutoModelForCausalLM.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id
model.save_pretrained("../out/base")

In [122]:
# Define LoRA configuration with random settings
lora_config = LoraConfig(
    r=8,  # Low-rank dimension
    lora_alpha=16,  # Scaling factor
    lora_dropout=0.1,  # Dropout for LoRA layers
    bias="none",  # Bias setting for LoRA layers
    task_type="CAUSAL_LM"  # Task type for the model
)

# Wrap the model with LoRA
lora_model = get_peft_model(model, lora_config)



In [123]:
# Generate random garbage data
garbage_data = [
    "This is some random garbage text.",
    "asdfghjkl qwertyuiop zxcvbnm.",
    "Lorem ipsum dolor sit amet.",
    "Random text 1234567890 !@#$%^&*()."
]

# Tokenize the garbage data
inputs = tokenizer(garbage_data, return_tensors="pt", padding=True, truncation=True)

# Train LoRA on garbage data (1 epoch as an example)
lora_model.train()
optimizer = torch.optim.AdamW(lora_model.parameters(), lr=5e-3)

for epoch in range(20):
    optimizer.zero_grad()
    outputs = lora_model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

Epoch 1, Loss: 6.090549945831299
Epoch 2, Loss: 5.509261608123779
Epoch 3, Loss: 4.62644100189209
Epoch 4, Loss: 3.4101741313934326
Epoch 5, Loss: 3.1889259815216064
Epoch 6, Loss: 2.7818543910980225
Epoch 7, Loss: 2.2625763416290283
Epoch 8, Loss: 1.9884928464889526
Epoch 9, Loss: 1.8503243923187256
Epoch 10, Loss: 1.4861212968826294
Epoch 11, Loss: 1.204071044921875
Epoch 12, Loss: 0.8121559023857117
Epoch 13, Loss: 0.4984126687049866
Epoch 14, Loss: 1.2177890539169312
Epoch 15, Loss: 0.709617555141449
Epoch 16, Loss: 1.489302396774292
Epoch 17, Loss: 0.46517083048820496
Epoch 18, Loss: 0.2087053805589676
Epoch 19, Loss: 0.47164133191108704
Epoch 20, Loss: 0.37333473563194275


In [124]:
lora_model.save_pretrained("../out/lora")

In [125]:
from peft import PeftModel

lora_model = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained("../out/base"), "../out/lora")
base_model = AutoModelForCausalLM.from_pretrained("../out/base")

In [127]:
input_text = f"This is some random"
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

lora_model.eval()

models = [
    ("base", base_model), 
    ("lora", lora_model.to('cpu')),
]
for name, model in models:
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=50,
        do_sample=False,
        temperature=0.0,  # No randomness
        pad_token_id=model.config.pad_token_id,  # Explicitly set
        eos_token_id=model.config.eos_token_id   # Explicitly set
    )
    print(name)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

base
This is some random stuff I've been thinking about for a while.

I'm not sure if I'm going to write a post about it, but I'm going to try to keep it short and sweet.

I'm going to start with a simple
lora
This is some random garbage text.
