In [1]:
!git clone https://github.com/CarperAI/trlx.git

!cd trlx
!pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 # for cuda
!pip install -e . 
! pip install 'accelerate>=0.26.0'
! pip install --upgrade bitsandbytes --no-cache-dir
! pip install -U peft
! pip install evaluate
! pip install rouge_score
! pip install bert_score

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu116
[0mObtaining file:///notebooks
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers==4.51.0.dev0)
  Downloading huggingface_hub-0.30.1-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers==4.51.0.dev0)
  Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers==4.51.0.dev0)
  Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading huggingface_hub-0.30.1-py3-none-any.whl (481 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m481.2/481.2 kB[0m [31

In [1]:
import json

import pandas as pd
import torch
from datasets import load_dataset
from torch.utils.data import Dataset

class TLDRDataset(Dataset):
    def __init__(self, train_path, tokenizer, split, max_length=550):
        self.post_list = []
        dataset = load_dataset(train_path, split=split)
        for sample in dataset:
            self.post_list.append(sample["prompt"] + sample["label"])
        if "valid" in split:
            self.post_list = self.post_list[0:2000]
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.input_ids = []
        self.attn_masks = []

    def __len__(self):
        return len(self.post_list)

    def __getitem__(self, idx):
        txt = self.post_list[idx]
        encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
        input_ids = torch.tensor(encodings_dict["input_ids"])
        attn_masks = torch.tensor(encodings_dict["attention_mask"])

        return {
            "input_ids": input_ids,
            "attention_mask": attn_masks,
            "labels": input_ids,
        }


In [2]:
import random

import evaluate
import numpy as np
import torch
# from summarize_dataset import TLDRDataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator,
)

2025-04-06 00:38:23.337048: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-06 00:38:23.337101: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-06 00:38:23.338178: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-06 00:38:23.344108: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
def set_seed(seed_val=42):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

In [8]:
output_dir = "/notebooks/gpt2-supervised-summarize-checkpoint"
train_batch_size = 16
gradient_accumulation_steps = 1
learning_rate = 1e-5
eval_batch_size = 1
eval_steps = 500
max_input_length = 550
save_steps = 1000
num_train_epochs = 5
random.seed(42)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", use_cache=False)
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))
tokenizer.pad_token_id = tokenizer.eos_token_id
model.config.end_token_id = tokenizer.eos_token_id
model.config.pad_token_id = model.config.eos_token_id

In [9]:
data_path = "CarperAI/openai_summarize_tldr"
train_dataset = TLDRDataset(
    data_path,
    tokenizer,
    "train",
    max_length=max_input_length,
)
dev_dataset = TLDRDataset(
    data_path,
    tokenizer,
    "valid",
    max_length=max_input_length,
)



README.md:   0%|          | 0.00/532 [00:00<?, ?B/s]

(…)-00000-of-00001-e8c59e5cf7bce1c0.parquet:   0%|          | 0.00/111M [00:00<?, ?B/s]

(…)-00000-of-00001-59ffb27399371eac.parquet:   0%|          | 0.00/6.23M [00:00<?, ?B/s]

(…)-00000-of-00001-0e33e6bd86e3edc9.parquet:   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/116722 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6553 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/6447 [00:00<?, ? examples/s]

In [10]:
dataset = load_dataset(data_path, split='train')

In [8]:
dataset

Dataset({
    features: ['prompt', 'label'],
    num_rows: 116722
})

In [18]:
# Set up the metric
rouge = evaluate.load("rouge")

def compute_metrics(eval_preds):
    labels_ids = eval_preds.label_ids
    pred_ids = eval_preds.predictions
    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)
    result = rouge.compute(predictions=pred_str, references=label_str)
    return result

# Create a preprocessing function to extract out the proper logits from the model output
def preprocess_logits_for_metrics(logits, labels):
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


In [21]:
# Prepare the trainer and start training
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",
    eval_accumulation_steps=1,
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    gradient_checkpointing=True,
    half_precision_backend=True,
    fp16=True,
    adam_beta1=0.9,
    adam_beta2=0.95,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=num_train_epochs,
    warmup_steps=100,
    eval_steps=eval_steps,
    save_steps=save_steps,
    max_steps=29000,
    load_best_model_at_end=True,
    logging_steps=50,
    # deepspeed="/notebooks/ds_config_gptj.json",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    compute_metrics=compute_metrics,
    data_collator=default_data_collator,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)



In [23]:
trainer.train()
trainer.save_model(output_dir)

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f6a9807a610>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7f6b01212d50, raw_cell="trainer.train(resume_from_checkpoint="/notebooks/g.." store_history=True silent=False shell_futures=True cell_id=a6a76c5c-2236-42ac-85d0-b54c796e661d>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Step,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
28500,1.8976,1.860059,0.59309,0.190521,0.393006,0.516792
29000,1.8894,1.860014,0.592969,0.190446,0.393017,0.516693


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f6a9807a610>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f6b01211590, execution_count=23 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7f6b01212d50, raw_cell="trainer.train(resume_from_checkpoint="/notebooks/g.." store_history=True silent=False shell_futures=True cell_id=a6a76c5c-2236-42ac-85d0-b54c796e661d> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [None]:
# Save trained model to pytorch.bin
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("/notebooks/gpt2-supervised-summarize-checkpoint/").to("cuda")
# tokenizer = AutoTokenizer.from_pretrained("gpt2")
# tokenizer.pad_token = tokenizer.eos_token
model.save_pretrained("/notebooks/gpt2-supervised-summarize-checkpoint/", safe_serialization=False)

In [24]:
import wandb
wandb.finish()

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f6a9807a610>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7f6bf8cc7350, raw_cell="import wandb
wandb.finish()" store_history=True silent=False shell_futures=True cell_id=bcba10b3-da88-47b4-a7a5-dac4a92a3cfd>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

0,1
eval/loss,█▁
eval/rouge1,█▁
eval/rouge2,█▁
eval/rougeL,▁█
eval/rougeLsum,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▁███████████████████████
train/global_step,▁▁▁▁███████████████████████

0,1
eval/loss,1.86001
eval/rouge1,0.59297
eval/rouge2,0.19045
eval/rougeL,0.39302
eval/rougeLsum,0.51669
eval/runtime,136.6279
eval/samples_per_second,14.638
eval/steps_per_second,14.638
total_flos,1.304460153216e+17
train/epoch,3.97478


In [7]:
from datasets import load_dataset

dataset = load_dataset("CarperAI/openai_summarize_tldr", split="train")
print(dataset[0])

Downloading readme:   0%|          | 0.00/532 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/111M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.23M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/116722 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6553 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/6447 [00:00<?, ? examples/s]

{'prompt': "SUBREDDIT: r/relationships\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mothe

In [37]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("gpt2")  
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
model = AutoModelForCausalLM.from_pretrained("/notebooks/gpt2-supervised-summarize-checkpoint/checkpoint-29000")
model.resize_token_embeddings(len(tokenizer))
model.to(device)
model.eval()

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

2025-04-06 19:24:18.075346: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-06 19:24:18.075411: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-06 19:24:18.076596: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-06 19:24:18.083371: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): lora.Linear(
            (base_layer): Conv1D(nf=2304, nx=768)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=768, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=2304, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=Fa

In [8]:
def batch_generate(batch):
    inputs = tokenizer(
        batch["prompt"],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=64,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    summaries = [text.split("TL;DR:")[-1].strip() for text in decoded]
    return {"generated": summaries}


In [3]:
from datasets import load_dataset

dataset = load_dataset("CarperAI/openai_summarize_tldr", split="test")
dataset = dataset.map(batch_generate, batched=True, batch_size=8)

NameError: name 'batch_generate' is not defined

In [4]:
from datasets import load_dataset
dataset = load_dataset("CarperAI/openai_summarize_tldr", split="test")
dataset

Dataset({
    features: ['prompt', 'label'],
    num_rows: 6553
})

In [11]:
import evaluate
rouge = evaluate.load("rouge")

predictions = dataset["generated"]
references = [ref.strip() for ref in dataset["label"]]  # or "completion"

results = rouge.compute(predictions=predictions, references=references, use_stemmer=True)

for k, v in results.items():
    print(f"{k}: {v:.4f}")


rouge1: 0.3156
rouge2: 0.1086
rougeL: 0.2436
rougeLsum: 0.2435


In [13]:
import evaluate
rouge = evaluate.load("rouge")

predictions = dataset["generated"]
references = [ref.strip() for ref in dataset["label"]]  # or "completion"

results = rouge.compute(predictions=predictions, references=references, use_stemmer=True)

for k, v in results.items():
    print(f"{k}: {v:.4f}")

2025-04-02 19:10:41.148499: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-02 19:10:41.148558: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-02 19:10:41.151981: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-02 19:10:41.172595: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

rouge1: 0.0001
rouge2: 0.0000
rougeL: 0.0001
rougeLsum: 0.0001


In [14]:
print(tokenizer.decode(model.generate(tokenizer("Explain gravity", return_tensors="pt").input_ids.to(model.device))[0]))

Explain gravitycludedcludedcludedcludedcludedcludedcludedcludedcludedcludedveveveveveveve




In [15]:
! pip install peft==0.7.1

Collecting peft==0.7.1
  Downloading peft-0.7.1-py3-none-any.whl.metadata (25 kB)
Downloading peft-0.7.1-py3-none-any.whl (168 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.6.2
    Uninstalling peft-0.6.2:
      Successfully uninstalled peft-0.6.2
Successfully installed peft-0.7.1
[0m

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load base model
base_model = AutoModelForCausalLM.from_pretrained("gpt2")

# Load LoRA adapter checkpoint
dpo_model2 = PeftModel.from_pretrained(base_model, "/notebooks/dpo_gpt2_lora_out_openai_data/checkpoint-34701")
dpo_model2.to(device)

# Tokenize input and move to the correct device
input_text = dataset[0]['prompt']
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# Generate output
dpo_output = dpo_model2.generate(input_ids, max_new_tokens=50)

# Decode and print the result
print("DPO:", tokenizer.decode(dpo_output[0], skip_special_tokens=True))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


DPO: SUBREDDIT: r/relationships
TITLE: Me [19 F] with my friend [19 M], not sure if I may have messed things up already.
POST: Hello hello everybody. I hope this isn't too trivial of a question to ask on here, but I've been feeling a bit out of my depth when it comes to this situation (I've had only one relationship before, and for many reasons, it was out of the ordinary).

Okay! So, a couple of weeks ago, I started talking to this guy on Facebook, through a student group that we were both part of. I thought he was sort of cute, so I sent him a PM just to talk, etc, etc. We're both transfer students at the same school, so I knew that we could eventually meet in person once we both moved on-campus. So, we did, and we hung out maybe twice, just as friends.

Okay. So, everything is going pretty well. We talk over Facebook and Snapchat, whatever. So, Saturday night, I was just hanging out with people and kind of being bored, when I got a Snapchat from him asking what I was doing. I asked 

In [7]:
dataset[0]['prompt']

'SUBREDDIT: r/relationships\nTITLE: Me [19 F] with my friend [19 M], not sure if I may have messed things up already.\nPOST: Hello hello everybody. I hope this isn\'t too trivial of a question to ask on here, but I\'ve been feeling a bit out of my depth when it comes to this situation (I\'ve had only one relationship before, and for many reasons, it was out of the ordinary).\n\nOkay! So, a couple of weeks ago, I started talking to this guy on Facebook, through a student group that we were both part of. I thought he was sort of cute, so I sent him a PM just to talk, etc, etc. We\'re both transfer students at the same school, so I knew that we could eventually meet in person once we both moved on-campus. So, we did, and we hung out maybe twice, just as friends.\n\nOkay. So, everything is going pretty well. We talk over Facebook and Snapchat, whatever. So, Saturday night, I was just hanging out with people and kind of being bored, when I got a Snapchat from him asking what I was doing. I 

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned GPT-2 model checkpoint (not a LoRA adapter)
model = AutoModelForCausalLM.from_pretrained("/notebooks/dpo-gpt2-summarize/checkpoint-500")
model.to(device)
model.eval()

# Tokenize input and move to the correct device
input_text = dataset[0]['prompt']
inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Generate output
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id
    )

# Decode and print the result
print("DPO:", tokenizer.decode(output_ids[0], skip_special_tokens=True))


DPO: SUBREDDIT: r/relationships
TITLE: Me [19 F] with my friend [19 M], not sure if I may have messed things up already.
POST: Hello hello everybody. I hope this isn't too trivial of a question to ask on here, but I've been feeling a bit out of my depth when it comes to this situation (I've had only one relationship before, and for many reasons, it was out of the ordinary).

Okay! So, a couple of weeks ago, I started talking to this guy on Facebook, through a student group that we were both part of. I thought he was sort of cute, so I sent him a PM just to talk, etc, etc. We're both transfer students at the same school, so I knew that we could eventually meet in person once we both moved on-campus. So, we did, and we hung out maybe twice, just as friends.

Okay. So, everything is going pretty well. We talk over Facebook and Snapchat, whatever. So, Saturday night, I was just hanging out with people and kind of being bored, when I got a Snapchat from him asking what I was doing. I asked 

In [6]:
import os
os.environ["PYTORCH_SDP_ATTENTION"] = "0"

In [23]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model_name = "Qwen/Qwen2-1.5B-Instruct"  # <- no LoRA pre-applied
adapter_path = "dpo-gpt2-summarize/checkpoint-500/qwen-dpo-checkpoint/checkpoint-80"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

model = PeftModel.from_pretrained(model, adapter_path)


In [24]:
dataset[0]['prompt']

"SUBREDDIT: r/relationships\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has been 

In [25]:
# Original content without TL;DR:
content = dataset[0]['prompt'].strip()
content = re.sub(r"TL;DR:\s*$", "", content)

# Wrap in chat format
messages = [
    {"role": "system", "content": "You are a helpful AI assistant that summarizes Reddit posts."},
    {"role": "user", "content": f"Summarize the following post:\n{content}"}
]

# Tokenize using chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    return_tensors="pt"
).to(model.device)


In [28]:
import re
from transformers import AutoTokenizer
from tqdm import tqdm
import torch

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Pattern to catch Coherence/Consistency/Fluency used like scores
score_pattern = re.compile(r"^\s*(Coherence|Consistency|Fluency)\s*:\s*\d+(\.\d+)?", re.IGNORECASE | re.MULTILINE)

# Loop through dataset
for i, example in enumerate(tqdm(dataset)):
    # Remove TL;DR and format prompt
    content = re.sub(r"TL;DR:\s*$", "", example["prompt"].strip())

    messages = [
        {"role": "system", "content": "You are a helpful AI assistant that summarizes Reddit posts."},
        {"role": "user", "content": f"Summarize the following post:\n{content}"}
    ]

    input_text = tokenizer.apply_chat_template(messages, tokenize=False)

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=200)
        decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Check for bad formatting
    if score_pattern.search(decoded):
        print(f"\n🚨 Problem found at index {i}")
        print("Prompt:\n", input_text)
        print("Generated:\n", decoded)
        break  # or remove this to continue scanning all


  0%|          | 9/116722 [00:56<203:39:46,  6.28s/it]


🚨 Problem found at index 9
Prompt:
 <|im_start|>system
You are a helpful AI assistant that summarizes Reddit posts.<|im_end|>
<|im_start|>user
Summarize the following post:
SUBREDDIT: r/relationships
TITLE: Me [20/F] having trouble with boyfriend [23/m] of 2.5 years sex drive
POST: My boyfriend and I have an amazing sex life. We are very sexually compatible.

Although, his sex drive is crazy high compared to mine. It really depends on the time of month for me [hormones], sometimes I'll be down to go 3 times a day and sometimes I'm only interested in once a week. 

I'm asking for advice because I have a hard time denying him without him feeling rejected by me. For example, every time we cuddle he is almost always coming on to me. At night he comes on to me, in the middle of the night he masturbates next to me, and tries to come onto me in the morning. I'm utterly flattered that after being in a relationship this long he is still very turned on by me, but sometimes it is very overwhelmi




In [27]:
from transformers import AutoTokenizer
import re

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct", trust_remote_code=True)

# Your input content (without TL;DR)
content = re.sub(r"TL;DR:\s*$", "", dataset[0]['prompt'].strip())

# Format it as a chat
messages = [
    {"role": "system", "content": "You are a helpful AI assistant that summarizes Reddit posts."},
    {"role": "user", "content": f"Summarize the following post:\n{content}"}
]

# 🔍 View the raw chat-formatted input
input_text = tokenizer.apply_chat_template(messages, tokenize=False)
print("Formatted Input:\n", input_text)

# ✅ Now tokenize it
input_ids = tokenizer(
    input_text,
    return_tensors="pt"
).input_ids.to(model.device)

# 🔮 Generate output
dpo_output = model.generate(input_ids, max_new_tokens=200)

# 📝 Decode and print result
print("DPO:", tokenizer.decode(dpo_output[0], skip_special_tokens=True))


Formatted Input:
 <|im_start|>system
You are a helpful AI assistant that summarizes Reddit posts.<|im_end|>
<|im_start|>user
Summarize the following post:
SUBREDDIT: r/relationships
TITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting
POST: Not sure if this belongs here but it's worth a try. 

Backstory:
When I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him alo

In [17]:
tokenizer0 = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
tokenizer0.pad_token = tokenizer0.eos_token  # make sure padding token is defined

model0 = AutoModelForCausalLM.from_pretrained(
    "thepowerfuldeez/Qwen2-1.5B-Summarize",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    load_in_4bit=True,
    # attn_implementation="flash_attention_2"
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [21]:
# 🔮 Generate output
dpo_output = model0.generate(input_ids, max_new_tokens=200)

# 📝 Decode and print result
print("DPO:", tokenizer0.decode(dpo_output[0], skip_special_tokens=True))

DPO: system
You are a helpful AI assistant that summarizes Reddit posts.
user
Summarize the following post:
SUBREDDIT: r/tifu
TITLE: TIFU bY brushing with Baking Soda without learning how to do it correctly.
POST: Always wanted White Teeth but never visited the dentist since I was 8 due to fear [gotten bad experience as a kid].        

So I heard that baking soda makes your teeth white if you brush your teeth with it.        
What I didn't get from all the reading, is that though it is supposed to be made into a paste, it shouldn't still be gritty.       

I always kept my baking soda paste gritty by putting very little water.        

After brushing straight with it for three months, my gum was extremely sore, but on the up side is, it is true, it is all true, I am amazed myself ! My teeth is very VERY white now compared to the past and even when taking pictures, the teeth becomes the center of attention simply because of how white it is, even my friends jokingly asked if I have pain

In [9]:
sum_cmp = load_dataset("CarperAI/openai_summarize_comparisons")

Downloading readme:   0%|          | 0.00/462 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/4 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/20.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/4 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/92534 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/83629 [00:00<?, ? examples/s]

Generating valid1 split:   0%|          | 0/33082 [00:00<?, ? examples/s]

Generating valid2 split:   0%|          | 0/50715 [00:00<?, ? examples/s]

In [10]:
sum_cmp['test'][0]

{'prompt': "SUBREDDIT: r/relationships\nTITLE: My [21/M] girlfriend [19/F] broke up with me after she went through my Facebook without my permission.\nPOST: My girlfriend and I had been dating for 15 months. \n\n**Last week my girlfriend went onto my Facebook account and read through my message history with a couple of girls.**\n\nShe was **searching for a specific girl that I used to flirt with in the past, and she found it.**\n\nWe had fought one time before about me flirting with this girl, and I stopped talking to her entirely for a couple of months (obviously she didn't believe I did).\n\nShe found messages between the girl and I around my birthday in February, and her (message girl) birthday in June. Needless to say they were flirty but with no intentions of ever acting upon them. The girl lives in Europe and I live on the East Coast. But my girlfriend doesn't believe that I ever stopped talking to her, and that I was flirty throughout our entire relationship.\n\nI have no eviden

In [24]:
dataset[0]['prompt']

"SUBREDDIT: r/relationships\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has been 

In [17]:

input_text = dataset[0]['prompt']
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# Generate output
dpo_output = model.generate(input_ids, max_new_tokens=50)

# Decode and print the result
print("DPO:", tokenizer.decode(dpo_output[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


DPO: SUBREDDIT: r/relationships
TITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting
POST: Not sure if this belongs here but it's worth a try. 

Backstory:
When I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. 

Now: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has been out

In [10]:
input_text = dataset[0]['prompt']
# SFT model
sft_model = AutoModelForCausalLM.from_pretrained("/notebooks/gpt2-supervised-summarize-checkpoint/checkpoint-29000").to(device)
sft_output = sft_model.generate(tokenizer(input_text, return_tensors="pt").input_ids.to(device), max_new_tokens=50)
print("SFT:", tokenizer.decode(sft_output[0], skip_special_tokens=True))

# PPO model
ppo_model = AutoModelForCausalLM.from_pretrained("/notebooks/trlx/examples/summarize_rlhf/ckpts/checkpoint_11000/hf_model").to(device)
ppo_output = ppo_model.generate(tokenizer(input_text, return_tensors="pt").input_ids.to(device), max_new_tokens=50)
print("PPO:", tokenizer.decode(ppo_output[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


SFT: SUBREDDIT: r/relationships
TITLE: Me [19 F] with my friend [19 M], not sure if I may have messed things up already.
POST: Hello hello everybody. I hope this isn't too trivial of a question to ask on here, but I've been feeling a bit out of my depth when it comes to this situation (I've had only one relationship before, and for many reasons, it was out of the ordinary).

Okay! So, a couple of weeks ago, I started talking to this guy on Facebook, through a student group that we were both part of. I thought he was sort of cute, so I sent him a PM just to talk, etc, etc. We're both transfer students at the same school, so I knew that we could eventually meet in person once we both moved on-campus. So, we did, and we hung out maybe twice, just as friends.

Okay. So, everything is going pretty well. We talk over Facebook and Snapchat, whatever. So, Saturday night, I was just hanging out with people and kind of being bored, when I got a Snapchat from him asking what I was doing. I asked 

Some weights of the model checkpoint at /notebooks/trlx/examples/summarize_rlhf/ckpts/checkpoint_11000/hf_model were not used when initializing GPT2LMHeadModel: ['transformer.base_model.transformer.h.0.attn.c_attn.bias', 'transformer.base_model.transformer.h.0.attn.c_attn.weight', 'transformer.base_model.transformer.h.0.attn.c_proj.bias', 'transformer.base_model.transformer.h.0.attn.c_proj.weight', 'transformer.base_model.transformer.h.0.ln_1.bias', 'transformer.base_model.transformer.h.0.ln_1.weight', 'transformer.base_model.transformer.h.0.ln_2.bias', 'transformer.base_model.transformer.h.0.ln_2.weight', 'transformer.base_model.transformer.h.0.mlp.c_fc.bias', 'transformer.base_model.transformer.h.0.mlp.c_fc.weight', 'transformer.base_model.transformer.h.0.mlp.c_proj.bias', 'transformer.base_model.transformer.h.0.mlp.c_proj.weight', 'transformer.base_model.transformer.h.1.attn.c_attn.bias', 'transformer.base_model.transformer.h.1.attn.c_attn.weight', 'transformer.base_model.transforme

PPO: SUBREDDIT: r/relationships
TITLE: Me [19 F] with my friend [19 M], not sure if I may have messed things up already.
POST: Hello hello everybody. I hope this isn't too trivial of a question to ask on here, but I've been feeling a bit out of my depth when it comes to this situation (I've had only one relationship before, and for many reasons, it was out of the ordinary).

Okay! So, a couple of weeks ago, I started talking to this guy on Facebook, through a student group that we were both part of. I thought he was sort of cute, so I sent him a PM just to talk, etc, etc. We're both transfer students at the same school, so I knew that we could eventually meet in person once we both moved on-campus. So, we did, and we hung out maybe twice, just as friends.

Okay. So, everything is going pretty well. We talk over Facebook and Snapchat, whatever. So, Saturday night, I was just hanging out with people and kind of being bored, when I got a Snapchat from him asking what I was doing. I asked 

In [None]:
input_text = 

In [18]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
model = AutoModelForCausalLM.from_pretrained(
    ".Trash-0/files/checkpoint_01000/hf_model",
    # ignore_mismatched_sizes=True  # prevents warnings from crashing things
    torch_dtype=torch.float16,
)


Some weights of the model checkpoint at .Trash-0/files/checkpoint_01000/hf_model were not used when initializing GPT2LMHeadModel: ['base_model.transformer.h.2.ln_1.weight', 'frozen_head.decoder_blocks.1.mlp.c_fc.bias', 'frozen_head.decoder_blocks.2.ln_1.weight', 'base_model.transformer.h.10.mlp.c_fc.bias', 'base_model.transformer.h.0.mlp.c_fc.weight', 'base_model.transformer.h.3.mlp.c_fc.bias', 'base_model.transformer.h.7.ln_1.weight', 'frozen_head.decoder_blocks.3.mlp.c_fc.weight', 'base_model.transformer.h.8.mlp.c_proj.weight', 'v_head.2.weight', 'base_model.transformer.h.4.mlp.c_fc.bias', 'base_model.transformer.h.2.ln_2.weight', 'frozen_head.decoder_blocks.2.attn.c_proj.weight', 'base_model.transformer.h.0.ln_2.bias', 'frozen_head.decoder_blocks.1.attn.c_proj.bias', 'base_model.transformer.h.3.ln_1.bias', 'base_model.transformer.h.3.mlp.c_fc.weight', 'base_model.transformer.h.8.mlp.c_fc.bias', 'base_model.transformer.h.3.mlp.c_proj.weight', 'base_model.transformer.h.0.attn.c_proj.b

In [6]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)  #
model.config.pad_token_id = tokenizer.pad_token_id

In [7]:
torch_dtype=torch.float16,
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
