Restarted ragapp (Python 3.11.10)

In [None]:
import os
from load_dotenv import load_dotenv
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Populate the process environment first; HF_TOKEN is read from it below.
load_dotenv()

model_id = "google/gemma-2-2b"

# Quantization settings: 4-bit weights in NF4 format, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Look the access token up once; a missing variable raises KeyError here,
# before any from_pretrained call is made.
hf_token = os.environ['HF_TOKEN']

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    quantization_config=bnb_config,
    device_map={"": 0},  # map the root module (and so the whole model) to device 0
    torch_dtype=torch.float,  # torch.float is float32
    attn_implementation='eager',  # added as recommendation
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Smoke test: have the model continue a short prompt.
device = "cuda:0"
text = "Quote: Imagination is more"

# Tokenize to PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first returned sequence, dropping special tokens.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

The 'batch_size' attribute of HybridCache is deprecated and will be removed in v4.49. Use the more precisely named 'self.max_batch_size' attribute instead.


Quote: Imagination is more important than knowledge. Knowledge is limited, imagination encircles the world.

Albert Einstein

The world


In [None]:
from datasets import load_dataset
# 
# train_dir = "CLRS-Text-train/data/train-00000-of-00005.parquet"
# eval_dir = "CLRS-Text-train/data/train-00004-of-00005.parquet"
# 
# dataset = load_dataset("parquet", data_files={'train': train_dir, 'test': eval_dir})
# import pandas as pd 
# train_1 = pd.read_parquet(train_dir)

# def formatting_func(example):
    # text = f"Question: {example['question']}\nAnswer: {example['answer']}<eos>"
    # return tokenizer(text)

# dataset = dataset.map(lambda samples: formatting_func(samples))

# dataset.save_to_disk("train_eval_subset2")

# The 'train_eval_subset2' directory appears to have been written with
# `save_to_disk` (see the commented-out preparation code above), so it is
# reopened with the matching `load_from_disk`. `load_dataset` is intended for
# Hub repositories and raw data files, not for save_to_disk output.
# Imported locally so this cell works regardless of the import line above.
from datasets import load_from_disk

dataset = load_from_disk('train_eval_subset2')

In [None]:
from peft import LoraConfig

# LoRA adapter configuration for causal-LM fine-tuning:
# rank 8, with target_modules set to PEFT's 'all-linear' shorthand.
lora_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules='all-linear',
    r=8,
)

In [None]:
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# def formatting_func(example):
    # text = f"Question: {example['question']}\nAnswer: {example['answer']}<eos>"
    # return [text]

# Tiny subset for a quick end-to-end check: only the first two training rows.
train_subset = dataset['train'].select(range(2))

# Hyperparameters for a short run of 10 optimizer steps.
training_args = TrainingArguments(
    output_dir="outputs",
    max_steps=10,
    warmup_steps=2,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # 1 x 4 = 4 sequences per device per optimizer step
    fp16=True,
    optim="paged_adamw_8bit",
    logging_steps=1,  # report the loss at every step
)

# No formatting_func is passed to the trainer.
# NOTE(review): presumably the saved dataset was already tokenized during
# preparation, which would make a formatting_func unnecessary -- confirm.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    peft_config=lora_config,
)
# trainer.train()



In [None]:
# Run the fine-tuning loop. As the last expression in the cell, the returned
# TrainOutput (global step, mean training loss, runtime metrics) is displayed.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mvolgas[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,1.6174
2,1.6174
3,1.5414
4,1.3855
5,1.2512
6,1.1357
7,1.0386
8,0.9592
9,0.8994
10,0.8613


TrainOutput(global_step=10, training_loss=1.2307183623313904, metrics={'train_runtime': 8.3984, 'train_samples_per_second': 4.763, 'train_steps_per_second': 1.191, 'total_flos': 53721392947200.0, 'train_loss': 1.2307183623313904, 'epoch': 10.0})

In [None]:
# Display the first record of the 'test' split to see what an example looks like.
dataset['test'][0]

{'question': 'bfs:\ns: 9, A: [[1 0 1 0 0 1 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1], [0 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0], [1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 1 0], [0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 0 1], [0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 1 0 0], [1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1], [0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0], [0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 0], [0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 1 0 1 0 0 0], [0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0], [1 0 1 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 1], [0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0], [0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 1 1], [1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1], [1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 0], [0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1], [0 0 0 1 0 0 0 1 1 0 0 1 0 1 1 1 0 0 0 0 1 0 0], [1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0], [0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0], [1 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 

In [None]:
# Prompt the model with the first test question and leave the answer open.
# The inner subscripts use double quotes: before Python 3.12, reusing the
# f-string's own quote character inside a replacement field is a SyntaxError.
text = f'Question: {dataset["test"][0]["question"]}\nAnswer:'
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens and print the decoded first sequence.
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

SyntaxError: f-string: unmatched '[' (<ipython-input-8-a7f4d96eaf80>, line 2)

In [None]:
# Ask the model to answer the first test question, allowing up to 40 new tokens.
device = "cuda:0"
question = dataset["test"][0]["question"]
text = f'Question: {question}\nAnswer:'

# Tokenize to PyTorch tensors on the target device, then generate.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=40)

# Decode the first returned sequence, dropping special tokens.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

Question: bfs:
s: 9, A: [[1 0 1 0 0 1 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1], [0 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0], [1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 1 0], [0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 0 1], [0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 1 0 0], [1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1], [0 0 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0], [0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 0], [0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 1 0 1 0 0 0], [0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0], [1 0 1 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 1], [0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0], [0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 1 1], [1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1], [1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 0], [0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1], [0 0 0 1 0 0 0 1 1 0 0 1 0 1 1 1 0 0 0 0 1 0 0], [1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0], [0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0], [1 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0

In [None]:
# Display the stored 'answer' field of the first test record, for comparison
# with what the model generated for the same question.
dataset["test"][0]['answer']

'[0 9 2 9 4 5 6 7 8 9 10 11 12 9 14 9 16 17 18 19 20 21 22], [13 9 1 9 15 13 13 3 8 9 10 1 15 9 1 9 3 17 18 3 15 21 3] | [13 9 1 9 15 13 13 3 12 9 0 1 15 9 1 9 3 0 4 3 15 2 3]\n\n'

No kernel connected