In [None]:

%pip -q install --upgrade pip
!pip install "unsloth>=0.3.0" "torch>=2.0.0" "transformers>=4.36.0" "datasets>=2.14.0" "trl>=0.7.4" "accelerate>=0.24.0" "bitsandbytes>=0.41.0" "scipy>=1.11.0" "click>=8.0.0" "wandb>=0.15.0"

In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# ---- Base model ------------------------------------------------------------
# The sequence limit is named here; the trainer call further down passes the
# same value (2048) as a literal.
BASE_MODEL_NAME = "mistralai/Mistral-7B-v0.1"
MAX_SEQ_LENGTH = 2048

# full_finetuning=True is requested, so no PEFT/LoRA wrapper is applied in this
# notebook. To train adapters instead, drop that flag and wrap the model with
# FastLanguageModel.get_peft_model(model) right after loading.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    full_finetuning=True,
)

# ---- Chat template ---------------------------------------------------------
# Messages in this notebook are dicts keyed "from"/"value" with speakers named
# "human"/"gpt"; the mapping tells the template how to read them. No template
# name is passed, so the library's default template is the one installed.
chat_field_mapping = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}
tokenizer = get_chat_template(tokenizer, mapping=chat_field_mapping)

# ---- Raw data --------------------------------------------------------------
# Fetch the train split, then narrow it to the two columns the formatter reads.
origdataset = load_dataset("alespalla/chatbot_instruction_prompts", split="train")
pr_dataset = origdataset.select_columns(["prompt", "response"])

# Convert each prompt/response pair into a 2-turn conversation, then to a single 'text' field
def to_chat_text(batch):
    """Render a batch of prompt/response pairs as chat-formatted strings.

    Args:
        batch: Mapping with parallel sequences under "prompt" and "response"
            (the shape a batched ``Dataset.map`` call passes in). Pairs are
            formed with ``zip``, so extra items in the longer sequence are
            ignored.

    Returns:
        ``{"text": [...]}`` with one string per pair, produced by the
        module-level ``tokenizer``'s chat template. Each pair becomes a
        two-turn conversation: a "human" turn holding the prompt followed by
        a "gpt" turn holding the response.
    """

    def render(question, answer):
        # One complete exchange; no generation prompt is appended because the
        # assistant's reply is already part of the conversation.
        turns = [
            {"from": "human", "value": question},
            {"from": "gpt", "value": answer},
        ]
        return tokenizer.apply_chat_template(
            turns, tokenize=False, add_generation_prompt=False
        )

    pairs = zip(batch["prompt"], batch["response"])
    return {"text": [render(question, answer) for question, answer in pairs]}

# ---- Format the data -------------------------------------------------------
# Run to_chat_text over the prompt/response dataset in batches of 100 rows,
# adding the "text" column that the trainer reads.
dataset = pr_dataset.map(
    to_chat_text,
    desc="Formatting prompt/response into chat template",
    batched=True,
    batch_size=100,
)

# ---- Training configuration ------------------------------------------------
# Probe bf16 support once; exactly one of bf16 / fp16 ends up enabled.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    # Bookkeeping
    output_dir="outputs",
    report_to="none",
    logging_steps=1,
    seed=3407,
    # Batching: 2 sequences per device, accumulated over 4 steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule
    max_steps=500,
    warmup_steps=5,
    lr_scheduler_type="linear",
    # Optimizer
    # NOTE(review): 2e-4 is a rate commonly used for LoRA runs; this notebook
    # loads the model with full_finetuning=True, where a smaller value may be
    # needed — confirm before a long run.
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    # Mixed precision
    bf16=use_bf16,
    fp16=not use_bf16,
)

# ---- Train -----------------------------------------------------------------
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=2048,
    packing=False,  # Can make training faster for short sequences if set True
)

trainer.train()


Now that the model is trained, you can use it to generate responses based on new prompts. The following cell shows an example of how to do this.

In [None]:
# Example inference
prompt = "Hi"

# The previous cell left the model configured for training; switch the Unsloth
# model to its inference configuration before calling generate().
FastLanguageModel.for_inference(model)

# Wrap the prompt as a single "human" turn in the same from/value format used
# for training. add_generation_prompt=True appends the assistant-turn opener so
# the model continues as the assistant rather than extending the user's text.
input_text = tokenizer.apply_chat_template(
    [{"from": "human", "value": prompt}],
    tokenize=False,
    add_generation_prompt=True,
)

# Tokenize (yields input_ids and attention_mask) and move to the model's device.
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(model.device)

# Generate up to 100 new tokens; the attention mask is passed explicitly.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=100,
    use_cache=True,
)

# generate() returns the prompt tokens followed by the continuation. Decode only
# the part after the prompt so the printed response does not echo the templated
# input back to the reader.
prompt_length = inputs.input_ids.shape[1]
generated_ids = outputs[0][prompt_length:]
response = tokenizer.decode(generated_ids, skip_special_tokens=True)

# Print the original prompt and the generated response
print("Prompt:")
print(prompt)
print("\nGenerated Response:")
print(response)