In [None]:
# Install the fine-tuning stack for this notebook.
# `--no-deps` tells pip to install only the packages named on the line and to
# skip their declared dependencies; xformers is the only package pinned here.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
# These packages are installed with normal dependency resolution.
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
# Unsloth itself goes last, again without pulling in its dependencies.
!pip install --no-deps unsloth

In [1]:
import os

import torch
# Unsloth asks to be imported before transformers so its patches can be applied.
from unsloth import FastLanguageModel
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer
# NOTE(review): this top-level `userdata` import is rebound by the google.colab
# import below; confirm such a module exists, otherwise it is safe to drop.
import userdata

from google.colab import userdata
# Authenticate with Hugging Face
# The access token is read from the Colab secret named 'HF_TOKEN'.
HF_Key = userdata.get('HF_TOKEN')
HF_TOKEN = HF_Key  # alias, so either name can be used further down

# Expose the token through the HF_TOKEN environment variable, which
# huggingface_hub consults when it resolves credentials. This authenticates
# the gated model download below without relying on a `login` helper.
os.environ["HF_TOKEN"] = HF_Key

# Load base model
# FastLanguageModel.from_pretrained returns the model together with its
# tokenizer, so no separate AutoTokenizer load is needed.
model_id = "meta-llama/Llama-2-7b-hf"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,  # the loader's parameter is `model_name`, not `model_id`
    max_seq_length=512,
    dtype=None,
    load_in_4bit=True,
)

# Load the first 100 training records of Dolly-15k as the sample dataset
dataset = load_dataset(
    path="databricks/databricks-dolly-15k",
    split="train[:100]",
)

def format_instruction(sample):
    """Render one record as an Instruction / Input / Response prompt.

    Args:
        sample: Mapping with the keys 'instruction', 'context' and 'response'.

    Returns:
        A single string with the three labelled sections separated by blank lines.
    """
    sections = (
        f"### Instruction: {sample['instruction']}",
        f"### Input: {sample['context']}",
        f"### Response: {sample['response']}",
    )
    return "\n\n".join(sections)

# Sampling helper used for the before/after comparison
def generate_response(prompt, model, tokenizer):
    """Sample a completion for ``prompt`` and return it as text.

    Args:
        prompt: Text to feed to the model.
        model: Object exposing ``device`` and ``generate(**kwargs)``.
        tokenizer: Callable tokenizer that also provides ``decode``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
        For Hugging Face causal LMs this typically includes the prompt text
        as well (not verified here).
    """
    sampling_options = {
        "max_new_tokens": 128,
        "temperature": 0.7,
        "do_sample": True,
    }
    # Tokenize to PyTorch tensors and move them to the model's device.
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to(model.device)
    generated = model.generate(**encoded, **sampling_options)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Baseline: sample from the model before any LoRA training
test_prompt = (
    "### Instruction: Explain the concept of quantum entanglement.\n"
    "\n"
    "### Input: Keep it simple and understandable for a high school student.\n"
    "\n"
    "### Response:"
)

print("Before fine-tuning:")
baseline_output = generate_response(test_prompt, model, tokenizer)
print(baseline_output)

# Wrap the model with LoRA adapters on the four attention projections
lora_settings = {
    "r": 16,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "lora_alpha": 16,
    "lora_dropout": 0.05,
}
model = FastLanguageModel.get_peft_model(model, **lora_settings)

# Prepare training arguments
# Effective batch size is 4 * 4 = 16 sequences per optimizer step. On the
# 100-record sample that is only about 20 steps over three epochs, so log every
# step; a coarser logging interval would never report the training loss.
training_args = transformers.TrainingArguments(
    output_dir="./llama2-lora-output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    weight_decay=0.01,
    save_strategy="epoch",  # one checkpoint per epoch under output_dir
    logging_steps=1,
    # Keep the run self-contained: do not hand metrics to external experiment
    # trackers, which may stop a notebook on an interactive login prompt.
    report_to="none",
)

# Create trainer
# Trainer needs token ids, not raw Dolly records, so each sample is first
# rendered with the prompt template and tokenized.
def _tokenize_sample(sample):
    """Render one record with format_instruction and tokenize it for causal-LM training."""
    # Append EOS so the model also learns where a response ends.
    text = format_instruction(sample) + tokenizer.eos_token
    # 512 matches the max_seq_length the model was loaded with.
    return tokenizer(text, truncation=True, max_length=512)


# Drop the original text columns; only the tokenizer outputs are kept.
tokenized_dataset = dataset.map(
    _tokenize_sample,
    remove_columns=dataset.column_names,
)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # mlm=False selects the causal-LM collator, which pads each batch and
    # builds the labels from the input ids.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Train the model
trainer.train()

# Test post-fine-tuning performance
# Training leaves the model in train mode; switch to eval so LoRA dropout is
# disabled and the sample is generated under the same conditions as the
# pre-fine-tuning baseline.
model.eval()
print("\nAfter fine-tuning:")
print(generate_response(test_prompt, model, tokenizer))

# Save the fine-tuned model and its tokenizer side by side
model.save_pretrained("./llama2-lora-finetuned")
tokenizer.save_pretrained("./llama2-lora-finetuned")

KeyboardInterrupt: 