In [None]:
# Install required libraries
!pip install transformers datasets torch sentencepiece accelerate bitsandbytes peft -q

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: never hard-code an access token in a notebook. The token that was
# previously embedded in this cell must be treated as leaked and revoked at
# https://huggingface.co/settings/tokens.
# Read the token from the HF_TOKEN environment variable when it is set,
# otherwise ask for it interactively without echoing it to the output.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
def setup_model_and_tokenizer(model_name):
    """
    Load the tokenizer and the causal language model for ``model_name``.

    The model is requested in float16 with ``device_map="auto"``; no
    quantization arguments are passed.

    Args:
        model_name: Identifier or path forwarded to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print("Loading tokenizer and model...")

    # Tokenizer first, so a bad model name fails before the weights are loaded.
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)

    load_options = {
        "device_map": "auto",
        "torch_dtype": torch.float16,
    }
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

    return loaded_tokenizer, loaded_model

In [None]:
def prepare_dataset(tokenizer, file_path, max_length=512):
    """
    Load prompt/completion pairs from a JSON file and tokenize them for
    causal language model fine-tuning.

    Every record must provide a ``prompt`` and a ``completion`` field. For each
    record the prompt and the completion (followed by EOS) are concatenated
    into one sequence that is truncated or right-padded to ``max_length``.
    ``labels`` has the same length as ``input_ids``; prompt and padding
    positions are set to -100 so that only completion tokens contribute to the
    loss.

    Args:
        tokenizer: Tokenizer used to encode the text. Side effect: its
            ``pad_token`` is set to its ``eos_token``.
        file_path: Path handed to ``load_dataset('json', data_files=...)``.
        max_length: Fixed sequence length of every tokenized example.

    Returns:
        The mapped dataset, in which the ``prompt`` and ``completion`` columns
        are replaced by ``input_ids``, ``attention_mask`` and ``labels``.

    Note:
        If a prompt alone fills ``max_length`` tokens, the completion is
        truncated away and that example carries no supervised tokens.
    """
    dataset = load_dataset('json', data_files=file_path)

    # Add a padding token to the tokenizer (reuse EOS). Done once here instead
    # of on every mapped batch.
    tokenizer.pad_token = tokenizer.eos_token
    pad_id = tokenizer.pad_token_id
    eos_id = tokenizer.eos_token_id
    ignore_index = -100  # label value skipped by the cross-entropy loss

    def tokenize_function(examples):
        # Encode prompts and completions separately so the boundary between
        # them is known; the completion gets no extra special tokens because it
        # continues the prompt's sequence.
        prompt_batch = tokenizer(examples['prompt'])['input_ids']
        completion_batch = tokenizer(
            examples['completion'], add_special_tokens=False
        )['input_ids']

        features = {'input_ids': [], 'attention_mask': [], 'labels': []}
        for prompt_ids, completion_ids in zip(prompt_batch, completion_batch):
            target_ids = completion_ids + [eos_id]

            token_ids = (prompt_ids + target_ids)[:max_length]
            labels = ([ignore_index] * len(prompt_ids) + target_ids)[:max_length]

            n_pad = max_length - len(token_ids)
            features['input_ids'].append(token_ids + [pad_id] * n_pad)
            features['attention_mask'].append([1] * len(token_ids) + [0] * n_pad)
            # Padding must not be learned, so it is masked like the prompt.
            features['labels'].append(labels + [ignore_index] * n_pad)
        return features

    return dataset.map(tokenize_function, batched=True, remove_columns=['prompt', 'completion'])

In [None]:
def setup_peft_model(model):
    """
    Wrap ``model`` with LoRA adapters for parameter-efficient fine-tuning.

    ``prepare_model_for_kbit_training`` is only applied when the model was
    actually loaded in 8-bit or 4-bit. Calling it on a plain float16 model
    upcasts the weights to float32, which roughly doubles the memory needed.
    For a non-quantized model, gradient checkpointing is enabled directly
    instead.

    Args:
        model: The base causal language model.

    Returns:
        The PEFT model with LoRA adapters on ``q_proj`` and ``v_proj``.
    """
    print("Setting up PEFT model...")

    is_quantized = (
        getattr(model, "is_loaded_in_8bit", False)
        or getattr(model, "is_loaded_in_4bit", False)
    )
    if is_quantized:
        model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
    else:
        model.gradient_checkpointing_enable()
        # With a frozen base model, the embedding outputs must require grad so
        # that gradients can flow through the checkpointed layers.
        model.enable_input_require_grads()

    config = LoraConfig(
        r=8,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    peft_model = get_peft_model(model, config)

    # Keep the (small) set of trainable weights in float32: fp16 mixed-precision
    # training cannot unscale gradients of float16 parameters.
    for param in peft_model.parameters():
        if param.requires_grad and param.dtype != torch.float32:
            param.data = param.data.to(torch.float32)

    return peft_model

In [None]:
def test_model(model, tokenizer):
    """
    Test the fine-tuned model with some sample prompts.
    """
    print("Testing the fine-tuned model...")

    # Test prompts
    test_prompts = [
        "What is artificial intelligence?",
        "What are the advantages of Python programming language?",
        "What are the effects of global warming?"
    ]

    model.eval()
    for prompt in test_prompts:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\nPrompt: {prompt}")
        print(f"Generated: {generated_text}\n")

In [None]:
def _interactive_loop(model, tokenizer):
    """Read prompts from stdin and print a generation for each until 'q' is entered."""
    model.eval()  # make sure dropout is disabled while generating
    while True:
        user_prompt = input("Enter a prompt (or 'q' to quit): ")
        if user_prompt.strip().lower() == 'q':
            break

        inputs = tokenizer(user_prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\nGenerated: {generated_text}\n")


def main():
    """
    Run the whole pipeline: load the base model, tokenize the training data,
    attach LoRA adapters, train, save, then reload the saved model and try it
    on sample prompts and on interactively entered prompts.

    Reads ``llama_fine_tuning_data.jsonl`` and writes to ``./results`` and
    ``./fine_tuned_llama_lora``.
    """
    import gc

    # Check for GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Set up model and tokenizer
    model_name = "meta-llama/Llama-2-7b-hf"
    tokenizer, model = setup_model_and_tokenizer(model_name)

    # Prepare dataset and pick its 'train' split
    tokenized_dataset = prepare_dataset(tokenizer, 'llama_fine_tuning_data.jsonl')
    train_dataset = tokenized_dataset['train']

    # Set up PEFT model
    model = setup_peft_model(model)

    # Set up training arguments
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=1,
        per_device_train_batch_size=1,  # small batch to limit memory use
        gradient_accumulation_steps=8,  # effective batch size of 8
        warmup_steps=100,
        logging_steps=10,
        save_steps=200,
        learning_rate=2e-4,
        fp16=device.type == "cuda",  # fp16 mixed precision needs a GPU
        max_grad_norm=0.3,
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )

    # Start training
    print("Starting training...")
    trainer.train()

    # Save the model
    print("Saving model...")
    trainer.save_model('./fine_tuned_llama_lora')
    tokenizer.save_pretrained('./fine_tuned_llama_lora')

    print("Fine-tuning complete and model saved!")

    # Release the training objects before reloading, so two copies of the
    # model do not have to fit in memory at the same time.
    del trainer, model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Load the saved model the same way the base model was loaded (float16,
    # automatic device placement) and test it.
    fine_tuned_model = AutoModelForCausalLM.from_pretrained(
        './fine_tuned_llama_lora',
        device_map="auto",
        torch_dtype=torch.float16,
    )
    test_model(fine_tuned_model, tokenizer)

    # Interactive testing uses the same reloaded model that was just tested.
    _interactive_loop(fine_tuned_model, tokenizer)

In [None]:
# Entry point: run the full fine-tuning pipeline when this cell (or the file,
# if exported as a script) is executed directly.
if __name__ == "__main__":
    main()


Using device: cuda
Loading tokenizer and model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting up PEFT model...


OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 