In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "Salesforce/codegen-350M-mono",
    use_auth_token="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"  # paste your actual token here as a string
)


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at Salesforce/codegen-350M-mono were not used when initializing CodeGenForCausalLM: ['transformer.h.0.attn.causal_mask', 'transformer.h.1.attn.causal_mask', 'transformer.h.10.attn.causal_mask', 'transformer.h.11.attn.causal_mask', 'transformer.h.12.attn.causal_mask', 'transformer.h.13.attn.causal_mask', 'transformer.h.14.attn.causal_mask', 'transformer.h.15.attn.causal_mask', 'transformer.h.16.attn.causal_mask', 'transformer.h.17.attn.causal_mask', 'transformer.h.18.attn.causal_mask', 'transformer.h.19.attn.causal_mask', 'transformer.h.2.attn.causal_mask', 'transformer.h.3.attn.causal_mask', 'transformer.h.4.attn.causal_mask', 'transformer.h.5.attn.causal_mask', 'transformer.h.6.attn.causal_mask', 'transformer.h.7.attn.causal_mask', 'transformer.h.8.attn.causal_mask', 'transformer.h.9.attn.causal_mask']
- This IS expected if you are initializing CodeGenForCausalLM from the checkpoint of a model train

In [3]:
# models/finetune_model.py
import os
import json
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType
)

def prepare_training_data(data_path):
    with open(data_path, 'r') as f:
        data = json.load(f)
    samples = [{"text": f"{item['prompt']}{item['completion']}"} for item in data]
    return Dataset.from_list(samples)

def finetune_model(
    base_model_name="Salesforce/codegen-350M-mono",
    data_path="C:\\Users\\annan\\code_generation_comparison\\data\\data\\python_dataset.json",
    output_dir="../models/finetuned-code-model",
    epochs=3,
    batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-5
):
    print(f"Loading base model: {base_model_name}")
    
    # Step 1: Load the base model WITHOUT quantization first
    print("Loading model without quantization...")
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    
    # Step 2: Extract model modules to properly configure LoRA
    model_modules = [name for name, _ in model.named_modules() if "proj" in name]
    print("Available modules:", model_modules)
    
    # Step 3: Configure the LoRA adapters
    lora_config = LoraConfig(
        r=8,
        lora_alpha=32,
        target_modules=["qkv_proj", "out_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
    
    # Step 4: Apply PEFT/LoRA to the model
    model = get_peft_model(model, lora_config)
    print(f"PEFT model class: {model.__class__}")
    model.print_trainable_parameters()
    
    # Step 5: Set up tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    # Step 6: Prepare the dataset
    print("Preparing dataset...")
    dataset = prepare_training_data(data_path)
    
    tokenized_dataset = dataset.map(
        lambda x: tokenizer(x["text"], padding="max_length", truncation=True, max_length=512),
        batched=True,
        remove_columns=["text"]
    )
    
    # Step 7: Configure training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        weight_decay=0.01,
        save_strategy="epoch",
        logging_dir=os.path.join(output_dir, "logs"),
        logging_steps=10,
        fp16=True,
        optim="adamw_torch"
    )
    
    # Step 8: Create data collator
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    
    # Step 9: Create and run the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator
    )
    
    print("Starting fine-tuning...")
    trainer.train()
    
    # Step 10: Save the model (only the LoRA adapter)
    print(f"Saving model to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    
    return output_dir

if __name__ == "__main__":
    finetune_model()

Loading base model: Salesforce/codegen-350M-mono
Loading model without quantization...


Some weights of the model checkpoint at Salesforce/codegen-350M-mono were not used when initializing CodeGenForCausalLM: ['transformer.h.0.attn.causal_mask', 'transformer.h.1.attn.causal_mask', 'transformer.h.10.attn.causal_mask', 'transformer.h.11.attn.causal_mask', 'transformer.h.12.attn.causal_mask', 'transformer.h.13.attn.causal_mask', 'transformer.h.14.attn.causal_mask', 'transformer.h.15.attn.causal_mask', 'transformer.h.16.attn.causal_mask', 'transformer.h.17.attn.causal_mask', 'transformer.h.18.attn.causal_mask', 'transformer.h.19.attn.causal_mask', 'transformer.h.2.attn.causal_mask', 'transformer.h.3.attn.causal_mask', 'transformer.h.4.attn.causal_mask', 'transformer.h.5.attn.causal_mask', 'transformer.h.6.attn.causal_mask', 'transformer.h.7.attn.causal_mask', 'transformer.h.8.attn.causal_mask', 'transformer.h.9.attn.causal_mask']
- This IS expected if you are initializing CodeGenForCausalLM from the checkpoint of a model trained on another task or with another architecture (e

Available modules: ['transformer.h.0.attn.qkv_proj', 'transformer.h.0.attn.out_proj', 'transformer.h.1.attn.qkv_proj', 'transformer.h.1.attn.out_proj', 'transformer.h.2.attn.qkv_proj', 'transformer.h.2.attn.out_proj', 'transformer.h.3.attn.qkv_proj', 'transformer.h.3.attn.out_proj', 'transformer.h.4.attn.qkv_proj', 'transformer.h.4.attn.out_proj', 'transformer.h.5.attn.qkv_proj', 'transformer.h.5.attn.out_proj', 'transformer.h.6.attn.qkv_proj', 'transformer.h.6.attn.out_proj', 'transformer.h.7.attn.qkv_proj', 'transformer.h.7.attn.out_proj', 'transformer.h.8.attn.qkv_proj', 'transformer.h.8.attn.out_proj', 'transformer.h.9.attn.qkv_proj', 'transformer.h.9.attn.out_proj', 'transformer.h.10.attn.qkv_proj', 'transformer.h.10.attn.out_proj', 'transformer.h.11.attn.qkv_proj', 'transformer.h.11.attn.out_proj', 'transformer.h.12.attn.qkv_proj', 'transformer.h.12.attn.out_proj', 'transformer.h.13.attn.qkv_proj', 'transformer.h.13.attn.out_proj', 'transformer.h.14.attn.qkv_proj', 'transformer.h

Map: 100%|█████████████████████████████████████████████████████████████████| 5000/5000 [00:10<00:00, 485.67 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting fine-tuning...


KeyboardInterrupt: 